From 2ab9256e8abff9e6f5c6a85aa262120aa03da859 Mon Sep 17 00:00:00 2001 From: BitToby <218712309+bittoby@users.noreply.github.com> Date: Wed, 17 Jun 2026 18:14:13 +0700 Subject: [PATCH] fix(go): correct OpenRouter streaming URL routing and reasoning parameter (#16111) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? Fixes two bugs in the OpenRouter streaming chat request builder (`internal/entity/models/openrouter.go`, `ChatStreamlyWithSender`): 1. **qwen/glm models streamed to a broken URL.** The code routed any `qwen`/`glm` model to `URLSuffix.AsyncChat`, but `conf/models/openrouter.json` defines no `async_chat` suffix (empty), so the request was POSTed to `/` instead of `/chat/completions` — breaking streaming for every qwen/glm model. The non-stream path has no such branch. Fix: all models use the standard `Chat` suffix, consistent with the non-stream path. 2. **Streaming reasoning was never enabled.** The request set reasoning via a non-standard `thinking` key, which OpenRouter ignores. OpenRouter's API — and this provider's own non-stream request (line ~110) and its streamed `delta.reasoning` parser (line ~311) — use the `reasoning` object. Fix: send `reasoning: {"enabled": <bool>}` (and `{"effort": ...}` when set, taking precedence as in the non-stream path). 
Closes #16110 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- internal/entity/models/openrouter.go | 27 ++++++------ internal/entity/models/openrouter_test.go | 54 +++++++++++++++++++++++ 2 files changed, 67 insertions(+), 14 deletions(-) diff --git a/internal/entity/models/openrouter.go b/internal/entity/models/openrouter.go index adbf0a4a4f..8c7054f104 100644 --- a/internal/entity/models/openrouter.go +++ b/internal/entity/models/openrouter.go @@ -202,13 +202,12 @@ func (o *OpenRouterModel) ChatStreamlyWithSender(modelName string, messages []Me if err != nil { return err } + // All OpenRouter models use the standard chat-completions endpoint, same as + // the non-stream path. The previous qwen/glm branch routed to URLSuffix.AsyncChat, + // which OpenRouter does not configure (empty suffix) — producing a broken URL and + // breaking streaming for every qwen/glm model. url := fmt.Sprintf("%s/%s", resolvedBaseURL, o.baseModel.URLSuffix.Chat) - modelType := strings.Split(modelName, "_")[0] - if modelType == "qwen" || modelType == "glm" { - url = fmt.Sprintf("%s/%s", resolvedBaseURL, o.baseModel.URLSuffix.AsyncChat) - } - // Convert messages to API format apiMessages := make([]map[string]interface{}, len(messages)) for i, msg := range messages { @@ -250,16 +249,16 @@ func (o *OpenRouterModel) ChatStreamlyWithSender(modelName string, messages []Me reqBody["stop"] = *modelConfig.Stop } + // OpenRouter controls reasoning via the standard `reasoning` request object + // (the non-stream path and the streamed `delta.reasoning` response use it too). + // The previous `thinking` key is non-standard and silently ignored by the API, + // so streaming reasoning was never actually enabled. `effort` takes precedence, + // matching the non-stream path. 
if modelConfig.Thinking != nil { - if *modelConfig.Thinking { - reqBody["thinking"] = map[string]interface{}{ - "type": "enabled", - } - } else { - reqBody["thinking"] = map[string]interface{}{ - "type": "disabled", - } - } + reqBody["reasoning"] = map[string]interface{}{"enabled": *modelConfig.Thinking} + } + if modelConfig.Effort != nil { + reqBody["reasoning"] = map[string]interface{}{"effort": *modelConfig.Effort} } } diff --git a/internal/entity/models/openrouter_test.go b/internal/entity/models/openrouter_test.go index 8276a28f2d..598195a1d4 100644 --- a/internal/entity/models/openrouter_test.go +++ b/internal/entity/models/openrouter_test.go @@ -218,3 +218,57 @@ func TestOpenRouterTranscribeAudioHTTPError(t *testing.T) { t.Fatalf("err=%v", err) } } + +// TestOpenRouterChatStreamlyRequest verifies the streaming chat request is built +// correctly: every model (including qwen/glm) targets the standard chat endpoint, +// and reasoning is requested via OpenRouter's standard `reasoning` object rather +// than the non-standard `thinking` key that the API silently ignores. +func TestOpenRouterChatStreamlyRequest(t *testing.T) { + var gotPath string + var gotBody map[string]interface{} + srv := newOpenRouterServer(t, "/chat/completions", func(t *testing.T, r *http.Request, body map[string]interface{}, w http.ResponseWriter) { + gotPath = r.URL.Path + gotBody = body + w.Header().Set("Content-Type", "text/event-stream") + io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"reasoning\":\"think\"}}]}\n\n") + io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\n") + io.WriteString(w, "data: [DONE]\n\n") + }) + defer srv.Close() + + apiKey := "test-key" + thinking := true + var reasoning strings.Builder + // "qwen_max" would have triggered the removed async-routing branch (empty suffix). 
+ err := newOpenRouterForTest(srv.URL).ChatStreamlyWithSender( + "qwen_max", + []Message{{Role: "user", Content: "hi"}}, + &APIConfig{ApiKey: &apiKey}, + &ChatConfig{Thinking: &thinking}, + func(content, reason *string) error { + if reason != nil { + reasoning.WriteString(*reason) + } + return nil + }, + ) + if err != nil { + t.Fatalf("ChatStreamlyWithSender error: %v", err) + } + if gotPath != "/chat/completions" { + t.Errorf("path=%q, want /chat/completions (qwen must not route to async suffix)", gotPath) + } + if _, ok := gotBody["thinking"]; ok { + t.Errorf("request still sends non-standard `thinking` key: %v", gotBody["thinking"]) + } + reason, ok := gotBody["reasoning"].(map[string]interface{}) + if !ok { + t.Fatalf("request missing standard `reasoning` object, body=%v", gotBody) + } + if reason["enabled"] != true { + t.Errorf("reasoning.enabled=%v, want true", reason["enabled"]) + } + if reasoning.String() != "think" { + t.Errorf("streamed reasoning=%q, want %q", reasoning.String(), "think") + } +}