# Patch summary (reviewer notes). This text sits ahead of the first `diff --git`
# header, so it is commentary and not part of any hunk.
#
# internal/entity/models/openrouter.go -- ChatStreamlyWithSender
#   * Hunk -202,13 +202,12: removes the branch that split modelName on "_" and,
#     for the "qwen" / "glm" prefixes, rebuilt the URL from URLSuffix.AsyncChat.
#     Every model now uses resolvedBaseURL + "/" + URLSuffix.Chat.
#   * Hunk -250,16 +249,16: when modelConfig.Thinking is non-nil the request now
#     carries reqBody["reasoning"] = {"enabled": <bool>} in place of
#     reqBody["thinking"] = {"type": "enabled" | "disabled"}. When
#     modelConfig.Effort is non-nil, reqBody["reasoning"] is then reassigned to
#     {"effort": <value>}.
#     NOTE(review): that second assignment replaces the whole map, so a request
#     with Thinking=false and Effort set goes out with no "enabled" key at all.
#     The in-code comment says this matches the non-stream path, which is not
#     visible in this patch -- confirm that is intended.
#
# internal/entity/models/openrouter_test.go -- TestOpenRouterChatStreamlyRequest (new)
#   * Builds a test server via newOpenRouterServer(t, "/chat/completions", ...)
#     whose handler records the request path and body, then writes three SSE
#     `data:` lines: a reasoning delta, a content delta, and [DONE].
#   * Calls ChatStreamlyWithSender with model "qwen_max" and Thinking=true, then
#     asserts: the request path is /chat/completions; the body has no "thinking"
#     key; body "reasoning" is an object with enabled == true; the reasoning
#     text handed to the sender callback is "think".
#   * NOTE(review): the Effort override and the streamed content ("hi") are not
#     asserted by this test.
#
# NOTE(review): in this copy the file headers (`diff --git`, `index`, `---`,
# `+++`) and the hunk bodies share physical lines, i.e. the patch's line breaks
# appear to have been collapsed. They must be restored before a patch tool can
# apply it.
diff --git a/internal/entity/models/openrouter.go b/internal/entity/models/openrouter.go index adbf0a4a4f..8c7054f104 100644 --- a/internal/entity/models/openrouter.go +++ b/internal/entity/models/openrouter.go @@ -202,13 +202,12 @@ func (o *OpenRouterModel) ChatStreamlyWithSender(modelName string, messages []Me if err != nil { return err } + // All OpenRouter models use the standard chat-completions endpoint, same as + // the non-stream path. The previous qwen/glm branch routed to URLSuffix.AsyncChat, + // which OpenRouter does not configure (empty suffix) — producing a broken URL and + // breaking streaming for every qwen/glm model. url := fmt.Sprintf("%s/%s", resolvedBaseURL, o.baseModel.URLSuffix.Chat) - modelType := strings.Split(modelName, "_")[0] - if modelType == "qwen" || modelType == "glm" { - url = fmt.Sprintf("%s/%s", resolvedBaseURL, o.baseModel.URLSuffix.AsyncChat) - } - // Convert messages to API format apiMessages := make([]map[string]interface{}, len(messages)) for i, msg := range messages { @@ -250,16 +249,16 @@ func (o *OpenRouterModel) ChatStreamlyWithSender(modelName string, messages []Me reqBody["stop"] = *modelConfig.Stop } + // OpenRouter controls reasoning via the standard `reasoning` request object + // (the non-stream path and the streamed `delta.reasoning` response use it too). + // The previous `thinking` key is non-standard and silently ignored by the API, + // so streaming reasoning was never actually enabled. `effort` takes precedence, + // matching the non-stream path. 
if modelConfig.Thinking != nil { - if *modelConfig.Thinking { - reqBody["thinking"] = map[string]interface{}{ - "type": "enabled", - } - } else { - reqBody["thinking"] = map[string]interface{}{ - "type": "disabled", - } - } + reqBody["reasoning"] = map[string]interface{}{"enabled": *modelConfig.Thinking} + } + if modelConfig.Effort != nil { + reqBody["reasoning"] = map[string]interface{}{"effort": *modelConfig.Effort} } } diff --git a/internal/entity/models/openrouter_test.go b/internal/entity/models/openrouter_test.go index 8276a28f2d..598195a1d4 100644 --- a/internal/entity/models/openrouter_test.go +++ b/internal/entity/models/openrouter_test.go @@ -218,3 +218,57 @@ func TestOpenRouterTranscribeAudioHTTPError(t *testing.T) { t.Fatalf("err=%v", err) } } + +// TestOpenRouterChatStreamlyRequest verifies the streaming chat request is built +// correctly: every model (including qwen/glm) targets the standard chat endpoint, +// and reasoning is requested via OpenRouter's standard `reasoning` object rather +// than the non-standard `thinking` key that the API silently ignores. +func TestOpenRouterChatStreamlyRequest(t *testing.T) { + var gotPath string + var gotBody map[string]interface{} + srv := newOpenRouterServer(t, "/chat/completions", func(t *testing.T, r *http.Request, body map[string]interface{}, w http.ResponseWriter) { + gotPath = r.URL.Path + gotBody = body + w.Header().Set("Content-Type", "text/event-stream") + io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"reasoning\":\"think\"}}]}\n\n") + io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\n") + io.WriteString(w, "data: [DONE]\n\n") + }) + defer srv.Close() + + apiKey := "test-key" + thinking := true + var reasoning strings.Builder + // "qwen_max" would have triggered the removed async-routing branch (empty suffix). 
+ err := newOpenRouterForTest(srv.URL).ChatStreamlyWithSender( + "qwen_max", + []Message{{Role: "user", Content: "hi"}}, + &APIConfig{ApiKey: &apiKey}, + &ChatConfig{Thinking: &thinking}, + func(content, reason *string) error { + if reason != nil { + reasoning.WriteString(*reason) + } + return nil + }, + ) + if err != nil { + t.Fatalf("ChatStreamlyWithSender error: %v", err) + } + if gotPath != "/chat/completions" { + t.Errorf("path=%q, want /chat/completions (qwen must not route to async suffix)", gotPath) + } + if _, ok := gotBody["thinking"]; ok { + t.Errorf("request still sends non-standard `thinking` key: %v", gotBody["thinking"]) + } + reason, ok := gotBody["reasoning"].(map[string]interface{}) + if !ok { + t.Fatalf("request missing standard `reasoning` object, body=%v", gotBody) + } + if reason["enabled"] != true { + t.Errorf("reasoning.enabled=%v, want true", reason["enabled"]) + } + if reasoning.String() != "think" { + t.Errorf("streamed reasoning=%q, want %q", reasoning.String(), "think") + } +}