mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
fix(go): correct OpenRouter streaming URL routing and reasoning parameter (#16111)
### What problem does this PR solve?
Fixes two bugs in the OpenRouter streaming chat request builder
(`internal/entity/models/openrouter.go`, `ChatStreamlyWithSender`):
1. **qwen/glm models streamed to a broken URL.** The code routed any `qwen`/`glm` model to `URLSuffix.AsyncChat`, but `conf/models/openrouter.json` defines no `async_chat` suffix (the value is empty), so the request was POSTed to `<base>/` instead of `<base>/chat/completions` — breaking streaming for every qwen/glm model. The non-stream path has no such branch.
Fix: all models use the standard `Chat` suffix, consistent with the non-stream path.
2. **Streaming reasoning was never enabled.** The request set reasoning via a non-standard `thinking` key, which OpenRouter ignores. OpenRouter's API — and this provider's own non-stream request (line ~110) and its streamed `delta.reasoning` parser (line ~311) — use the `reasoning` object.
Fix: send `reasoning: {"enabled": <thinking>}`, or `reasoning: {"effort": ...}` when an effort is set; the effort form takes precedence, as in the non-stream path.
Closes #16110
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -202,13 +202,12 @@ func (o *OpenRouterModel) ChatStreamlyWithSender(modelName string, messages []Me
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// All OpenRouter models use the standard chat-completions endpoint, same as
|
||||
// the non-stream path. The previous qwen/glm branch routed to URLSuffix.AsyncChat,
|
||||
// which OpenRouter does not configure (empty suffix) — producing a broken URL and
|
||||
// breaking streaming for every qwen/glm model.
|
||||
url := fmt.Sprintf("%s/%s", resolvedBaseURL, o.baseModel.URLSuffix.Chat)
|
||||
|
||||
modelType := strings.Split(modelName, "_")[0]
|
||||
if modelType == "qwen" || modelType == "glm" {
|
||||
url = fmt.Sprintf("%s/%s", resolvedBaseURL, o.baseModel.URLSuffix.AsyncChat)
|
||||
}
|
||||
|
||||
// Convert messages to API format
|
||||
apiMessages := make([]map[string]interface{}, len(messages))
|
||||
for i, msg := range messages {
|
||||
@@ -250,16 +249,16 @@ func (o *OpenRouterModel) ChatStreamlyWithSender(modelName string, messages []Me
|
||||
reqBody["stop"] = *modelConfig.Stop
|
||||
}
|
||||
|
||||
// OpenRouter controls reasoning via the standard `reasoning` request object
|
||||
// (the non-stream path and the streamed `delta.reasoning` response use it too).
|
||||
// The previous `thinking` key is non-standard and silently ignored by the API,
|
||||
// so streaming reasoning was never actually enabled. `effort` takes precedence,
|
||||
// matching the non-stream path.
|
||||
if modelConfig.Thinking != nil {
|
||||
if *modelConfig.Thinking {
|
||||
reqBody["thinking"] = map[string]interface{}{
|
||||
"type": "enabled",
|
||||
}
|
||||
} else {
|
||||
reqBody["thinking"] = map[string]interface{}{
|
||||
"type": "disabled",
|
||||
}
|
||||
}
|
||||
reqBody["reasoning"] = map[string]interface{}{"enabled": *modelConfig.Thinking}
|
||||
}
|
||||
if modelConfig.Effort != nil {
|
||||
reqBody["reasoning"] = map[string]interface{}{"effort": *modelConfig.Effort}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -218,3 +218,57 @@ func TestOpenRouterTranscribeAudioHTTPError(t *testing.T) {
|
||||
t.Fatalf("err=%v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestOpenRouterChatStreamlyRequest verifies the streaming chat request is built
|
||||
// correctly: every model (including qwen/glm) targets the standard chat endpoint,
|
||||
// and reasoning is requested via OpenRouter's standard `reasoning` object rather
|
||||
// than the non-standard `thinking` key that the API silently ignores.
|
||||
func TestOpenRouterChatStreamlyRequest(t *testing.T) {
|
||||
var gotPath string
|
||||
var gotBody map[string]interface{}
|
||||
srv := newOpenRouterServer(t, "/chat/completions", func(t *testing.T, r *http.Request, body map[string]interface{}, w http.ResponseWriter) {
|
||||
gotPath = r.URL.Path
|
||||
gotBody = body
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"reasoning\":\"think\"}}]}\n\n")
|
||||
io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\n")
|
||||
io.WriteString(w, "data: [DONE]\n\n")
|
||||
})
|
||||
defer srv.Close()
|
||||
|
||||
apiKey := "test-key"
|
||||
thinking := true
|
||||
var reasoning strings.Builder
|
||||
// "qwen_max" would have triggered the removed async-routing branch (empty suffix).
|
||||
err := newOpenRouterForTest(srv.URL).ChatStreamlyWithSender(
|
||||
"qwen_max",
|
||||
[]Message{{Role: "user", Content: "hi"}},
|
||||
&APIConfig{ApiKey: &apiKey},
|
||||
&ChatConfig{Thinking: &thinking},
|
||||
func(content, reason *string) error {
|
||||
if reason != nil {
|
||||
reasoning.WriteString(*reason)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("ChatStreamlyWithSender error: %v", err)
|
||||
}
|
||||
if gotPath != "/chat/completions" {
|
||||
t.Errorf("path=%q, want /chat/completions (qwen must not route to async suffix)", gotPath)
|
||||
}
|
||||
if _, ok := gotBody["thinking"]; ok {
|
||||
t.Errorf("request still sends non-standard `thinking` key: %v", gotBody["thinking"])
|
||||
}
|
||||
reason, ok := gotBody["reasoning"].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("request missing standard `reasoning` object, body=%v", gotBody)
|
||||
}
|
||||
if reason["enabled"] != true {
|
||||
t.Errorf("reasoning.enabled=%v, want true", reason["enabled"])
|
||||
}
|
||||
if reasoning.String() != "think" {
|
||||
t.Errorf("streamed reasoning=%q, want %q", reasoning.String(), "think")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user