mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-03 01:01:56 +08:00
### Summary

Closes #15381

Every provider in `internal/entity/models/` reads its streaming response with `bufio.NewScanner(resp.Body)` and iterates over `scanner.Scan()`. The default `bufio.Scanner` maximum token size is 64KB, so when an upstream sends a single SSE `data:` line larger than 64KB (long content deltas, large tool or function call argument blobs, bundled `reasoning_content`, or providers that emit a whole message in one event) `scanner.Scan()` returns `false` and `scanner.Err()` returns `bufio.ErrTooLong`. Streaming chat then ends with an error partway through the response.

This change adds `scanner.Buffer(make([]byte, 64*1024), 1024*1024)` immediately after every SSE scanner that was still bare, raising the cap to 1MB. 1MB is the value already used for streaming chat in `openai.go`, `modelscope.go`, `groq.go`, `mistral.go`, `xai.go` and the other already-patched providers (the 8MB cap in the repo is reserved for TTS and embedding paths), so this simply converges the remaining providers onto the established pattern. Nothing else changes: line parsing, `data:` prefix handling, `[DONE]` detection, JSON unmarshalling, error handling, and the existing `scanner.Err()` checks all stay the same.

Providers covered (23 scanners across 22 files): 302ai, aliyun, baichuan, baidu, cohere, deepinfra, deepseek, gitee, huggingface, lmstudio, minimax (the chat scanner, whose TTS scanner was already bumped), moonshot, nvidia, ollama, openrouter, orcarouter, paddleocr, siliconflow, tokenhub, vllm, volcengine, xunfei, zhipu-ai. `jiekouai.go` is excluded because it is covered by the in-flight #15337.

A table-driven regression test (`sse_scanner_buffer_test.go`) streams a single 128KB `data:` content delta followed by `data: [DONE]` through an `httptest` server and asserts that `ChatStreamlyWithSender` delivers the full content with no error across a representative subset of providers. Without the buffer fix the test fails with `bufio.Scanner: token too long`.
This PR also removes three duplicate declarations of the package-level `roundTripperFunc` test helper that several recently merged provider PRs each added independently, which had left the `internal/entity/models` test package unable to compile. The helper now lives in a single place and is shared.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
100 lines
4.0 KiB
Go
100 lines
4.0 KiB
Go
package models
|
|
|
|
import (
	"encoding/json"
	"io"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"
)
|
|
|
|
// chatStreamer is the streaming entrypoint shared by every OpenAI-compatible
|
|
// provider. The buffer regression below exercises it through a table so a new
|
|
// provider only needs one row.
|
|
type chatStreamer interface {
|
|
ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error
|
|
}
|
|
|
|
// largeSSEStreamServer starts an httptest server that answers every request
// with a three-line SSE body: a single "data:" line carrying content as one
// content delta, a finish_reason "stop" chunk, and the [DONE] sentinel.
//
// It is meant to be called with content larger than the default 64KB
// bufio.Scanner token size. Without scanner.Buffer(...) the oversized line
// makes scanner.Scan() return false with bufio.ErrTooLong and the stream is
// truncated; with the raised buffer the full content is delivered.
//
// content is JSON-encoded before it is embedded, so quotes, backslashes and
// newlines in it can neither produce invalid JSON nor split the "data:" line.
// For plain content the bytes on the wire are unchanged.
//
// The caller is responsible for closing the returned server.
func largeSSEStreamServer(t *testing.T, content string) *httptest.Server {
	t.Helper()

	// Encode up front rather than inside the handler: t.Fatalf must be called
	// from the test goroutine, and the body is identical for every request.
	encoded, err := json.Marshal(content)
	if err != nil {
		t.Fatalf("encoding SSE content: %v", err)
	}
	body := `data: {"choices":[{"delta":{"content":` + string(encoded) + `}}]}` + "\n" +
		`data: {"choices":[{"delta":{},"finish_reason":"stop"}]}` + "\n" +
		`data: [DONE]` + "\n"

	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		// Write errors are deliberately ignored: a failed write shows up as a
		// short or broken stream in the caller's own assertions.
		_, _ = io.WriteString(w, body)
	}))
}
|
|
|
|
func TestChatStreamLargeChunkNotTruncated(t *testing.T) {
|
|
// 128KB content delta: comfortably past the 64KB default so the bare
|
|
// scanner would fail, well under the 1MB raised cap so the fix succeeds.
|
|
const big = 128 * 1024
|
|
content := strings.Repeat("a", big)
|
|
|
|
suffix := URLSuffix{Chat: "chat/completions", Models: "models"}
|
|
build := func(c func(map[string]string, URLSuffix) chatStreamer) func(string) chatStreamer {
|
|
return func(baseURL string) chatStreamer {
|
|
return c(map[string]string{"default": baseURL}, suffix)
|
|
}
|
|
}
|
|
|
|
cases := []struct {
|
|
name string
|
|
build func(string) chatStreamer
|
|
}{
|
|
{"deepinfra", build(func(b map[string]string, s URLSuffix) chatStreamer { return NewDeepInfraModel(b, s) })},
|
|
{"vllm", build(func(b map[string]string, s URLSuffix) chatStreamer { return NewVllmModel(b, s) })},
|
|
{"openrouter", build(func(b map[string]string, s URLSuffix) chatStreamer { return NewOpenRouterModel(b, s) })},
|
|
{"siliconflow", build(func(b map[string]string, s URLSuffix) chatStreamer { return NewSiliconflowModel(b, s) })},
|
|
{"moonshot", build(func(b map[string]string, s URLSuffix) chatStreamer { return NewMoonshotModel(b, s) })},
|
|
{"deepseek", build(func(b map[string]string, s URLSuffix) chatStreamer { return NewDeepSeekModel(b, s) })},
|
|
{"nvidia", build(func(b map[string]string, s URLSuffix) chatStreamer { return NewNvidiaModel(b, s) })},
|
|
{"lmstudio", build(func(b map[string]string, s URLSuffix) chatStreamer { return NewLmStudioModel(b, s) })},
|
|
{"gitee", build(func(b map[string]string, s URLSuffix) chatStreamer { return NewGiteeModel(b, s) })},
|
|
{"tokenhub", build(func(b map[string]string, s URLSuffix) chatStreamer { return NewTokenHubModel(b, s) })},
|
|
}
|
|
|
|
for _, tc := range cases {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
srv := largeSSEStreamServer(t, content)
|
|
defer srv.Close()
|
|
|
|
apiKey := "test-key"
|
|
var got strings.Builder
|
|
err := tc.build(srv.URL).ChatStreamlyWithSender(
|
|
"test-model",
|
|
[]Message{{Role: "user", Content: "hi"}},
|
|
&APIConfig{ApiKey: &apiKey},
|
|
// Empty (non-nil) config: providers default stream=true and
|
|
// only override when Stream != nil, so this streams for all of
|
|
// them while avoiding a nil-config deref in providers that read
|
|
// modelConfig unconditionally.
|
|
&ChatConfig{},
|
|
func(c *string, _ *string) error {
|
|
if c != nil && *c != "[DONE]" {
|
|
got.WriteString(*c)
|
|
}
|
|
return nil
|
|
},
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("ChatStreamlyWithSender returned error (large chunk truncated?): %v", err)
|
|
}
|
|
if got.Len() != big {
|
|
t.Fatalf("delivered %d bytes, want %d (content was truncated)", got.Len(), big)
|
|
}
|
|
if got.String() != content {
|
|
t.Fatalf("delivered content does not match streamed content")
|
|
}
|
|
})
|
|
}
|
|
}
|