From 558ea51a0f9fb3808071d6f52aec9dfb9569c94f Mon Sep 17 00:00:00 2001
From: tmimmanuel <14046872+tmimmanuel@users.noreply.github.com>
Date: Mon, 11 May 2026 19:49:35 -1000
Subject: [PATCH] Go: implement provider: StepFun (#14815)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What problem does this PR solve?

Add a Go driver for StepFun (阶跃星辰), one of the unchecked providers on
the umbrella tracking issue #14736.

Until this PR, a tenant who configured `stepfun` as a model provider in
the Go layer fell through to the default branch of
`internal/entity/models/factory.go` and got the dummy driver. Chat, list
models, and check connection all returned `"not implemented"` instead of
reaching the StepFun API.

The Python side has had StepFun registered in `rag/llm/__init__.py` as a
`SupportedLiteLLMProvider` with base URL `https://api.stepfun.com/v1`,
plus `StepFunCV` for vision and `StepFunSeq2txt` for ASR, but no Go
path. StepFun's chat API is OpenAI-compatible, so the implementation
pattern is the same as the merged Moonshot driver (#14433) and OpenAI
driver (#14605).

### What this PR includes

- New file `internal/entity/models/stepfun.go` with a `StepFunModel`
that implements the `ModelDriver` interface.
- `factory.go`: route the `"stepfun"` provider name to
`NewStepFunModel`.
- New `conf/models/stepfun.json` with the public StepFun chat models
(step-3.5-flash and step-3.5-flash-paid, step-2-16k, the step-1 family in
8k/32k/128k/256k context lengths, step-1-flash, and the step-1v / step-1o
vision models) and `url_suffix` entries for `chat` and `models`.

### How the driver works

- StepFun exposes the OpenAI-compatible API at
`https://api.stepfun.com/v1`; note that the `default` URL shipped in
`conf/models/stepfun.json` is `https://api.stepfun.ai/v1`.
- `ChatWithMessages` and `ChatStreamlyWithSender` post to
`/chat/completions` in the same shape as the merged moonshot,
openrouter, and openai drivers.
- `ListModels` and `CheckConnection` call `/models` to list available
ids and confirm the API key works.
- `Embed` is left as `"not implemented"`. The API reference linked from the
umbrella issue
(`https://platform.stepfun.com/docs/en/api-reference/chat/chat-completion-create`)
is the chat endpoint, and StepFun has not advertised a public embeddings
endpoint there, so any real implementation belongs in a separate follow-up
only after the endpoint is verified.
- `Rerank` and `Balance` return `"no such method"` because StepFun does
not expose either.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

### How was this tested?

- `go build ./internal/entity/models/...` returns exit 0 with no errors
on go 1.25 (the `go.mod` minimum).
- Method set of `StepFunModel` matches the `ModelDriver` interface:
`NewInstance`, `Name`, `ChatWithMessages`, `ChatStreamlyWithSender`,
`Embed`, `Rerank`, `ListModels`, `Balance`, `CheckConnection`.
- Pattern parity with the merged moonshot (#14433), openai (#14605),
openrouter (#14652), and xai (#14550) drivers.

Closes #14814
Tracking: #14736
---
 conf/models/stepfun.json          |  93 ++++++
 internal/entity/models/factory.go |   2 +
 internal/entity/models/stepfun.go | 459 ++++++++++++++++++++++++++++++
 3 files changed, 554 insertions(+)
 create mode 100644 conf/models/stepfun.json
 create mode 100644 internal/entity/models/stepfun.go

diff --git a/conf/models/stepfun.json b/conf/models/stepfun.json
new file mode 100644
index 0000000000..f13b227a49
--- /dev/null
+++ b/conf/models/stepfun.json
@@ -0,0 +1,93 @@
+{
+  "name": "StepFun",
+  "url": {
+    "default": "https://api.stepfun.ai/v1"
+  },
+  "url_suffix": {
+    "chat": "chat/completions",
+    "models": "models"
+  },
+  "class": "step",
+  "models": [
+    {
+      "name": "step-3.5-flash",
+      "max_tokens": 32768,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "step-3.5-flash-paid",
+      "max_tokens": 32768,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "step-2-16k",
+      "max_tokens": 16384,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "step-1-256k",
+      "max_tokens": 262144,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "step-1-128k",
+      "max_tokens": 131072,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "step-1-32k",
+      "max_tokens": 32768,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "step-1-8k",
+      "max_tokens": 8192,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "step-1-flash",
+      "max_tokens": 8192,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "step-1v-32k",
+      "max_tokens": 32768,
+      "model_types": [
+        "chat",
+        "vision"
+      ]
+    },
+    {
+      "name": "step-1v-8k",
+      "max_tokens": 8192,
+      "model_types": [
+        "chat",
+        "vision"
+      ]
+    },
+    {
+      "name": "step-1o-vision-32k",
+      "max_tokens": 32768,
+      "model_types": [
+        "chat",
+        "vision"
+      ]
+    }
+  ]
+}
diff --git a/internal/entity/models/factory.go b/internal/entity/models/factory.go
index d68b7a85f3..f0974635b9 100644
--- a/internal/entity/models/factory.go
+++ b/internal/entity/models/factory.go
@@ -73,6 +73,8 @@ func (f *ModelFactory) CreateModelDriver(providerName string, baseURL map[string
 		return NewCoHereModel(baseURL, urlSuffix), nil
 	case "fishaudio":
 		return NewFishAudioModel(baseURL, urlSuffix), nil
+	case "stepfun":
+		return NewStepFunModel(baseURL, urlSuffix), nil
 	default:
 		return NewDummyModel(baseURL, urlSuffix), nil
 	}
diff --git a/internal/entity/models/stepfun.go b/internal/entity/models/stepfun.go
new file mode 100644
index 0000000000..ddccbabb3d
--- /dev/null
+++ b/internal/entity/models/stepfun.go
@@ -0,0 +1,459 @@
+//
+//  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+//
+
+package models
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// StepFunModel implements ModelDriver for StepFun (阶跃星辰).
+//
+// StepFun exposes an OpenAI-compatible REST API at https://api.stepfun.com/v1
+// (chat completions at /chat/completions, list models at /models). The wire
+// shape matches OpenAI closely enough that the chat path here is a direct
+// port of the OpenAI driver.
+type StepFunModel struct {
+	BaseURL    map[string]string // region name -> base URL; "default" is used when no region is set
+	URLSuffix  URLSuffix         // per-operation paths appended to the base URL (Chat, Models)
+	httpClient *http.Client      // used for every request; has no overall Timeout (see NewStepFunModel)
+}
+
+// NewStepFunModel builds a StepFun driver for the given region-to-base-URL
+// map and URL suffixes, backed by its own HTTP client.
+//
+// The transport is a clone of http.DefaultTransport, so Go's defaults
+// (ProxyFromEnvironment, DialContext with KeepAlive, HTTP/2, TLS handshake
+// and Expect-Continue timeouts) are inherited; only the pool limits and the
+// response-header timeout are set explicitly.
+//
+// http.Client.Timeout is deliberately left unset: it would also bound the
+// time spent reading the body and cut off long-lived SSE streams in
+// ChatStreamlyWithSender. Non-streaming calls use context.WithTimeout.
+func NewStepFunModel(baseURL map[string]string, urlSuffix URLSuffix) *StepFunModel {
+	pooled := http.DefaultTransport.(*http.Transport).Clone()
+	pooled.DisableCompression = false
+	pooled.MaxIdleConns, pooled.MaxIdleConnsPerHost = 100, 10
+	pooled.IdleConnTimeout = 90 * time.Second
+	pooled.ResponseHeaderTimeout = 60 * time.Second
+
+	// The client only wraps the transport; no Timeout is set (see above).
+	client := &http.Client{Transport: pooled}
+
+	driver := new(StepFunModel)
+	driver.BaseURL = baseURL
+	driver.URLSuffix = urlSuffix
+	driver.httpClient = client
+	return driver
+}
+
+func (s *StepFunModel) NewInstance(baseURL map[string]string) ModelDriver {
+	return NewStepFunModel(baseURL, s.URLSuffix) // new driver with its own client; only the URL suffixes carry over
+}
+
+func (s *StepFunModel) Name() string {
+	return "stepfun" // same string the ModelFactory switch routes to NewStepFunModel
+}
+
+// baseURLForRegion looks up the base URL configured for region.
+//
+// A missing or empty entry is reported as an error right away, so a bad
+// region surfaces with a clear message rather than as a relative URL that
+// the HTTP transport would reject later.
+func (s *StepFunModel) baseURLForRegion(region string) (string, error) {
+	if endpoint := s.BaseURL[region]; endpoint != "" {
+		return endpoint, nil
+	}
+	return "", fmt.Errorf("stepfun: no base URL configured for region %q", region)
+}
+
+// ChatWithMessages sends messages in one non-streaming chat request and returns the first choice's content as the answer.
+func (s *StepFunModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
+	if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
+		return nil, fmt.Errorf("api key is required")
+	}
+
+	if len(messages) == 0 {
+		return nil, fmt.Errorf("messages is empty")
+	}
+	// Use the "default" region unless the caller supplied a non-empty one.
+	region := "default"
+	if apiConfig.Region != nil && *apiConfig.Region != "" {
+		region = *apiConfig.Region
+	}
+
+	baseURL, err := s.baseURLForRegion(region)
+	if err != nil {
+		return nil, err
+	}
+	url := fmt.Sprintf("%s/%s", baseURL, s.URLSuffix.Chat)
+	// Only the role and content of each message are forwarded.
+	apiMessages := make([]map[string]interface{}, len(messages))
+	for i, msg := range messages {
+		apiMessages[i] = map[string]interface{}{
+			"role":    msg.Role,
+			"content": msg.Content,
+		}
+	}
+
+	reqBody := map[string]interface{}{
+		"model":    modelName,
+		"messages": apiMessages,
+		"stream":   false,
+	}
+
+	// Note: do NOT propagate chatModelConfig.Stream into the request body
+	// here. ChatWithMessages parses a single JSON response, so stream must
+	// always be off for this code path.
+	if chatModelConfig != nil {
+		if chatModelConfig.MaxTokens != nil {
+			reqBody["max_tokens"] = *chatModelConfig.MaxTokens
+		}
+		if chatModelConfig.Temperature != nil {
+			reqBody["temperature"] = *chatModelConfig.Temperature
+		}
+		if chatModelConfig.TopP != nil {
+			reqBody["top_p"] = *chatModelConfig.TopP
+		}
+		if chatModelConfig.Stop != nil {
+			reqBody["stop"] = *chatModelConfig.Stop
+		}
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+	// The deadline covers the whole exchange, including reading the body.
+	ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := s.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+	// Read the body first so a non-200 reply can be echoed in the error.
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+	// Decode loosely, then walk choices[0].message.content; any shape mismatch is an error.
+	var result map[string]interface{}
+	if err = json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	choices, ok := result["choices"].([]interface{})
+	if !ok || len(choices) == 0 {
+		return nil, fmt.Errorf("no choices in response")
+	}
+
+	firstChoice, ok := choices[0].(map[string]interface{})
+	if !ok {
+		return nil, fmt.Errorf("invalid choice format")
+	}
+
+	messageMap, ok := firstChoice["message"].(map[string]interface{})
+	if !ok {
+		return nil, fmt.Errorf("invalid message format")
+	}
+
+	content, ok := messageMap["content"].(string)
+	if !ok {
+		return nil, fmt.Errorf("invalid content format")
+	}
+	// No reasoning text is extracted on this path, so ReasonContent points at "".
+	emptyReason := ""
+	return &ChatResponse{
+		Answer:        &content,
+		ReasonContent: &emptyReason,
+	}, nil
+}
+
+// ChatStreamlyWithSender posts messages with stream=true and forwards each
+// non-empty delta.content from the SSE "data:" events to sender, then a final
+// "[DONE]". It fails if the stream ends without [DONE] or a finish_reason.
+func (s *StepFunModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
+	if sender == nil {
+		return fmt.Errorf("sender is required")
+	}
+
+	if len(messages) == 0 {
+		return fmt.Errorf("messages is empty")
+	}
+
+	if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
+		return fmt.Errorf("api key is required")
+	}
+
+	var region = "default"
+	if apiConfig.Region != nil && *apiConfig.Region != "" {
+		region = *apiConfig.Region
+	}
+
+	baseURL, err := s.baseURLForRegion(region)
+	if err != nil {
+		return err
+	}
+	url := fmt.Sprintf("%s/%s", baseURL, s.URLSuffix.Chat)
+
+	apiMessages := make([]map[string]interface{}, len(messages))
+	for i, msg := range messages {
+		apiMessages[i] = map[string]interface{}{
+			"role":    msg.Role,
+			"content": msg.Content,
+		}
+	}
+
+	reqBody := map[string]interface{}{
+		"model":    modelName,
+		"messages": apiMessages,
+		"stream":   true,
+	}
+
+	if chatModelConfig != nil {
+		// Refuse to run if the caller explicitly asked for stream=false.
+		// The body of this method only knows how to read SSE, so a
+		// non-SSE JSON response would be parsed as if it were a stream
+		// and produce no chunks. Better to fail clearly.
+		if chatModelConfig.Stream != nil && !*chatModelConfig.Stream {
+			return fmt.Errorf("stream must be true in ChatStreamlyWithSender")
+		}
+
+		if chatModelConfig.MaxTokens != nil {
+			reqBody["max_tokens"] = *chatModelConfig.MaxTokens
+		}
+		if chatModelConfig.Temperature != nil {
+			reqBody["temperature"] = *chatModelConfig.Temperature
+		}
+		if chatModelConfig.TopP != nil {
+			reqBody["top_p"] = *chatModelConfig.TopP
+		}
+		if chatModelConfig.Stop != nil {
+			reqBody["stop"] = *chatModelConfig.Stop
+		}
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	// SSE streams are long-lived, so no deadline is attached to the request.
+	// The transport's ResponseHeaderTimeout still caps the wait for response
+	// headers; nothing here bounds the time spent reading the stream itself.
+	req, err := http.NewRequestWithContext(context.Background(), "POST", url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := s.httpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	// SSE parsing: allow lines up to 1MB (bufio.Scanner's default cap is
+	// 64KB); a longer line stops the scan and is reported via scanner.Err().
+	scanner := bufio.NewScanner(resp.Body)
+	scanner.Buffer(make([]byte, 64*1024), 1024*1024)
+	sawTerminal := false
+	for scanner.Scan() {
+		line := scanner.Text()
+		// Any line that does not start with "data:" is ignored.
+		if !strings.HasPrefix(line, "data:") {
+			continue
+		}
+
+		data := strings.TrimSpace(line[5:])
+
+		if data == "[DONE]" {
+			sawTerminal = true
+			break
+		}
+		// Events that fail to decode or lack the expected shape are skipped, not fatal.
+		var event map[string]interface{}
+		if err = json.Unmarshal([]byte(data), &event); err != nil {
+			continue
+		}
+
+		choices, ok := event["choices"].([]interface{})
+		if !ok || len(choices) == 0 {
+			continue
+		}
+
+		firstChoice, ok := choices[0].(map[string]interface{})
+		if !ok {
+			continue
+		}
+
+		delta, ok := firstChoice["delta"].(map[string]interface{})
+		if !ok {
+			continue
+		}
+		// Only delta.content is forwarded; sender's second argument is always nil.
+		content, ok := delta["content"].(string)
+		if ok && content != "" {
+			if err := sender(&content, nil); err != nil {
+				return err
+			}
+		}
+		// A non-empty finish_reason also ends the stream, even without [DONE].
+		finishReason, ok := firstChoice["finish_reason"].(string)
+		if ok && finishReason != "" {
+			sawTerminal = true
+			break
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return fmt.Errorf("failed to scan response body: %w", err)
+	}
+	if !sawTerminal {
+		return fmt.Errorf("stepfun: stream ended before [DONE] or finish_reason")
+	}
+	// Forward a literal "[DONE]" marker as the last chunk.
+	endOfStream := "[DONE]"
+	if err := sender(&endOfStream, nil); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// Embed is a stub that ignores its arguments and always returns a
+// "not implemented" error. StepFun has not advertised a public embeddings
+// endpoint in the API reference linked from the umbrella issue.
+func (s *StepFunModel) Embed(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([]EmbeddingData, error) {
+	return nil, fmt.Errorf("not implemented")
+}
+
+// ListModels GETs the models endpoint and returns every string "id" found under "data".
+func (s *StepFunModel) ListModels(apiConfig *APIConfig) ([]string, error) {
+	if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
+		return nil, fmt.Errorf("api key is required")
+	}
+
+	regionName := "default"
+	if r := apiConfig.Region; r != nil && *r != "" {
+		regionName = *r
+	}
+
+	root, err := s.baseURLForRegion(regionName)
+	if err != nil {
+		return nil, err
+	}
+	endpoint := fmt.Sprintf("%s/%s", root, s.URLSuffix.Models)
+
+	ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Authorization", "Bearer "+*apiConfig.ApiKey)
+
+	resp, err := s.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	raw, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(raw))
+	}
+
+	var payload map[string]interface{}
+	if err = json.Unmarshal(raw, &payload); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	entries, isList := payload["data"].([]interface{})
+	if !isList {
+		return nil, fmt.Errorf("invalid models list format")
+	}
+
+	ids := make([]string, 0)
+	for _, entry := range entries {
+		// Entries that are not objects, or lack a string "id", are skipped.
+		fields, isObject := entry.(map[string]interface{})
+		if !isObject {
+			continue
+		}
+
+		if id, isString := fields["id"].(string); isString {
+			ids = append(ids, id)
+		}
+	}
+
+	return ids, nil
+}
+
+// Balance ignores apiConfig and always returns a "no such method" error; the StepFun API does not expose a balance call.
+func (s *StepFunModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) {
+	return nil, fmt.Errorf("no such method")
+}
+
+// CheckConnection verifies the API key by issuing a ListModels call. It
+// returns that call's error unchanged, or nil when the listing succeeds.
+func (s *StepFunModel) CheckConnection(apiConfig *APIConfig) error {
+	// The listed model ids are irrelevant here; only the outcome of the
+	// request matters.
+	_, err := s.ListModels(apiConfig)
+	return err
+}
+
+// Rerank is unsupported: StepFun does not expose a public rerank API, so this
+// ignores its arguments and always returns a "no such method" error.
+func (s *StepFunModel) Rerank(modelName *string, query string, documents []string, apiConfig *APIConfig, rerankConfig *RerankConfig) (*RerankResponse, error) {
+	return nil, fmt.Errorf("no such method")
+}