mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Go: implement provider: xAI (#14550)
Closes #14552 ### What problem does this PR solve? Add a Go driver for xAI (Grok models). The config file conf/models/xai.json has been in the repo since the early Go provider work, but internal/entity/models/factory.go had no case for "xai". So any xAI request fell through to the dummy driver and never reached the API. This PR adds the missing driver and wires it up. ### What this PR includes - New file internal/entity/models/xai.go with an XAIModel that implements the ModelDriver interface. - factory.go: route the "xai" provider name to NewXAIModel. ### How the driver works - xAI exposes an OpenAI-compatible API at https://api.x.ai/v1. - ChatWithMessages and ChatStreamlyWithSender post to /chat/completions in the same shape the moonshot and deepseek drivers use. - ListModels and CheckConnection call /models to confirm the API key works and to list available model ids. - reasoning_content is passed through for grok-3-mini and other xAI reasoning models, both in the non-stream and stream paths. - Encode, Rerank, and Balance are not part of the public xAI API at the moment, so they return a clear "not implemented" or "no such method" error. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? - go build ./internal/entity/models/... in a clean go 1.25 image (the go.mod minimum) returns exit 0 with no errors. - Method set of XAIModel matches the ModelDriver interface: NewInstance, Name, ChatWithMessages, ChatStreamlyWithSender, Encode, Rerank, ListModels, Balance, CheckConnection. - Pattern parity with the merged moonshot (#14433), volcengine (#14460), minimax (#14478), and vllm (#14532) PRs. --------- Co-authored-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@@ -53,6 +53,8 @@ func (f *ModelFactory) CreateModelDriver(providerName string, baseURL map[string
|
||||
return NewVolcEngine(baseURL, urlSuffix), nil
|
||||
case "vllm":
|
||||
return NewVllmModel(baseURL, urlSuffix), nil
|
||||
case "xai":
|
||||
return NewXAIModel(baseURL, urlSuffix), nil
|
||||
default:
|
||||
return NewDummyModel(baseURL, urlSuffix), nil
|
||||
}
|
||||
|
||||
494
internal/entity/models/xai.go
Normal file
494
internal/entity/models/xai.go
Normal file
@@ -0,0 +1,494 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package models
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// nonStreamCallTimeout caps the time spent on a single non-streaming
|
||||
// request (ChatWithMessages, ListModels). The shared httpClient itself
|
||||
// has no client-wide timeout, so streaming requests can run as long as
|
||||
// the API keeps the SSE connection open.
|
||||
const nonStreamCallTimeout = 120 * time.Second
|
||||
|
||||
// XAIModel implements ModelDriver for xAI (Grok models)
|
||||
type XAIModel struct {
|
||||
BaseURL map[string]string
|
||||
URLSuffix URLSuffix
|
||||
httpClient *http.Client // Reusable HTTP client with connection pool
|
||||
}
|
||||
|
||||
// NewXAIModel creates a new xAI model instance.
|
||||
//
|
||||
// We clone http.DefaultTransport so we keep Go's defaults for
|
||||
// ProxyFromEnvironment, DialContext (with KeepAlive), HTTP/2,
|
||||
// TLSHandshakeTimeout, and ExpectContinueTimeout, and only override
|
||||
// the few connection-pool fields we care about.
|
||||
//
|
||||
// The Client itself has no Timeout. http.Client.Timeout would also
|
||||
// cap the time spent reading the response body, which would cut off
|
||||
// long-lived SSE streams in ChatStreamlyWithSender. Non-streaming
|
||||
// callers wrap each request with context.WithTimeout instead.
|
||||
func NewXAIModel(baseURL map[string]string, urlSuffix URLSuffix) *XAIModel {
|
||||
transport := http.DefaultTransport.(*http.Transport).Clone()
|
||||
transport.MaxIdleConns = 100
|
||||
transport.MaxIdleConnsPerHost = 10
|
||||
transport.IdleConnTimeout = 90 * time.Second
|
||||
transport.DisableCompression = false
|
||||
|
||||
return &XAIModel{
|
||||
BaseURL: baseURL,
|
||||
URLSuffix: urlSuffix,
|
||||
httpClient: &http.Client{
|
||||
Transport: transport,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (z *XAIModel) NewInstance(baseURL map[string]string) ModelDriver {
|
||||
return NewXAIModel(baseURL, z.URLSuffix)
|
||||
}
|
||||
|
||||
func (z *XAIModel) Name() string {
|
||||
return "xai"
|
||||
}
|
||||
|
||||
// baseURLForRegion returns the base URL for the given region, or an
|
||||
// error if no entry exists. This makes a misconfigured region fail
|
||||
// fast with a clear message, instead of silently producing a relative
|
||||
// URL that the HTTP transport then rejects.
|
||||
func (z *XAIModel) baseURLForRegion(region string) (string, error) {
|
||||
base, ok := z.BaseURL[region]
|
||||
if !ok || base == "" {
|
||||
return "", fmt.Errorf("xai: no base URL configured for region %q", region)
|
||||
}
|
||||
return base, nil
|
||||
}
|
||||
|
||||
// ChatWithMessages sends multiple messages with roles and returns the response
|
||||
func (z *XAIModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
|
||||
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
|
||||
return nil, fmt.Errorf("api key is required")
|
||||
}
|
||||
|
||||
if len(messages) == 0 {
|
||||
return nil, fmt.Errorf("messages is empty")
|
||||
}
|
||||
|
||||
var region = "default"
|
||||
if apiConfig.Region != nil {
|
||||
region = *apiConfig.Region
|
||||
}
|
||||
|
||||
baseURL, err := z.baseURLForRegion(region)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s", baseURL, z.URLSuffix.Chat)
|
||||
|
||||
// Convert messages to the format expected by the API
|
||||
apiMessages := make([]map[string]interface{}, len(messages))
|
||||
for i, msg := range messages {
|
||||
apiMessages[i] = map[string]interface{}{
|
||||
"role": msg.Role,
|
||||
"content": msg.Content,
|
||||
}
|
||||
}
|
||||
|
||||
// Build request body
|
||||
reqBody := map[string]interface{}{
|
||||
"model": modelName,
|
||||
"messages": apiMessages,
|
||||
"stream": false,
|
||||
"temperature": 1,
|
||||
}
|
||||
|
||||
// Note: do NOT propagate chatModelConfig.Stream into the request body
|
||||
// here. ChatWithMessages parses a single JSON response, so SSE/stream
|
||||
// must always be off for this code path.
|
||||
if chatModelConfig != nil {
|
||||
if chatModelConfig.MaxTokens != nil {
|
||||
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
|
||||
}
|
||||
|
||||
if chatModelConfig.Temperature != nil {
|
||||
reqBody["temperature"] = *chatModelConfig.Temperature
|
||||
}
|
||||
|
||||
if chatModelConfig.TopP != nil {
|
||||
reqBody["top_p"] = *chatModelConfig.TopP
|
||||
}
|
||||
|
||||
if chatModelConfig.Stop != nil {
|
||||
reqBody["stop"] = *chatModelConfig.Stop
|
||||
}
|
||||
}
|
||||
|
||||
jsonData, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
|
||||
|
||||
resp, err := z.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
// Parse response
|
||||
var result map[string]interface{}
|
||||
if err = json.Unmarshal(body, &result); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
choices, ok := result["choices"].([]interface{})
|
||||
if !ok || len(choices) == 0 {
|
||||
return nil, fmt.Errorf("no choices in response")
|
||||
}
|
||||
|
||||
firstChoice, ok := choices[0].(map[string]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid choice format")
|
||||
}
|
||||
|
||||
messageMap, ok := firstChoice["message"].(map[string]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid message format")
|
||||
}
|
||||
|
||||
content, ok := messageMap["content"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid content format")
|
||||
}
|
||||
|
||||
// xAI reasoning models (grok-3-mini and similar) return reasoning text in
|
||||
// the reasoning_content field. Pass it through when present.
|
||||
var reasonContent string
|
||||
if rc, ok := messageMap["reasoning_content"].(string); ok {
|
||||
reasonContent = rc
|
||||
if reasonContent != "" && reasonContent[0] == '\n' {
|
||||
reasonContent = reasonContent[1:]
|
||||
}
|
||||
}
|
||||
|
||||
chatResponse := &ChatResponse{
|
||||
Answer: &content,
|
||||
ReasonContent: &reasonContent,
|
||||
}
|
||||
|
||||
return chatResponse, nil
|
||||
}
|
||||
|
||||
// ChatStreamlyWithSender sends messages and streams the response via the
|
||||
// sender function. Used for streaming chat responses with no extra channel.
|
||||
func (z *XAIModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
|
||||
if len(messages) == 0 {
|
||||
return fmt.Errorf("messages is empty")
|
||||
}
|
||||
|
||||
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
|
||||
return fmt.Errorf("api key is required")
|
||||
}
|
||||
|
||||
var region = "default"
|
||||
if apiConfig.Region != nil {
|
||||
region = *apiConfig.Region
|
||||
}
|
||||
|
||||
baseURL, err := z.baseURLForRegion(region)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s", baseURL, z.URLSuffix.Chat)
|
||||
|
||||
// Convert messages to API format (supports multimodal content)
|
||||
apiMessages := make([]map[string]interface{}, len(messages))
|
||||
for i, msg := range messages {
|
||||
apiMessages[i] = map[string]interface{}{
|
||||
"role": msg.Role,
|
||||
"content": msg.Content,
|
||||
}
|
||||
}
|
||||
|
||||
// Build request body with streaming on by default
|
||||
reqBody := map[string]interface{}{
|
||||
"model": modelName,
|
||||
"messages": apiMessages,
|
||||
"stream": true,
|
||||
}
|
||||
|
||||
if chatModelConfig != nil {
|
||||
// Refuse to run if the caller explicitly asked for stream=false.
|
||||
// The body of this method only knows how to read SSE, so a non-SSE
|
||||
// JSON response would be parsed as if it were a stream and produce
|
||||
// no chunks. Better to fail clearly. Leave reqBody["stream"] as
|
||||
// the default (true) when Stream is nil or true.
|
||||
if chatModelConfig.Stream != nil && !*chatModelConfig.Stream {
|
||||
return fmt.Errorf("stream must be true in ChatStreamlyWithSender")
|
||||
}
|
||||
|
||||
if chatModelConfig.MaxTokens != nil {
|
||||
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
|
||||
}
|
||||
|
||||
if chatModelConfig.Temperature != nil {
|
||||
reqBody["temperature"] = *chatModelConfig.Temperature
|
||||
}
|
||||
|
||||
if chatModelConfig.TopP != nil {
|
||||
reqBody["top_p"] = *chatModelConfig.TopP
|
||||
}
|
||||
|
||||
if chatModelConfig.Stop != nil {
|
||||
reqBody["stop"] = *chatModelConfig.Stop
|
||||
}
|
||||
}
|
||||
|
||||
jsonData, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
|
||||
|
||||
resp, err := z.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
// SSE parsing: read line by line. The default bufio.Scanner buffer
|
||||
// is 64KB, which can be too small for long SSE chunks. Bump it to
|
||||
// 1MB so we never silently truncate a long data: line.
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
scanner.Buffer(make([]byte, 64*1024), 1024*1024)
|
||||
// sawTerminal flips to true when the upstream actually told us the
|
||||
// stream is over (either a "[DONE]" marker or a non-empty
|
||||
// finish_reason). If the body closes before either of those, we
|
||||
// must not emit a synthetic "[DONE]" because that would hide a
|
||||
// truncated response from the caller.
|
||||
sawTerminal := false
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
// SSE data line starts with "data:"
|
||||
if !strings.HasPrefix(line, "data:") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Extract JSON after "data:"
|
||||
data := strings.TrimSpace(line[5:])
|
||||
|
||||
// [DONE] marks the end of the stream
|
||||
if data == "[DONE]" {
|
||||
sawTerminal = true
|
||||
break
|
||||
}
|
||||
|
||||
// Parse the JSON event
|
||||
var event map[string]interface{}
|
||||
if err = json.Unmarshal([]byte(data), &event); err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
choices, ok := event["choices"].([]interface{})
|
||||
if !ok || len(choices) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
firstChoice, ok := choices[0].(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
delta, ok := firstChoice["delta"].(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
reasoningContent, ok := delta["reasoning_content"].(string)
|
||||
if ok && reasoningContent != "" {
|
||||
if err := sender(nil, &reasoningContent); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
content, ok := delta["content"].(string)
|
||||
if ok && content != "" {
|
||||
if err := sender(&content, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
finishReason, ok := firstChoice["finish_reason"].(string)
|
||||
if ok && finishReason != "" {
|
||||
sawTerminal = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return fmt.Errorf("failed to scan response body: %w", err)
|
||||
}
|
||||
if !sawTerminal {
|
||||
return fmt.Errorf("xai: stream ended before [DONE] or finish_reason")
|
||||
}
|
||||
|
||||
// Send the [DONE] marker for OpenAI compatibility
|
||||
endOfStream := "[DONE]"
|
||||
if err := sender(&endOfStream, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Encode encodes a list of texts into embeddings. xAI does not expose a
|
||||
// public embedding API yet, so this is left unimplemented.
|
||||
func (z *XAIModel) Encode(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([][]float64, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
// ListModels returns the list of model ids visible to the API key.
|
||||
func (z *XAIModel) ListModels(apiConfig *APIConfig) ([]string, error) {
|
||||
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
|
||||
return nil, fmt.Errorf("api key is required")
|
||||
}
|
||||
|
||||
var region = "default"
|
||||
if apiConfig.Region != nil {
|
||||
region = *apiConfig.Region
|
||||
}
|
||||
|
||||
baseURL, err := z.baseURLForRegion(region)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
url := fmt.Sprintf("%s/%s", baseURL, z.URLSuffix.Models)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
|
||||
|
||||
resp, err := z.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
// Parse response
|
||||
var result map[string]interface{}
|
||||
if err = json.Unmarshal(body, &result); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
data, ok := result["data"].([]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid models list format")
|
||||
}
|
||||
|
||||
models := make([]string, 0)
|
||||
for _, model := range data {
|
||||
modelMap, ok := model.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
modelName, ok := modelMap["id"].(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
models = append(models, modelName)
|
||||
}
|
||||
|
||||
return models, nil
|
||||
}
|
||||
|
||||
// Balance is not exposed by the xAI API, so this returns "no such method".
|
||||
func (z *XAIModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) {
|
||||
return nil, fmt.Errorf("no such method")
|
||||
}
|
||||
|
||||
// CheckConnection runs a lightweight ListModels call to verify the API key.
|
||||
func (z *XAIModel) CheckConnection(apiConfig *APIConfig) error {
|
||||
_, err := z.ListModels(apiConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Rerank calculates similarity scores between query and texts. xAI does not
|
||||
// expose a rerank API, so this is left unimplemented.
|
||||
func (z *XAIModel) Rerank(modelName *string, query string, texts []string, apiConfig *APIConfig) ([]float64, error) {
|
||||
return nil, fmt.Errorf("%s, Rerank not implemented", z.Name())
|
||||
}
|
||||
Reference in New Issue
Block a user