Go: implement provider: xAI (#14550)

Closes #14552 

### What problem does this PR solve?

Add a Go driver for xAI (Grok models).

The config file conf/models/xai.json has been in the repo since the
early Go provider work, but internal/entity/models/factory.go had no
case for "xai". So any xAI request fell through to the dummy driver
and never reached the API. This PR adds the missing driver and wires it
up.

### What this PR includes

- New file internal/entity/models/xai.go with an XAIModel that
implements the ModelDriver interface.
- factory.go: route the "xai" provider name to NewXAIModel.

### How the driver works

- xAI exposes an OpenAI-compatible API at https://api.x.ai/v1.
- ChatWithMessages and ChatStreamlyWithSender post to /chat/completions
in the same shape the moonshot and deepseek drivers use.
- ListModels and CheckConnection call /models to confirm the API key
works and to list available model ids.
- reasoning_content is passed through for grok-3-mini and other xAI
reasoning models, both in the non-stream and stream paths.
- Encode, Rerank, and Balance are not part of the public xAI API at the
moment, so they return a clear "not implemented" or "no such method"
error.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

### How was this tested?

- go build ./internal/entity/models/... in a clean go 1.25 image (the
  go.mod minimum) returns exit 0 with no errors.
- Method set of XAIModel matches the ModelDriver interface: NewInstance,
  Name, ChatWithMessages, ChatStreamlyWithSender, Encode, Rerank,
  ListModels, Balance, CheckConnection.
- Pattern parity with the merged moonshot (#14433), volcengine (#14460),
  minimax (#14478), and vllm (#14532) PRs.

---------

Co-authored-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jack Storment
2026-05-06 06:16:37 +02:00
committed by GitHub
parent cd54c08e84
commit c2ad672c09
2 changed files with 496 additions and 0 deletions

View File

@@ -53,6 +53,8 @@ func (f *ModelFactory) CreateModelDriver(providerName string, baseURL map[string
return NewVolcEngine(baseURL, urlSuffix), nil
case "vllm":
return NewVllmModel(baseURL, urlSuffix), nil
case "xai":
return NewXAIModel(baseURL, urlSuffix), nil
default:
return NewDummyModel(baseURL, urlSuffix), nil
}

View File

@@ -0,0 +1,494 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package models
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// nonStreamCallTimeout caps the time spent on a single non-streaming
// request (ChatWithMessages, ListModels). The shared httpClient itself
// has no client-wide timeout, so streaming requests can run as long as
// the API keeps the SSE connection open.
const nonStreamCallTimeout = 120 * time.Second
// XAIModel implements ModelDriver for xAI (Grok models)
type XAIModel struct {
BaseURL map[string]string
URLSuffix URLSuffix
httpClient *http.Client // Reusable HTTP client with connection pool
}
// NewXAIModel creates a new xAI model instance.
//
// We clone http.DefaultTransport so we keep Go's defaults for
// ProxyFromEnvironment, DialContext (with KeepAlive), HTTP/2,
// TLSHandshakeTimeout, and ExpectContinueTimeout, and only override
// the few connection-pool fields we care about.
//
// The Client itself has no Timeout. http.Client.Timeout would also
// cap the time spent reading the response body, which would cut off
// long-lived SSE streams in ChatStreamlyWithSender. Non-streaming
// callers wrap each request with context.WithTimeout instead.
func NewXAIModel(baseURL map[string]string, urlSuffix URLSuffix) *XAIModel {
transport := http.DefaultTransport.(*http.Transport).Clone()
transport.MaxIdleConns = 100
transport.MaxIdleConnsPerHost = 10
transport.IdleConnTimeout = 90 * time.Second
transport.DisableCompression = false
return &XAIModel{
BaseURL: baseURL,
URLSuffix: urlSuffix,
httpClient: &http.Client{
Transport: transport,
},
}
}
func (z *XAIModel) NewInstance(baseURL map[string]string) ModelDriver {
return NewXAIModel(baseURL, z.URLSuffix)
}
func (z *XAIModel) Name() string {
return "xai"
}
// baseURLForRegion returns the base URL for the given region, or an
// error if no entry exists. This makes a misconfigured region fail
// fast with a clear message, instead of silently producing a relative
// URL that the HTTP transport then rejects.
func (z *XAIModel) baseURLForRegion(region string) (string, error) {
base, ok := z.BaseURL[region]
if !ok || base == "" {
return "", fmt.Errorf("xai: no base URL configured for region %q", region)
}
return base, nil
}
// ChatWithMessages sends multiple messages with roles and returns the response
func (z *XAIModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
return nil, fmt.Errorf("api key is required")
}
if len(messages) == 0 {
return nil, fmt.Errorf("messages is empty")
}
var region = "default"
if apiConfig.Region != nil {
region = *apiConfig.Region
}
baseURL, err := z.baseURLForRegion(region)
if err != nil {
return nil, err
}
url := fmt.Sprintf("%s/%s", baseURL, z.URLSuffix.Chat)
// Convert messages to the format expected by the API
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body
reqBody := map[string]interface{}{
"model": modelName,
"messages": apiMessages,
"stream": false,
"temperature": 1,
}
// Note: do NOT propagate chatModelConfig.Stream into the request body
// here. ChatWithMessages parses a single JSON response, so SSE/stream
// must always be off for this code path.
if chatModelConfig != nil {
if chatModelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
}
if chatModelConfig.Temperature != nil {
reqBody["temperature"] = *chatModelConfig.Temperature
}
if chatModelConfig.TopP != nil {
reqBody["top_p"] = *chatModelConfig.TopP
}
if chatModelConfig.Stop != nil {
reqBody["stop"] = *chatModelConfig.Stop
}
}
jsonData, err := json.Marshal(reqBody)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
resp, err := z.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
// Parse response
var result map[string]interface{}
if err = json.Unmarshal(body, &result); err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
choices, ok := result["choices"].([]interface{})
if !ok || len(choices) == 0 {
return nil, fmt.Errorf("no choices in response")
}
firstChoice, ok := choices[0].(map[string]interface{})
if !ok {
return nil, fmt.Errorf("invalid choice format")
}
messageMap, ok := firstChoice["message"].(map[string]interface{})
if !ok {
return nil, fmt.Errorf("invalid message format")
}
content, ok := messageMap["content"].(string)
if !ok {
return nil, fmt.Errorf("invalid content format")
}
// xAI reasoning models (grok-3-mini and similar) return reasoning text in
// the reasoning_content field. Pass it through when present.
var reasonContent string
if rc, ok := messageMap["reasoning_content"].(string); ok {
reasonContent = rc
if reasonContent != "" && reasonContent[0] == '\n' {
reasonContent = reasonContent[1:]
}
}
chatResponse := &ChatResponse{
Answer: &content,
ReasonContent: &reasonContent,
}
return chatResponse, nil
}
// ChatStreamlyWithSender sends messages and streams the response via the
// sender function. Used for streaming chat responses with no extra channel.
func (z *XAIModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
return fmt.Errorf("api key is required")
}
var region = "default"
if apiConfig.Region != nil {
region = *apiConfig.Region
}
baseURL, err := z.baseURLForRegion(region)
if err != nil {
return err
}
url := fmt.Sprintf("%s/%s", baseURL, z.URLSuffix.Chat)
// Convert messages to API format (supports multimodal content)
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
// Build request body with streaming on by default
reqBody := map[string]interface{}{
"model": modelName,
"messages": apiMessages,
"stream": true,
}
if chatModelConfig != nil {
// Refuse to run if the caller explicitly asked for stream=false.
// The body of this method only knows how to read SSE, so a non-SSE
// JSON response would be parsed as if it were a stream and produce
// no chunks. Better to fail clearly. Leave reqBody["stream"] as
// the default (true) when Stream is nil or true.
if chatModelConfig.Stream != nil && !*chatModelConfig.Stream {
return fmt.Errorf("stream must be true in ChatStreamlyWithSender")
}
if chatModelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
}
if chatModelConfig.Temperature != nil {
reqBody["temperature"] = *chatModelConfig.Temperature
}
if chatModelConfig.TopP != nil {
reqBody["top_p"] = *chatModelConfig.TopP
}
if chatModelConfig.Stop != nil {
reqBody["stop"] = *chatModelConfig.Stop
}
}
jsonData, err := json.Marshal(reqBody)
if err != nil {
return fmt.Errorf("failed to marshal request: %w", err)
}
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
if err != nil {
return fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
resp, err := z.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
// SSE parsing: read line by line. The default bufio.Scanner buffer
// is 64KB, which can be too small for long SSE chunks. Bump it to
// 1MB so we never silently truncate a long data: line.
scanner := bufio.NewScanner(resp.Body)
scanner.Buffer(make([]byte, 64*1024), 1024*1024)
// sawTerminal flips to true when the upstream actually told us the
// stream is over (either a "[DONE]" marker or a non-empty
// finish_reason). If the body closes before either of those, we
// must not emit a synthetic "[DONE]" because that would hide a
// truncated response from the caller.
sawTerminal := false
for scanner.Scan() {
line := scanner.Text()
// SSE data line starts with "data:"
if !strings.HasPrefix(line, "data:") {
continue
}
// Extract JSON after "data:"
data := strings.TrimSpace(line[5:])
// [DONE] marks the end of the stream
if data == "[DONE]" {
sawTerminal = true
break
}
// Parse the JSON event
var event map[string]interface{}
if err = json.Unmarshal([]byte(data), &event); err != nil {
continue
}
choices, ok := event["choices"].([]interface{})
if !ok || len(choices) == 0 {
continue
}
firstChoice, ok := choices[0].(map[string]interface{})
if !ok {
continue
}
delta, ok := firstChoice["delta"].(map[string]interface{})
if !ok {
continue
}
reasoningContent, ok := delta["reasoning_content"].(string)
if ok && reasoningContent != "" {
if err := sender(nil, &reasoningContent); err != nil {
return err
}
}
content, ok := delta["content"].(string)
if ok && content != "" {
if err := sender(&content, nil); err != nil {
return err
}
}
finishReason, ok := firstChoice["finish_reason"].(string)
if ok && finishReason != "" {
sawTerminal = true
break
}
}
if err := scanner.Err(); err != nil {
return fmt.Errorf("failed to scan response body: %w", err)
}
if !sawTerminal {
return fmt.Errorf("xai: stream ended before [DONE] or finish_reason")
}
// Send the [DONE] marker for OpenAI compatibility
endOfStream := "[DONE]"
if err := sender(&endOfStream, nil); err != nil {
return err
}
return nil
}
// Encode encodes a list of texts into embeddings. xAI does not expose a
// public embedding API yet, so this is left unimplemented.
func (z *XAIModel) Encode(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([][]float64, error) {
return nil, fmt.Errorf("not implemented")
}
// ListModels returns the list of model ids visible to the API key.
func (z *XAIModel) ListModels(apiConfig *APIConfig) ([]string, error) {
if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
return nil, fmt.Errorf("api key is required")
}
var region = "default"
if apiConfig.Region != nil {
region = *apiConfig.Region
}
baseURL, err := z.baseURLForRegion(region)
if err != nil {
return nil, err
}
url := fmt.Sprintf("%s/%s", baseURL, z.URLSuffix.Models)
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
resp, err := z.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
// Parse response
var result map[string]interface{}
if err = json.Unmarshal(body, &result); err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
data, ok := result["data"].([]interface{})
if !ok {
return nil, fmt.Errorf("invalid models list format")
}
models := make([]string, 0)
for _, model := range data {
modelMap, ok := model.(map[string]interface{})
if !ok {
continue
}
modelName, ok := modelMap["id"].(string)
if !ok {
continue
}
models = append(models, modelName)
}
return models, nil
}
// Balance is not exposed by the xAI API, so this returns "no such method".
func (z *XAIModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) {
return nil, fmt.Errorf("no such method")
}
// CheckConnection runs a lightweight ListModels call to verify the API key.
func (z *XAIModel) CheckConnection(apiConfig *APIConfig) error {
_, err := z.ListModels(apiConfig)
if err != nil {
return err
}
return nil
}
// Rerank calculates similarity scores between query and texts. xAI does not
// expose a rerank API, so this is left unimplemented.
func (z *XAIModel) Rerank(modelName *string, query string, texts []string, apiConfig *APIConfig) ([]float64, error) {
return nil, fmt.Errorf("%s, Rerank not implemented", z.Name())
}