Files
ragflow/internal/entity/models/nvidia.go

681 lines
19 KiB
Go
Raw Normal View History

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package models
import (
"bufio"
"bytes"
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// NvidiaModel implements ModelDriver for Nvidia
type NvidiaModel struct {
baseModel BaseModel
}
// NewNvidiaModel creates a new Nvidia model instance
func NewNvidiaModel(baseURL map[string]string, urlSuffix URLSuffix) *NvidiaModel {
return &NvidiaModel{
baseModel: BaseModel{
BaseURL: baseURL,
URLSuffix: urlSuffix,
httpClient: &http.Client{
Transport: &http.Transport{
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
IdleConnTimeout: 90 * time.Second,
DisableCompression: false,
},
},
},
}
}
func (n NvidiaModel) NewInstance(baseURL map[string]string) ModelDriver {
return NewNvidiaModel(baseURL, n.baseModel.URLSuffix)
}
func (n NvidiaModel) Name() string {
return "nvidia"
}
func (n *NvidiaModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
if err := n.baseModel.APIConfigCheck(apiConfig); err != nil {
return nil, err
}
if len(messages) == 0 {
return nil, fmt.Errorf("messages is empty")
}
resolvedBaseURL, err := n.baseModel.GetBaseURL(apiConfig)
if err != nil {
return nil, err
}
baseURL := resolvedBaseURL
if baseURL == "" {
baseURL = resolvedBaseURL
}
url := fmt.Sprintf("%s/%s", baseURL, n.baseModel.URLSuffix.Chat)
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
reqBody := map[string]interface{}{
"model": modelName,
"messages": apiMessages,
"stream": false,
}
if chatModelConfig != nil {
if chatModelConfig.Stream != nil {
reqBody["stream"] = *chatModelConfig.Stream
}
if chatModelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
}
if chatModelConfig.Temperature != nil {
reqBody["temperature"] = *chatModelConfig.Temperature
}
if chatModelConfig.TopP != nil {
reqBody["top_p"] = *chatModelConfig.TopP
}
if chatModelConfig.Stop != nil {
reqBody["stop"] = *chatModelConfig.Stop
}
if chatModelConfig.Thinking != nil {
if *chatModelConfig.Thinking {
reqBody["thinking"] = map[string]interface{}{"type": "enabled"}
} else {
reqBody["thinking"] = map[string]interface{}{"type": "disabled"}
}
}
}
jsonData, err := json.Marshal(reqBody)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
resp, err := n.baseModel.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
var result map[string]interface{}
if err = json.Unmarshal(body, &result); err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
choices, ok := result["choices"].([]interface{})
if !ok || len(choices) == 0 {
return nil, fmt.Errorf("no choices in response")
}
firstChoice, ok := choices[0].(map[string]interface{})
if !ok {
return nil, fmt.Errorf("invalid choice format")
}
messageMap, ok := firstChoice["message"].(map[string]interface{})
if !ok {
return nil, fmt.Errorf("invalid message format")
}
content, ok := messageMap["content"].(string)
if !ok {
return nil, fmt.Errorf("invalid content format")
}
var reasonContent string
if chatModelConfig != nil && chatModelConfig.Thinking != nil && *chatModelConfig.Thinking {
reasonContent, ok = messageMap["reasoning_content"].(string)
if !ok {
return nil, fmt.Errorf("invalid content format")
}
if reasonContent != "" && reasonContent[0] == '\n' {
reasonContent = reasonContent[1:]
}
}
chatResponse := &ChatResponse{
Answer: &content,
ReasonContent: &reasonContent,
}
return chatResponse, nil
}
func (n *NvidiaModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error {
if err := n.baseModel.APIConfigCheck(apiConfig); err != nil {
return err
}
if sender == nil {
return fmt.Errorf("sender is required")
}
if len(messages) == 0 {
return fmt.Errorf("messages is empty")
}
resolvedBaseURL, err := n.baseModel.GetBaseURL(apiConfig)
if err != nil {
return err
}
baseURL := resolvedBaseURL
if baseURL == "" {
baseURL = resolvedBaseURL
}
url := fmt.Sprintf("%s/%s", baseURL, n.baseModel.URLSuffix.Chat)
apiMessages := make([]map[string]interface{}, len(messages))
for i, msg := range messages {
apiMessages[i] = map[string]interface{}{
"role": msg.Role,
"content": msg.Content,
}
}
reqBody := map[string]interface{}{
"model": modelName,
"messages": apiMessages,
"stream": true,
}
if modelConfig != nil {
if modelConfig.Stream != nil {
reqBody["stream"] = *modelConfig.Stream
}
if modelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *modelConfig.MaxTokens
}
if modelConfig.Temperature != nil {
reqBody["temperature"] = *modelConfig.Temperature
}
if modelConfig.DoSample != nil {
reqBody["do_sample"] = *modelConfig.DoSample
}
if modelConfig.TopP != nil {
reqBody["top_p"] = *modelConfig.TopP
}
if modelConfig.Stop != nil {
reqBody["stop"] = *modelConfig.Stop
}
if modelConfig.Thinking != nil {
if *modelConfig.Thinking {
reqBody["thinking"] = map[string]interface{}{"type": "enabled"}
} else {
reqBody["thinking"] = map[string]interface{}{"type": "disabled"}
}
}
}
jsonData, err := json.Marshal(reqBody)
if err != nil {
return fmt.Errorf("failed to marshal request: %w", err)
}
ctx, cancel := context.WithTimeout(context.Background(), streamCallTimeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
if err != nil {
return fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
resp, err := n.baseModel.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
scanner := bufio.NewScanner(resp.Body)
fix(go-models): raise SSE scanner buffer so large stream chunks are not dropped (#15382) ### Summary Closes #15381 Every provider in `internal/entity/models/` reads its streaming response with `bufio.NewScanner(resp.Body)` and iterates over `scanner.Scan()`. The default `bufio.Scanner` maximum token size is 64KB, so when an upstream sends a single SSE `data:` line larger than 64KB (long content deltas, large tool or function call argument blobs, bundled `reasoning_content`, or providers that emit a whole message in one event) `scanner.Scan()` returns `false` and `scanner.Err()` returns `bufio.ErrTooLong`. Streaming chat then ends with an error partway through the response. This change adds `scanner.Buffer(make([]byte, 64*1024), 1024*1024)` immediately after every SSE scanner that was still bare, raising the cap to 1MB. 1MB is the value already used for streaming chat in `openai.go`, `modelscope.go`, `groq.go`, `mistral.go`, `xai.go` and the other already patched providers (the 8MB cap in the repo is reserved for TTS and embedding paths), so this simply converges the remaining providers onto the established pattern. Nothing else changes: line parsing, `data:` prefix handling, `[DONE]` detection, JSON unmarshalling, error handling, and the existing `scanner.Err()` checks all stay the same. Providers covered (23 scanners across 22 files): 302ai, aliyun, baichuan, baidu, cohere, deepinfra, deepseek, gitee, huggingface, lmstudio, minimax (the chat scanner, whose TTS scanner was already bumped), moonshot, nvidia, ollama, openrouter, orcarouter, paddleocr, siliconflow, tokenhub, vllm, volcengine, xunfei, zhipu-ai. `jiekouai.go` is excluded because it is covered by the in flight #15337. A table driven regression test (`sse_scanner_buffer_test.go`) streams a single 128KB `data:` content delta followed by `data: [DONE]` through an `httptest` server and asserts that `ChatStreamlyWithSender` delivers the full content with no error across a representative subset of providers. Without the buffer fix the test fails with `bufio.Scanner: token too long`. This PR also removes three duplicate declarations of the package level `roundTripperFunc` test helper that several recently merged provider PRs each added independently, which had left the `internal/entity/models` test package unable to compile. The helper now lives in a single place and is shared. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
2026-05-29 07:34:00 -04:00
scanner.Buffer(make([]byte, 64*1024), 1024*1024)
for scanner.Scan() {
line := scanner.Text()
if !strings.HasPrefix(line, "data:") {
continue
}
data := strings.TrimSpace(line[5:])
if data == "[DONE]" {
break
}
var event map[string]interface{}
if err = json.Unmarshal([]byte(data), &event); err != nil {
continue
}
choices, ok := event["choices"].([]interface{})
if !ok || len(choices) == 0 {
continue
}
firstChoice, ok := choices[0].(map[string]interface{})
if !ok {
continue
}
delta, ok := firstChoice["delta"].(map[string]interface{})
if !ok {
continue
}
reasoningContent, ok := delta["reasoning_content"].(string)
if ok && reasoningContent != "" {
if err := sender(nil, &reasoningContent); err != nil {
return err
}
}
content, ok := delta["content"].(string)
if ok && content != "" {
if err := sender(&content, nil); err != nil {
return err
}
}
finishReason, ok := firstChoice["finish_reason"].(string)
if ok && finishReason != "" {
break
}
}
endOfStream := "[DONE]"
if err = sender(&endOfStream, nil); err != nil {
return err
}
return scanner.Err()
}
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
type nvidiaEmbeddingResponse struct {
Data []struct {
Index int `json:"index"`
Embedding []float64 `json:"embedding"`
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
} `json:"data"`
}
func (n NvidiaModel) Embed(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([]EmbeddingData, error) {
if err := n.baseModel.APIConfigCheck(apiConfig); err != nil {
return nil, err
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
}
if len(texts) == 0 {
return []EmbeddingData{}, nil
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
}
if modelName == nil || *modelName == "" {
return nil, fmt.Errorf("model name is required")
}
resolvedBaseURL, err := n.baseModel.GetBaseURL(apiConfig)
if err != nil {
return nil, err
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
}
baseURL := resolvedBaseURL
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
if baseURL == "" {
baseURL = resolvedBaseURL
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
}
url := fmt.Sprintf("%s/%s", strings.TrimSuffix(baseURL, "/"), n.baseModel.URLSuffix.Embedding)
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
reqBody := map[string]interface{}{
"model": *modelName,
"input": texts,
"input_type": "query",
"encoding_format": "float",
"truncate": "END",
}
if embeddingConfig != nil && embeddingConfig.Dimension > 0 {
reqBody["dimensions"] = embeddingConfig.Dimension
}
jsonData, err := json.Marshal(reqBody)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
defer cancel()
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
resp, err := n.baseModel.httpClient.Do(req)
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("Nvidia embeddings API error: %s, body: %s", resp.Status, string(body))
}
var parsed nvidiaEmbeddingResponse
if err = json.Unmarshal(body, &parsed); err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
var embeddings []EmbeddingData
for _, dataElem := range parsed.Data {
var embeddingData EmbeddingData
embeddingData.Embedding = dataElem.Embedding
embeddingData.Index = dataElem.Index
embeddings = append(embeddings, embeddingData)
Go: implement Encode (embeddings) in NVIDIA driver (#14700) ### What problem does this PR solve? The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with a stub `Encode` method that returned `no such method`. `conf/models/nvidia.json` already lists `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model, but the conf had no `embedding` URL suffix, so the picker had nothing wired even if `Encode` worked. A tenant who wanted to use NVIDIA NIM for chat (already working) and embeddings from a single provider could not, even though the upstream endpoint is public at `https://integrate.api.nvidia.com/v1/embeddings` and uses an OpenAI-compatible request body extended with the NVIDIA-specific `input_type` and `truncate` fields. Several other Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun), so the interface and the pattern are well-established. This PR fills the gap. ### What this PR includes * `conf/models/nvidia.json`: declare the `embedding` URL suffix alongside the existing `chat` and `models` entries. The embedding model entry was already present, so no model addition is needed. * `internal/entity/models/nvidia.go`: replace the `Encode` stub with a real implementation. Adds a small local response type that matches the OpenAI-compatible shape NVIDIA NIM returns. No factory change. No interface change. ### How the driver works * Validates `apiConfig` and the API key, validates the model name, resolves the region with a default fallback (matching the pattern the merged `ListModels` and `CheckConnection` paths in this driver already use), and builds the URL from `BaseURL[region] + URLSuffix.Embedding`. * Sends all input texts in one request as the `input` array, with the NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and `truncate: "END"` fields, mirroring the Python `NvidiaEmbed` reference. * Parses `data[*].embedding` and copies each slice into `[][]float64` indexed by `data[*].index` so the output order matches the input order even if the API returns items in a different order. * Handles both `float64` and `float32` element types. * Empty input returns `[][]float64{}` with no HTTP call. * Non-200 responses propagate the upstream status line and body. * A final pass checks every input slot got a vector and returns a clear error if any slot is still nil. * Per-call 30s context deadline so a slow call cannot block forever. ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### How was this tested? * `go build ./internal/entity/models/...` returns exit 0. * `go vet ./internal/entity/models/...` is clean. * `gofmt -l internal/entity/models/nvidia.go` is clean. * The full method set on `NvidiaModel` still matches the `ModelDriver` interface. * Pattern parity with the just-merged Aliyun `Encode` (#14647). Closes #14699
2026-05-10 18:50:50 -10:00
}
return embeddings, nil
}
Go: implement Rerank in NVIDIA driver (#14778) ## Summary - Replaces the `"no such method"` stub on `NvidiaModel.Rerank` (`internal/entity/models/nvidia.go`) with a real implementation against NVIDIA NIM's `/ranking` endpoint. - Mirrors the existing Python `NvidiaRerank` class at `rag/llm/rerank_model.py:149-190` for behavior parity: same `passages`/`query.text`/`logit` payload shape; `top_n` set to `len(documents)` so every input gets a score returned in original order (the issue body's spec omitted `top_n`, which would cause silent data loss). - Adds the `"rerank": "ranking"` URL suffix and two NIM rerank model entries (`nvidia/nv-rerankqa-mistral-4b-v3`, `nvidia/llama-3.2-nv-rerankqa-1b-v2`) to `conf/models/nvidia.json` so the picker exposes them. - Follows the same shape as the recently merged Aliyun (#14676), Gitee (#14656), and ZhipuAI (#14608) Rerank implementations: lowercase per-driver request/response types, conversion to the project-wide `RerankResponse{Data: []RerankResult}`, per-call `context.WithTimeout` of 30s. Closes #14720 ## Test plan - [x] `gofmt -l internal/entity/models/nvidia.go` — clean - [x] `go vet ./internal/entity/models/...` — no new errors introduced (the two pre-existing vet errors in `baidu.go:642` and `openrouter.go:566` are unrelated to this PR) - [x] `go build ./internal/entity/models/...` — succeeds - [x] `python3 -c "import json; json.load(open('conf/models/nvidia.json'))"` — JSON valid - [ ] Live smoke test against NVIDIA NIM with a real API key (requires reviewer with NIM credentials) ## Notes for reviewers - The issue body suggested omitting `top_n`. The Python reference includes it (`top_n: len(texts)`), and without it NVIDIA returns only the default top-K rankings rather than scores for every input. This PR follows the Python. - The URL host is `integrate.api.nvidia.com` (kept consistent with the existing chat/embeddings BaseURL in `nvidia.go`), not the legacy `ai.api.nvidia.com` host the Python uses. NIM's unified endpoint accepts the model names as-is, so no per-model URL transform is needed.
2026-05-11 11:21:16 +02:00
// nvidiaRerankRequest mirrors the NIM /ranking request shape:
// query is an object with a "text" field, passages is an array of
// objects each with a "text" field. truncate=END matches the Python
// NvidiaRerank reference at rag/llm/rerank_model.py.
type nvidiaRerankRequest struct {
Model string `json:"model"`
Query nvidiaRerankText `json:"query"`
Passages []nvidiaRerankText `json:"passages"`
Truncate string `json:"truncate,omitempty"`
TopN int `json:"top_n"`
}
type nvidiaRerankText struct {
Text string `json:"text"`
}
// nvidiaRerankResponse maps the NIM rankings array. Each entry pairs
// the original passage index with a logit score; the caller uses the
// index to restore original input order.
type nvidiaRerankResponse struct {
Rankings []struct {
Index int `json:"index"`
Logit float64 `json:"logit"`
} `json:"rankings"`
}
// Rerank scores documents against the query using an NVIDIA NIM
// reranking model. Mirrors the Python NvidiaRerank class in
// rag/llm/rerank_model.py for payload shape (passages/query/logit).
// Defaults top_n to len(documents) so the API returns a score per
// input; callers may shrink it via RerankConfig.TopN, in which case
// only the top RerankConfig.TopN entries come back. Returned
// RerankResult entries are in the API's ranking order; callers that
// need original-input order should sort by Index. Same return-shape
// contract as the Aliyun and ZhipuAI Rerank drivers.
func (n NvidiaModel) Rerank(modelName *string, query string, documents []string, apiConfig *APIConfig, rerankConfig *RerankConfig) (*RerankResponse, error) {
if err := n.baseModel.APIConfigCheck(apiConfig); err != nil {
return nil, err
}
Go: implement Rerank in NVIDIA driver (#14778) ## Summary - Replaces the `"no such method"` stub on `NvidiaModel.Rerank` (`internal/entity/models/nvidia.go`) with a real implementation against NVIDIA NIM's `/ranking` endpoint. - Mirrors the existing Python `NvidiaRerank` class at `rag/llm/rerank_model.py:149-190` for behavior parity: same `passages`/`query.text`/`logit` payload shape; `top_n` set to `len(documents)` so every input gets a score returned in original order (the issue body's spec omitted `top_n`, which would cause silent data loss). - Adds the `"rerank": "ranking"` URL suffix and two NIM rerank model entries (`nvidia/nv-rerankqa-mistral-4b-v3`, `nvidia/llama-3.2-nv-rerankqa-1b-v2`) to `conf/models/nvidia.json` so the picker exposes them. - Follows the same shape as the recently merged Aliyun (#14676), Gitee (#14656), and ZhipuAI (#14608) Rerank implementations: lowercase per-driver request/response types, conversion to the project-wide `RerankResponse{Data: []RerankResult}`, per-call `context.WithTimeout` of 30s. Closes #14720 ## Test plan - [x] `gofmt -l internal/entity/models/nvidia.go` — clean - [x] `go vet ./internal/entity/models/...` — no new errors introduced (the two pre-existing vet errors in `baidu.go:642` and `openrouter.go:566` are unrelated to this PR) - [x] `go build ./internal/entity/models/...` — succeeds - [x] `python3 -c "import json; json.load(open('conf/models/nvidia.json'))"` — JSON valid - [ ] Live smoke test against NVIDIA NIM with a real API key (requires reviewer with NIM credentials) ## Notes for reviewers - The issue body suggested omitting `top_n`. The Python reference includes it (`top_n: len(texts)`), and without it NVIDIA returns only the default top-K rankings rather than scores for every input. This PR follows the Python. - The URL host is `integrate.api.nvidia.com` (kept consistent with the existing chat/embeddings BaseURL in `nvidia.go`), not the legacy `ai.api.nvidia.com` host the Python uses. NIM's unified endpoint accepts the model names as-is, so no per-model URL transform is needed.
2026-05-11 11:21:16 +02:00
if len(documents) == 0 {
return &RerankResponse{}, nil
}
if modelName == nil || *modelName == "" {
return nil, fmt.Errorf("model name is required")
}
resolvedBaseURL, err := n.baseModel.GetBaseURL(apiConfig)
if err != nil {
return nil, err
Go: implement Rerank in NVIDIA driver (#14778) ## Summary - Replaces the `"no such method"` stub on `NvidiaModel.Rerank` (`internal/entity/models/nvidia.go`) with a real implementation against NVIDIA NIM's `/ranking` endpoint. - Mirrors the existing Python `NvidiaRerank` class at `rag/llm/rerank_model.py:149-190` for behavior parity: same `passages`/`query.text`/`logit` payload shape; `top_n` set to `len(documents)` so every input gets a score returned in original order (the issue body's spec omitted `top_n`, which would cause silent data loss). - Adds the `"rerank": "ranking"` URL suffix and two NIM rerank model entries (`nvidia/nv-rerankqa-mistral-4b-v3`, `nvidia/llama-3.2-nv-rerankqa-1b-v2`) to `conf/models/nvidia.json` so the picker exposes them. - Follows the same shape as the recently merged Aliyun (#14676), Gitee (#14656), and ZhipuAI (#14608) Rerank implementations: lowercase per-driver request/response types, conversion to the project-wide `RerankResponse{Data: []RerankResult}`, per-call `context.WithTimeout` of 30s. Closes #14720 ## Test plan - [x] `gofmt -l internal/entity/models/nvidia.go` — clean - [x] `go vet ./internal/entity/models/...` — no new errors introduced (the two pre-existing vet errors in `baidu.go:642` and `openrouter.go:566` are unrelated to this PR) - [x] `go build ./internal/entity/models/...` — succeeds - [x] `python3 -c "import json; json.load(open('conf/models/nvidia.json'))"` — JSON valid - [ ] Live smoke test against NVIDIA NIM with a real API key (requires reviewer with NIM credentials) ## Notes for reviewers - The issue body suggested omitting `top_n`. The Python reference includes it (`top_n: len(texts)`), and without it NVIDIA returns only the default top-K rankings rather than scores for every input. This PR follows the Python. - The URL host is `integrate.api.nvidia.com` (kept consistent with the existing chat/embeddings BaseURL in `nvidia.go`), not the legacy `ai.api.nvidia.com` host the Python uses. NIM's unified endpoint accepts the model names as-is, so no per-model URL transform is needed.
2026-05-11 11:21:16 +02:00
}
baseURL := resolvedBaseURL
Go: implement Rerank in NVIDIA driver (#14778) ## Summary - Replaces the `"no such method"` stub on `NvidiaModel.Rerank` (`internal/entity/models/nvidia.go`) with a real implementation against NVIDIA NIM's `/ranking` endpoint. - Mirrors the existing Python `NvidiaRerank` class at `rag/llm/rerank_model.py:149-190` for behavior parity: same `passages`/`query.text`/`logit` payload shape; `top_n` set to `len(documents)` so every input gets a score returned in original order (the issue body's spec omitted `top_n`, which would cause silent data loss). - Adds the `"rerank": "ranking"` URL suffix and two NIM rerank model entries (`nvidia/nv-rerankqa-mistral-4b-v3`, `nvidia/llama-3.2-nv-rerankqa-1b-v2`) to `conf/models/nvidia.json` so the picker exposes them. - Follows the same shape as the recently merged Aliyun (#14676), Gitee (#14656), and ZhipuAI (#14608) Rerank implementations: lowercase per-driver request/response types, conversion to the project-wide `RerankResponse{Data: []RerankResult}`, per-call `context.WithTimeout` of 30s. Closes #14720 ## Test plan - [x] `gofmt -l internal/entity/models/nvidia.go` — clean - [x] `go vet ./internal/entity/models/...` — no new errors introduced (the two pre-existing vet errors in `baidu.go:642` and `openrouter.go:566` are unrelated to this PR) - [x] `go build ./internal/entity/models/...` — succeeds - [x] `python3 -c "import json; json.load(open('conf/models/nvidia.json'))"` — JSON valid - [ ] Live smoke test against NVIDIA NIM with a real API key (requires reviewer with NIM credentials) ## Notes for reviewers - The issue body suggested omitting `top_n`. The Python reference includes it (`top_n: len(texts)`), and without it NVIDIA returns only the default top-K rankings rather than scores for every input. This PR follows the Python. - The URL host is `integrate.api.nvidia.com` (kept consistent with the existing chat/embeddings BaseURL in `nvidia.go`), not the legacy `ai.api.nvidia.com` host the Python uses. NIM's unified endpoint accepts the model names as-is, so no per-model URL transform is needed.
2026-05-11 11:21:16 +02:00
if baseURL == "" {
baseURL = resolvedBaseURL
Go: implement Rerank in NVIDIA driver (#14778) ## Summary - Replaces the `"no such method"` stub on `NvidiaModel.Rerank` (`internal/entity/models/nvidia.go`) with a real implementation against NVIDIA NIM's `/ranking` endpoint. - Mirrors the existing Python `NvidiaRerank` class at `rag/llm/rerank_model.py:149-190` for behavior parity: same `passages`/`query.text`/`logit` payload shape; `top_n` set to `len(documents)` so every input gets a score returned in original order (the issue body's spec omitted `top_n`, which would cause silent data loss). - Adds the `"rerank": "ranking"` URL suffix and two NIM rerank model entries (`nvidia/nv-rerankqa-mistral-4b-v3`, `nvidia/llama-3.2-nv-rerankqa-1b-v2`) to `conf/models/nvidia.json` so the picker exposes them. - Follows the same shape as the recently merged Aliyun (#14676), Gitee (#14656), and ZhipuAI (#14608) Rerank implementations: lowercase per-driver request/response types, conversion to the project-wide `RerankResponse{Data: []RerankResult}`, per-call `context.WithTimeout` of 30s. Closes #14720 ## Test plan - [x] `gofmt -l internal/entity/models/nvidia.go` — clean - [x] `go vet ./internal/entity/models/...` — no new errors introduced (the two pre-existing vet errors in `baidu.go:642` and `openrouter.go:566` are unrelated to this PR) - [x] `go build ./internal/entity/models/...` — succeeds - [x] `python3 -c "import json; json.load(open('conf/models/nvidia.json'))"` — JSON valid - [ ] Live smoke test against NVIDIA NIM with a real API key (requires reviewer with NIM credentials) ## Notes for reviewers - The issue body suggested omitting `top_n`. The Python reference includes it (`top_n: len(texts)`), and without it NVIDIA returns only the default top-K rankings rather than scores for every input. This PR follows the Python. - The URL host is `integrate.api.nvidia.com` (kept consistent with the existing chat/embeddings BaseURL in `nvidia.go`), not the legacy `ai.api.nvidia.com` host the Python uses. NIM's unified endpoint accepts the model names as-is, so no per-model URL transform is needed.
2026-05-11 11:21:16 +02:00
}
url := fmt.Sprintf("%s/%s", strings.TrimSuffix(baseURL, "/"), n.baseModel.URLSuffix.Rerank)
Go: implement Rerank in NVIDIA driver (#14778) ## Summary - Replaces the `"no such method"` stub on `NvidiaModel.Rerank` (`internal/entity/models/nvidia.go`) with a real implementation against NVIDIA NIM's `/ranking` endpoint. - Mirrors the existing Python `NvidiaRerank` class at `rag/llm/rerank_model.py:149-190` for behavior parity: same `passages`/`query.text`/`logit` payload shape; `top_n` set to `len(documents)` so every input gets a score returned in original order (the issue body's spec omitted `top_n`, which would cause silent data loss). - Adds the `"rerank": "ranking"` URL suffix and two NIM rerank model entries (`nvidia/nv-rerankqa-mistral-4b-v3`, `nvidia/llama-3.2-nv-rerankqa-1b-v2`) to `conf/models/nvidia.json` so the picker exposes them. - Follows the same shape as the recently merged Aliyun (#14676), Gitee (#14656), and ZhipuAI (#14608) Rerank implementations: lowercase per-driver request/response types, conversion to the project-wide `RerankResponse{Data: []RerankResult}`, per-call `context.WithTimeout` of 30s. Closes #14720 ## Test plan - [x] `gofmt -l internal/entity/models/nvidia.go` — clean - [x] `go vet ./internal/entity/models/...` — no new errors introduced (the two pre-existing vet errors in `baidu.go:642` and `openrouter.go:566` are unrelated to this PR) - [x] `go build ./internal/entity/models/...` — succeeds - [x] `python3 -c "import json; json.load(open('conf/models/nvidia.json'))"` — JSON valid - [ ] Live smoke test against NVIDIA NIM with a real API key (requires reviewer with NIM credentials) ## Notes for reviewers - The issue body suggested omitting `top_n`. The Python reference includes it (`top_n: len(texts)`), and without it NVIDIA returns only the default top-K rankings rather than scores for every input. This PR follows the Python. - The URL host is `integrate.api.nvidia.com` (kept consistent with the existing chat/embeddings BaseURL in `nvidia.go`), not the legacy `ai.api.nvidia.com` host the Python uses. NIM's unified endpoint accepts the model names as-is, so no per-model URL transform is needed.
2026-05-11 11:21:16 +02:00
topN := len(documents)
if rerankConfig != nil && rerankConfig.TopN > 0 && rerankConfig.TopN < topN {
topN = rerankConfig.TopN
}
passages := make([]nvidiaRerankText, len(documents))
for i, doc := range documents {
passages[i] = nvidiaRerankText{Text: doc}
}
reqBody := nvidiaRerankRequest{
Model: *modelName,
Query: nvidiaRerankText{Text: query},
Passages: passages,
Truncate: "END",
TopN: topN,
}
jsonData, err := json.Marshal(reqBody)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
Go: implement Rerank in NVIDIA driver (#14778) ## Summary - Replaces the `"no such method"` stub on `NvidiaModel.Rerank` (`internal/entity/models/nvidia.go`) with a real implementation against NVIDIA NIM's `/ranking` endpoint. - Mirrors the existing Python `NvidiaRerank` class at `rag/llm/rerank_model.py:149-190` for behavior parity: same `passages`/`query.text`/`logit` payload shape; `top_n` set to `len(documents)` so every input gets a score returned in original order (the issue body's spec omitted `top_n`, which would cause silent data loss). - Adds the `"rerank": "ranking"` URL suffix and two NIM rerank model entries (`nvidia/nv-rerankqa-mistral-4b-v3`, `nvidia/llama-3.2-nv-rerankqa-1b-v2`) to `conf/models/nvidia.json` so the picker exposes them. - Follows the same shape as the recently merged Aliyun (#14676), Gitee (#14656), and ZhipuAI (#14608) Rerank implementations: lowercase per-driver request/response types, conversion to the project-wide `RerankResponse{Data: []RerankResult}`, per-call `context.WithTimeout` of 30s. Closes #14720 ## Test plan - [x] `gofmt -l internal/entity/models/nvidia.go` — clean - [x] `go vet ./internal/entity/models/...` — no new errors introduced (the two pre-existing vet errors in `baidu.go:642` and `openrouter.go:566` are unrelated to this PR) - [x] `go build ./internal/entity/models/...` — succeeds - [x] `python3 -c "import json; json.load(open('conf/models/nvidia.json'))"` — JSON valid - [ ] Live smoke test against NVIDIA NIM with a real API key (requires reviewer with NIM credentials) ## Notes for reviewers - The issue body suggested omitting `top_n`. The Python reference includes it (`top_n: len(texts)`), and without it NVIDIA returns only the default top-K rankings rather than scores for every input. This PR follows the Python. - The URL host is `integrate.api.nvidia.com` (kept consistent with the existing chat/embeddings BaseURL in `nvidia.go`), not the legacy `ai.api.nvidia.com` host the Python uses. NIM's unified endpoint accepts the model names as-is, so no per-model URL transform is needed.
2026-05-11 11:21:16 +02:00
defer cancel()
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
resp, err := n.baseModel.httpClient.Do(req)
Go: implement Rerank in NVIDIA driver (#14778) ## Summary - Replaces the `"no such method"` stub on `NvidiaModel.Rerank` (`internal/entity/models/nvidia.go`) with a real implementation against NVIDIA NIM's `/ranking` endpoint. - Mirrors the existing Python `NvidiaRerank` class at `rag/llm/rerank_model.py:149-190` for behavior parity: same `passages`/`query.text`/`logit` payload shape; `top_n` set to `len(documents)` so every input gets a score returned in original order (the issue body's spec omitted `top_n`, which would cause silent data loss). - Adds the `"rerank": "ranking"` URL suffix and two NIM rerank model entries (`nvidia/nv-rerankqa-mistral-4b-v3`, `nvidia/llama-3.2-nv-rerankqa-1b-v2`) to `conf/models/nvidia.json` so the picker exposes them. - Follows the same shape as the recently merged Aliyun (#14676), Gitee (#14656), and ZhipuAI (#14608) Rerank implementations: lowercase per-driver request/response types, conversion to the project-wide `RerankResponse{Data: []RerankResult}`, per-call `context.WithTimeout` of 30s. Closes #14720 ## Test plan - [x] `gofmt -l internal/entity/models/nvidia.go` — clean - [x] `go vet ./internal/entity/models/...` — no new errors introduced (the two pre-existing vet errors in `baidu.go:642` and `openrouter.go:566` are unrelated to this PR) - [x] `go build ./internal/entity/models/...` — succeeds - [x] `python3 -c "import json; json.load(open('conf/models/nvidia.json'))"` — JSON valid - [ ] Live smoke test against NVIDIA NIM with a real API key (requires reviewer with NIM credentials) ## Notes for reviewers - The issue body suggested omitting `top_n`. The Python reference includes it (`top_n: len(texts)`), and without it NVIDIA returns only the default top-K rankings rather than scores for every input. This PR follows the Python. - The URL host is `integrate.api.nvidia.com` (kept consistent with the existing chat/embeddings BaseURL in `nvidia.go`), not the legacy `ai.api.nvidia.com` host the Python uses. NIM's unified endpoint accepts the model names as-is, so no per-model URL transform is needed.
2026-05-11 11:21:16 +02:00
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("Nvidia rerank API error: %s, body: %s", resp.Status, string(body))
}
var parsed nvidiaRerankResponse
if err = json.Unmarshal(body, &parsed); err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
rerankResponse := RerankResponse{Data: make([]RerankResult, 0, len(parsed.Rankings))}
for _, r := range parsed.Rankings {
if r.Index < 0 || r.Index >= len(documents) {
return nil, fmt.Errorf("unexpected rerank index %d for %d inputs", r.Index, len(documents))
}
rerankResponse.Data = append(rerankResponse.Data, RerankResult{
Index: r.Index,
RelevanceScore: r.Logit,
})
}
return &rerankResponse, nil
}
// TranscribeAudio transcribe audio
func (n *NvidiaModel) TranscribeAudio(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig) (*ASRResponse, error) {
return nil, fmt.Errorf("%s, no such method", n.Name())
}
func (n *NvidiaModel) TranscribeAudioWithSender(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig, sender func(*string, *string) error) error {
return fmt.Errorf("%s, no such method", n.Name())
}
Go: implement PaddleOCR provider and implement ASR for CoHere (#14954) ### What problem does this PR solve? This PR implement implement OCR for Baidu and Mistral, implement PaddleOCR provider and implement ASR for CoHere **Verified examples from the CLI:** ``` RAGFlow(user)> ocr with 'mistral-ocr-2512@test@mistral' file './internal/text.jpg' +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | text | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Parallel to these organizational innovations there were significant complementary technical innovations (e.g., improved methods of manufacturing cast-iron pipe and of coating interiors for pressure maintenance, and newer paving and construction material... | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ RAGFlow(user)> ocr with 'paddleocr-vl-0.9b@test@baidu' file './internal/text.jpg' +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | text | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Parallel to these organizational innovations there were significant complementary technical innovations (e.g., improved methods of manufacturing cast-iron pipe and of coating interiors for pressure maintenance, and newer paving and construction material... | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ # PaddleOCR RAGFlow(user)> ocr with 'PaddleOCR-VL-1.5@test@paddleocr' file './internal/test.pdf' +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | text | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | # Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation Bingxin Ke Nando Metzger Photogra Anton Obukhov Rodrigo Caye Daudt netry and Remote Sensing, Shengyu Huang Konrad Schindler ETH Zürich <div style="text-align: c... | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ # Cohere RAGFlow(user)> asr with 'cohere-transcribe-03-2026@test@cohere' audio './internal/test.wav' param '{"language": "en"}' +-----------------------------------------------------------------------------------------------------------------------+ | text | +-----------------------------------------------------------------------------------------------------------------------+ | The examination and testimony of the experts enabled the Commission to conclude that five shots may have been fired. | +-----------------------------------------------------------------------------------------------------------------------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) - [x] Refactoring
2026-05-15 18:41:43 +08:00
// AudioSpeech convert text to audio
func (n *NvidiaModel) AudioSpeech(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig) (*TTSResponse, error) {
return nil, fmt.Errorf("%s, no such method", n.Name())
}
func (n *NvidiaModel) AudioSpeechWithSender(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig, sender func(*string, *string) error) error {
return fmt.Errorf("%s, no such method", n.Name())
}
// OCRFile OCR file
func (n *NvidiaModel) OCRFile(modelName *string, content []byte, url *string, apiConfig *APIConfig, ocrConfig *OCRConfig) (*OCRFileResponse, error) {
return nil, fmt.Errorf("%s, no such method", n.Name())
}
Go: add file parse command (#14892) ### What problem does this PR solve? ``` RAGFlow(user)> ocr with 'hunyuanocr@test@gitee' file './picture.png' +----------------------------------------------------------+ | text | +----------------------------------------------------------+ | 生活不是等待风暴过去,而是学会在雨中翩翩起舞。 ——佚名 | +----------------------------------------------------------+ RAGFlow(user)> list 'test@gitee' tasks; +---------+----------------------------------+ | status | task_id | +---------+----------------------------------+ | success | C3FX4MQNKY5MGC6ZFMIXIAMJKHCEBQB5 | +---------+----------------------------------+ RAGFlow(user)> show 'test@gitee' task 'C3FX4MQNKY5MGC6ZFMIXIAMJKHCEBQB5'; +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ | content | index | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ | # PDF 1: Purpose of RAGFlow RAGFlow is an open source Retrieval-Augmented Generation (RAG) engine designed to turn raw documents into reliable context for large language models.Its purpose is to make it practical to build an Al assistant that can ans... | 1 | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-05-15 12:29:52 +08:00
// ParseFile parse file
func (n *NvidiaModel) ParseFile(modelName *string, content []byte, url *string, apiConfig *APIConfig, parseFileConfig *ParseFileConfig) (*ParseFileResponse, error) {
return nil, fmt.Errorf("%s, no such method", n.Name())
Go: add file parse command (#14892) ### What problem does this PR solve? ``` RAGFlow(user)> ocr with 'hunyuanocr@test@gitee' file './picture.png' +----------------------------------------------------------+ | text | +----------------------------------------------------------+ | 生活不是等待风暴过去,而是学会在雨中翩翩起舞。 ——佚名 | +----------------------------------------------------------+ RAGFlow(user)> list 'test@gitee' tasks; +---------+----------------------------------+ | status | task_id | +---------+----------------------------------+ | success | C3FX4MQNKY5MGC6ZFMIXIAMJKHCEBQB5 | +---------+----------------------------------+ RAGFlow(user)> show 'test@gitee' task 'C3FX4MQNKY5MGC6ZFMIXIAMJKHCEBQB5'; +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ | content | index | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ | # PDF 1: Purpose of RAGFlow RAGFlow is an open source Retrieval-Augmented Generation (RAG) engine designed to turn raw documents into reliable context for large language models.Its purpose is to make it practical to build an Al assistant that can ans... | 1 | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-05-15 12:29:52 +08:00
}
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
// ListModels calls /v1/models on the configured NVIDIA NIM base URL
// and returns the list of available model ids. The endpoint is
// OpenAI-compatible, so the parsing follows the same shape used by
// the moonshot, xai, and openai drivers.
func (n NvidiaModel) ListModels(apiConfig *APIConfig) ([]string, error) {
if err := n.baseModel.APIConfigCheck(apiConfig); err != nil {
return nil, err
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
}
resolvedBaseURL, err := n.baseModel.GetBaseURL(apiConfig)
if err != nil {
return nil, err
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
}
baseURL := resolvedBaseURL
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
if baseURL == "" {
baseURL = resolvedBaseURL
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
}
url := fmt.Sprintf("%s/%s", baseURL, n.baseModel.URLSuffix.Models)
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
resp, err := n.baseModel.httpClient.Do(req)
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("Nvidia models API error: %s, body: %s", resp.Status, string(body))
}
var result map[string]interface{}
if err = json.Unmarshal(body, &result); err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
data, ok := result["data"].([]interface{})
if !ok {
return nil, fmt.Errorf("invalid models list format")
}
models := make([]string, 0, len(data))
for _, item := range data {
m, ok := item.(map[string]interface{})
if !ok {
continue
}
id, ok := m["id"].(string)
if !ok {
continue
}
models = append(models, id)
}
return models, nil
}
func (n NvidiaModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) {
return nil, fmt.Errorf("no such method")
}
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
// CheckConnection verifies that the configured NVIDIA NIM base URL
// is reachable and that the API key is accepted, by issuing a
// lightweight ListModels call. Mirrors the pattern used by the xai,
// moonshot, deepseek, aliyun, and gitee drivers.
func (n NvidiaModel) CheckConnection(apiConfig *APIConfig) error {
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636) ### What problem does this PR solve? The NVIDIA Go driver added in #14623 has a real chat path, but \`ListModels\` and \`CheckConnection\` are stubs that always return \`no such method\`. So: - The model picker cannot auto-populate available NVIDIA NIM model ids. Users have to type the full id by hand (e.g. \`abacusai/dracarys-llama-3.1-70b-instruct\`). - The "Check connection" button always fails for NVIDIA, even when the base URL is reachable and the API key is accepted. NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same Bearer token used for chat. The \`conf/models/nvidia.json\` file already wires the \`models\` url_suffix, so no config change is needed. ### What this PR includes - \`internal/entity/models/nvidia.go\`: - \`ListModels\` now calls \`GET ${BaseURL}/${URLSuffix.Models}\`, parses \`response.data[*].id\`, and returns the list. Same shape as the moonshot, xai, and openai drivers. - \`CheckConnection\` now calls \`ListModels\` and returns its error. Same pattern xai, moonshot, deepseek, aliyun, and gitee already use. \`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and can be added in follow-ups. No JSON change. No factory change. No interface change. ### How the implementation works - Region resolution falls back to \`default\` when the supplied region is unknown, so a stray region value does not break a valid request. - The Authorization header is only set when \`apiConfig\` and \`ApiKey\` are non-nil and non-empty. This avoids a nil-pointer dereference and lets self-hosted NIM deployments without a key still work. - Non-200 responses propagate the upstream status line and body so the user sees a real error message. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) ### How was this tested? - \`go build ./internal/entity/models/...\` in a clean go 1.25 image (the go.mod minimum) returns exit 0. - The full method set on \`NvidiaModel\` still matches the \`ModelDriver\` interface. - Pattern parity with the existing xai, moonshot, deepseek, aliyun, gitee, and openai drivers. Closes #14635
2026-05-08 06:04:28 +02:00
_, err := n.ListModels(apiConfig)
return err
}
Go: add file parse command (#14892) ### What problem does this PR solve? ``` RAGFlow(user)> ocr with 'hunyuanocr@test@gitee' file './picture.png' +----------------------------------------------------------+ | text | +----------------------------------------------------------+ | 生活不是等待风暴过去,而是学会在雨中翩翩起舞。 ——佚名 | +----------------------------------------------------------+ RAGFlow(user)> list 'test@gitee' tasks; +---------+----------------------------------+ | status | task_id | +---------+----------------------------------+ | success | C3FX4MQNKY5MGC6ZFMIXIAMJKHCEBQB5 | +---------+----------------------------------+ RAGFlow(user)> show 'test@gitee' task 'C3FX4MQNKY5MGC6ZFMIXIAMJKHCEBQB5'; +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ | content | index | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ | # PDF 1: Purpose of RAGFlow RAGFlow is an open source Retrieval-Augmented Generation (RAG) engine designed to turn raw documents into reliable context for large language models.Its purpose is to make it practical to build an Al assistant that can ans... | 1 | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-05-15 12:29:52 +08:00
func (n *NvidiaModel) ListTasks(apiConfig *APIConfig) ([]ListTaskStatus, error) {
return nil, fmt.Errorf("%s, no such method", n.Name())
Go: add file parse command (#14892) ### What problem does this PR solve? ``` RAGFlow(user)> ocr with 'hunyuanocr@test@gitee' file './picture.png' +----------------------------------------------------------+ | text | +----------------------------------------------------------+ | 生活不是等待风暴过去,而是学会在雨中翩翩起舞。 ——佚名 | +----------------------------------------------------------+ RAGFlow(user)> list 'test@gitee' tasks; +---------+----------------------------------+ | status | task_id | +---------+----------------------------------+ | success | C3FX4MQNKY5MGC6ZFMIXIAMJKHCEBQB5 | +---------+----------------------------------+ RAGFlow(user)> show 'test@gitee' task 'C3FX4MQNKY5MGC6ZFMIXIAMJKHCEBQB5'; +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ | content | index | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ | # PDF 1: Purpose of RAGFlow RAGFlow is an open source Retrieval-Augmented Generation (RAG) engine designed to turn raw documents into reliable context for large language models.Its purpose is to make it practical to build an Al assistant that can ans... | 1 | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-05-15 12:29:52 +08:00
}
func (n *NvidiaModel) ShowTask(taskID string, apiConfig *APIConfig) (*TaskResponse, error) {
return nil, fmt.Errorf("%s, no such method", n.Name())
Go: add file parse command (#14892) ### What problem does this PR solve? ``` RAGFlow(user)> ocr with 'hunyuanocr@test@gitee' file './picture.png' +----------------------------------------------------------+ | text | +----------------------------------------------------------+ | 生活不是等待风暴过去,而是学会在雨中翩翩起舞。 ——佚名 | +----------------------------------------------------------+ RAGFlow(user)> list 'test@gitee' tasks; +---------+----------------------------------+ | status | task_id | +---------+----------------------------------+ | success | C3FX4MQNKY5MGC6ZFMIXIAMJKHCEBQB5 | +---------+----------------------------------+ RAGFlow(user)> show 'test@gitee' task 'C3FX4MQNKY5MGC6ZFMIXIAMJKHCEBQB5'; +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ | content | index | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ | # PDF 1: Purpose of RAGFlow RAGFlow is an open source Retrieval-Augmented Generation (RAG) engine designed to turn raw documents into reliable context for large language models.Its purpose is to make it practical to build an Al assistant that can ans... | 1 | +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-05-15 12:29:52 +08:00
}