2026-06-03 16:33:58 +08:00
|
|
|
//
|
|
|
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
|
|
|
//
|
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
|
//
|
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
//
|
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
|
// limitations under the License.
|
|
|
|
|
//
|
|
|
|
|
|
2026-05-07 14:17:57 +08:00
|
|
|
package models
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bufio"
|
|
|
|
|
"bytes"
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
"context"
|
2026-05-07 14:17:57 +08:00
|
|
|
"encoding/json"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io"
|
|
|
|
|
"net/http"
|
|
|
|
|
"strings"
|
|
|
|
|
"time"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// NvidiaModel implements ModelDriver for Nvidia
|
|
|
|
|
type NvidiaModel struct {
|
2026-06-04 17:50:22 +08:00
|
|
|
baseModel BaseModel
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// NewNvidiaModel creates a new Nvidia model instance
|
|
|
|
|
func NewNvidiaModel(baseURL map[string]string, urlSuffix URLSuffix) *NvidiaModel {
|
|
|
|
|
return &NvidiaModel{
|
2026-06-04 17:50:22 +08:00
|
|
|
baseModel: BaseModel{
|
|
|
|
|
BaseURL: baseURL,
|
|
|
|
|
URLSuffix: urlSuffix,
|
|
|
|
|
httpClient: &http.Client{
|
|
|
|
|
Transport: &http.Transport{
|
|
|
|
|
MaxIdleConns: 100,
|
|
|
|
|
MaxIdleConnsPerHost: 10,
|
|
|
|
|
IdleConnTimeout: 90 * time.Second,
|
|
|
|
|
DisableCompression: false,
|
|
|
|
|
},
|
2026-05-07 14:17:57 +08:00
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (n NvidiaModel) NewInstance(baseURL map[string]string) ModelDriver {
|
2026-06-04 17:50:22 +08:00
|
|
|
return NewNvidiaModel(baseURL, n.baseModel.URLSuffix)
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (n NvidiaModel) Name() string {
|
|
|
|
|
return "nvidia"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (n *NvidiaModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
|
2026-06-04 17:50:22 +08:00
|
|
|
if err := n.baseModel.APIConfigCheck(apiConfig); err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-07 14:17:57 +08:00
|
|
|
if len(messages) == 0 {
|
|
|
|
|
return nil, fmt.Errorf("messages is empty")
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
resolvedBaseURL, err := n.baseModel.GetBaseURL(apiConfig)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
2026-06-04 17:50:22 +08:00
|
|
|
baseURL := resolvedBaseURL
|
2026-05-07 14:17:57 +08:00
|
|
|
if baseURL == "" {
|
2026-06-04 17:50:22 +08:00
|
|
|
baseURL = resolvedBaseURL
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
2026-06-04 17:50:22 +08:00
|
|
|
url := fmt.Sprintf("%s/%s", baseURL, n.baseModel.URLSuffix.Chat)
|
2026-05-07 14:17:57 +08:00
|
|
|
|
|
|
|
|
apiMessages := make([]map[string]interface{}, len(messages))
|
|
|
|
|
for i, msg := range messages {
|
|
|
|
|
apiMessages[i] = map[string]interface{}{
|
|
|
|
|
"role": msg.Role,
|
|
|
|
|
"content": msg.Content,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
reqBody := map[string]interface{}{
|
|
|
|
|
"model": modelName,
|
|
|
|
|
"messages": apiMessages,
|
|
|
|
|
"stream": false,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if chatModelConfig != nil {
|
|
|
|
|
if chatModelConfig.Stream != nil {
|
|
|
|
|
reqBody["stream"] = *chatModelConfig.Stream
|
|
|
|
|
}
|
|
|
|
|
if chatModelConfig.MaxTokens != nil {
|
|
|
|
|
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
|
|
|
|
|
}
|
|
|
|
|
if chatModelConfig.Temperature != nil {
|
|
|
|
|
reqBody["temperature"] = *chatModelConfig.Temperature
|
|
|
|
|
}
|
|
|
|
|
if chatModelConfig.TopP != nil {
|
|
|
|
|
reqBody["top_p"] = *chatModelConfig.TopP
|
|
|
|
|
}
|
|
|
|
|
if chatModelConfig.Stop != nil {
|
|
|
|
|
reqBody["stop"] = *chatModelConfig.Stop
|
|
|
|
|
}
|
|
|
|
|
if chatModelConfig.Thinking != nil {
|
|
|
|
|
if *chatModelConfig.Thinking {
|
|
|
|
|
reqBody["thinking"] = map[string]interface{}{"type": "enabled"}
|
|
|
|
|
} else {
|
|
|
|
|
reqBody["thinking"] = map[string]interface{}{"type": "disabled"}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
jsonData, err := json.Marshal(reqBody)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-02 03:27:26 -04:00
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
|
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
|
2026-05-07 14:17:57 +08:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req.Header.Set("Content-Type", "application/json")
|
2026-06-04 17:50:22 +08:00
|
|
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
|
2026-05-07 14:17:57 +08:00
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
resp, err := n.baseModel.httpClient.Do(req)
|
2026-05-07 14:17:57 +08:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to send request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to read response: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
|
return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var result map[string]interface{}
|
|
|
|
|
if err = json.Unmarshal(body, &result); err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to parse response: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
choices, ok := result["choices"].([]interface{})
|
|
|
|
|
if !ok || len(choices) == 0 {
|
|
|
|
|
return nil, fmt.Errorf("no choices in response")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
firstChoice, ok := choices[0].(map[string]interface{})
|
|
|
|
|
if !ok {
|
|
|
|
|
return nil, fmt.Errorf("invalid choice format")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
messageMap, ok := firstChoice["message"].(map[string]interface{})
|
|
|
|
|
if !ok {
|
|
|
|
|
return nil, fmt.Errorf("invalid message format")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
content, ok := messageMap["content"].(string)
|
|
|
|
|
if !ok {
|
|
|
|
|
return nil, fmt.Errorf("invalid content format")
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-08 12:02:37 +08:00
|
|
|
var reasonContent string
|
|
|
|
|
if chatModelConfig != nil && chatModelConfig.Thinking != nil && *chatModelConfig.Thinking {
|
|
|
|
|
reasonContent, ok = messageMap["reasoning_content"].(string)
|
|
|
|
|
if !ok {
|
|
|
|
|
return nil, fmt.Errorf("invalid content format")
|
|
|
|
|
}
|
|
|
|
|
if reasonContent != "" && reasonContent[0] == '\n' {
|
|
|
|
|
reasonContent = reasonContent[1:]
|
|
|
|
|
}
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
chatResponse := &ChatResponse{
|
2026-05-08 12:02:37 +08:00
|
|
|
Answer: &content,
|
|
|
|
|
ReasonContent: &reasonContent,
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return chatResponse, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (n *NvidiaModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, modelConfig *ChatConfig, sender func(*string, *string) error) error {
|
2026-06-04 17:50:22 +08:00
|
|
|
if err := n.baseModel.APIConfigCheck(apiConfig); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if sender == nil {
|
|
|
|
|
return fmt.Errorf("sender is required")
|
|
|
|
|
}
|
2026-05-07 14:17:57 +08:00
|
|
|
if len(messages) == 0 {
|
|
|
|
|
return fmt.Errorf("messages is empty")
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
resolvedBaseURL, err := n.baseModel.GetBaseURL(apiConfig)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
2026-06-04 17:50:22 +08:00
|
|
|
baseURL := resolvedBaseURL
|
2026-05-07 14:17:57 +08:00
|
|
|
if baseURL == "" {
|
2026-06-04 17:50:22 +08:00
|
|
|
baseURL = resolvedBaseURL
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
2026-06-04 17:50:22 +08:00
|
|
|
url := fmt.Sprintf("%s/%s", baseURL, n.baseModel.URLSuffix.Chat)
|
2026-05-07 14:17:57 +08:00
|
|
|
|
|
|
|
|
apiMessages := make([]map[string]interface{}, len(messages))
|
|
|
|
|
for i, msg := range messages {
|
|
|
|
|
apiMessages[i] = map[string]interface{}{
|
|
|
|
|
"role": msg.Role,
|
|
|
|
|
"content": msg.Content,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
reqBody := map[string]interface{}{
|
|
|
|
|
"model": modelName,
|
|
|
|
|
"messages": apiMessages,
|
|
|
|
|
"stream": true,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if modelConfig != nil {
|
|
|
|
|
if modelConfig.Stream != nil {
|
|
|
|
|
reqBody["stream"] = *modelConfig.Stream
|
|
|
|
|
}
|
|
|
|
|
if modelConfig.MaxTokens != nil {
|
|
|
|
|
reqBody["max_tokens"] = *modelConfig.MaxTokens
|
|
|
|
|
}
|
|
|
|
|
if modelConfig.Temperature != nil {
|
|
|
|
|
reqBody["temperature"] = *modelConfig.Temperature
|
|
|
|
|
}
|
|
|
|
|
if modelConfig.DoSample != nil {
|
|
|
|
|
reqBody["do_sample"] = *modelConfig.DoSample
|
|
|
|
|
}
|
|
|
|
|
if modelConfig.TopP != nil {
|
|
|
|
|
reqBody["top_p"] = *modelConfig.TopP
|
|
|
|
|
}
|
|
|
|
|
if modelConfig.Stop != nil {
|
|
|
|
|
reqBody["stop"] = *modelConfig.Stop
|
|
|
|
|
}
|
|
|
|
|
if modelConfig.Thinking != nil {
|
|
|
|
|
if *modelConfig.Thinking {
|
|
|
|
|
reqBody["thinking"] = map[string]interface{}{"type": "enabled"}
|
|
|
|
|
} else {
|
|
|
|
|
reqBody["thinking"] = map[string]interface{}{"type": "disabled"}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
jsonData, err := json.Marshal(reqBody)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("failed to marshal request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-02 03:27:26 -04:00
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), streamCallTimeout)
|
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
|
2026-05-07 14:17:57 +08:00
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("failed to create request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req.Header.Set("Content-Type", "application/json")
|
2026-06-04 17:50:22 +08:00
|
|
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
|
2026-05-07 14:17:57 +08:00
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
resp, err := n.baseModel.httpClient.Do(req)
|
2026-05-07 14:17:57 +08:00
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("failed to send request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
|
body, _ := io.ReadAll(resp.Body)
|
|
|
|
|
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
scanner := bufio.NewScanner(resp.Body)
|
fix(go-models): raise SSE scanner buffer so large stream chunks are not dropped (#15382)
### Summary
Closes #15381
Every provider in `internal/entity/models/` reads its streaming response
with `bufio.NewScanner(resp.Body)` and iterates over `scanner.Scan()`.
The default `bufio.Scanner` maximum token size is 64KB, so when an
upstream sends a single SSE `data:` line larger than 64KB (long content
deltas, large tool or function call argument blobs, bundled
`reasoning_content`, or providers that emit a whole message in one
event) `scanner.Scan()` returns `false` and `scanner.Err()` returns
`bufio.ErrTooLong`. Streaming chat then ends with an error partway
through the response.
This change adds `scanner.Buffer(make([]byte, 64*1024), 1024*1024)`
immediately after every SSE scanner that was still bare, raising the cap
to 1MB. 1MB is the value already used for streaming chat in `openai.go`,
`modelscope.go`, `groq.go`, `mistral.go`, `xai.go` and the other already
patched providers (the 8MB cap in the repo is reserved for TTS and
embedding paths), so this simply converges the remaining providers onto
the established pattern. Nothing else changes: line parsing, `data:`
prefix handling, `[DONE]` detection, JSON unmarshalling, error handling,
and the existing `scanner.Err()` checks all stay the same.
Providers covered (23 scanners across 22 files): 302ai, aliyun,
baichuan, baidu, cohere, deepinfra, deepseek, gitee, huggingface,
lmstudio, minimax (the chat scanner, whose TTS scanner was already
bumped), moonshot, nvidia, ollama, openrouter, orcarouter, paddleocr,
siliconflow, tokenhub, vllm, volcengine, xunfei, zhipu-ai. `jiekouai.go`
is excluded because it is covered by the in flight #15337.
A table driven regression test (`sse_scanner_buffer_test.go`) streams a
single 128KB `data:` content delta followed by `data: [DONE]` through an
`httptest` server and asserts that `ChatStreamlyWithSender` delivers the
full content with no error across a representative subset of providers.
Without the buffer fix the test fails with `bufio.Scanner: token too
long`.
This PR also removes three duplicate declarations of the package level
`roundTripperFunc` test helper that several recently merged provider PRs
each added independently, which had left the `internal/entity/models`
test package unable to compile. The helper now lives in a single place
and is shared.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
2026-05-29 07:34:00 -04:00
|
|
|
scanner.Buffer(make([]byte, 64*1024), 1024*1024)
|
2026-05-07 14:17:57 +08:00
|
|
|
for scanner.Scan() {
|
|
|
|
|
line := scanner.Text()
|
|
|
|
|
|
|
|
|
|
if !strings.HasPrefix(line, "data:") {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
data := strings.TrimSpace(line[5:])
|
|
|
|
|
if data == "[DONE]" {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var event map[string]interface{}
|
|
|
|
|
if err = json.Unmarshal([]byte(data), &event); err != nil {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
choices, ok := event["choices"].([]interface{})
|
|
|
|
|
if !ok || len(choices) == 0 {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
firstChoice, ok := choices[0].(map[string]interface{})
|
|
|
|
|
if !ok {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
delta, ok := firstChoice["delta"].(map[string]interface{})
|
|
|
|
|
if !ok {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
reasoningContent, ok := delta["reasoning_content"].(string)
|
|
|
|
|
if ok && reasoningContent != "" {
|
|
|
|
|
if err := sender(nil, &reasoningContent); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
content, ok := delta["content"].(string)
|
|
|
|
|
if ok && content != "" {
|
|
|
|
|
if err := sender(&content, nil); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
finishReason, ok := firstChoice["finish_reason"].(string)
|
|
|
|
|
if ok && finishReason != "" {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
endOfStream := "[DONE]"
|
|
|
|
|
if err = sender(&endOfStream, nil); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return scanner.Err()
|
|
|
|
|
}
|
|
|
|
|
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
// nvidiaEmbeddingResponse is the subset of the embeddings endpoint's JSON
// reply that this driver decodes: a top-level "data" array with one entry per
// returned vector.
type nvidiaEmbeddingResponse struct {
	Data []struct {
		// Index is decoded from the entry's "index" field.
		Index int `json:"index"`
		// Embedding is decoded from the entry's "embedding" number array.
		Embedding []float64 `json:"embedding"`
	} `json:"data"`
}
|
|
|
|
|
|
2026-05-11 14:45:30 +08:00
|
|
|
func (n NvidiaModel) Embed(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([]EmbeddingData, error) {
|
2026-06-04 17:50:22 +08:00
|
|
|
if err := n.baseModel.APIConfigCheck(apiConfig); err != nil {
|
|
|
|
|
return nil, err
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
}
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
if len(texts) == 0 {
|
|
|
|
|
return []EmbeddingData{}, nil
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if modelName == nil || *modelName == "" {
|
|
|
|
|
return nil, fmt.Errorf("model name is required")
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
resolvedBaseURL, err := n.baseModel.GetBaseURL(apiConfig)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
}
|
2026-06-04 17:50:22 +08:00
|
|
|
baseURL := resolvedBaseURL
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
if baseURL == "" {
|
2026-06-04 17:50:22 +08:00
|
|
|
baseURL = resolvedBaseURL
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
}
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
url := fmt.Sprintf("%s/%s", strings.TrimSuffix(baseURL, "/"), n.baseModel.URLSuffix.Embedding)
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
|
|
|
|
|
reqBody := map[string]interface{}{
|
|
|
|
|
"model": *modelName,
|
|
|
|
|
"input": texts,
|
|
|
|
|
"input_type": "query",
|
|
|
|
|
"encoding_format": "float",
|
|
|
|
|
"truncate": "END",
|
|
|
|
|
}
|
|
|
|
|
if embeddingConfig != nil && embeddingConfig.Dimension > 0 {
|
|
|
|
|
reqBody["dimensions"] = embeddingConfig.Dimension
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
jsonData, err := json.Marshal(reqBody)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-02 03:27:26 -04:00
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
resp, err := n.baseModel.httpClient.Do(req)
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to send request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to read response: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
|
return nil, fmt.Errorf("Nvidia embeddings API error: %s, body: %s", resp.Status, string(body))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var parsed nvidiaEmbeddingResponse
|
|
|
|
|
if err = json.Unmarshal(body, &parsed); err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to parse response: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-11 14:45:30 +08:00
|
|
|
var embeddings []EmbeddingData
|
|
|
|
|
for _, dataElem := range parsed.Data {
|
|
|
|
|
var embeddingData EmbeddingData
|
|
|
|
|
embeddingData.Embedding = dataElem.Embedding
|
|
|
|
|
embeddingData.Index = dataElem.Index
|
|
|
|
|
embeddings = append(embeddings, embeddingData)
|
Go: implement Encode (embeddings) in NVIDIA driver (#14700)
### What problem does this PR solve?
The NVIDIA Go driver in `internal/entity/models/nvidia.go` shipped with
a stub `Encode`
method that returned `no such method`. `conf/models/nvidia.json` already
lists
`nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` as an embedding model,
but the conf had
no `embedding` URL suffix, so the picker had nothing wired even if
`Encode` worked.
A tenant who wanted to use NVIDIA NIM for chat (already working) and
embeddings from a
single provider could not, even though the upstream endpoint is public
at
`https://integrate.api.nvidia.com/v1/embeddings` and uses an
OpenAI-compatible request
body extended with the NVIDIA-specific `input_type` and `truncate`
fields. Several other
Go drivers already implement `Encode` (siliconflow, zhipu-ai, aliyun),
so the interface
and the pattern are well-established.
This PR fills the gap.
### What this PR includes
* `conf/models/nvidia.json`: declare the `embedding` URL suffix
alongside the existing
`chat` and `models` entries. The embedding model entry was already
present, so no
model addition is needed.
* `internal/entity/models/nvidia.go`: replace the `Encode` stub with a
real
implementation. Adds a small local response type that matches the
OpenAI-compatible
shape NVIDIA NIM returns.
No factory change. No interface change.
### How the driver works
* Validates `apiConfig` and the API key, validates the model name,
resolves the region
with a default fallback (matching the pattern the merged `ListModels`
and
`CheckConnection` paths in this driver already use), and builds the URL
from
`BaseURL[region] + URLSuffix.Embedding`.
* Sends all input texts in one request as the `input` array, with the
NVIDIA-specific `input_type: "query"`, `encoding_format: "float"`, and
`truncate: "END"`
fields, mirroring the Python `NvidiaEmbed` reference.
* Parses `data[*].embedding` and copies each slice into `[][]float64`
indexed by
`data[*].index` so the output order matches the input order even if the
API returns
items in a different order.
* Handles both `float64` and `float32` element types.
* Empty input returns `[][]float64{}` with no HTTP call.
* Non-200 responses propagate the upstream status line and body.
* A final pass checks every input slot got a vector and returns a clear
error if any
slot is still nil.
* Per-call 30s context deadline so a slow call cannot block forever.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### How was this tested?
* `go build ./internal/entity/models/...` returns exit 0.
* `go vet ./internal/entity/models/...` is clean.
* `gofmt -l internal/entity/models/nvidia.go` is clean.
* The full method set on `NvidiaModel` still matches the `ModelDriver`
interface.
* Pattern parity with the just-merged Aliyun `Encode` (#14647).
Closes #14699
2026-05-10 18:50:50 -10:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return embeddings, nil
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
|
|
|
|
|
2026-05-11 11:21:16 +02:00
|
|
|
// nvidiaRerankRequest mirrors the NIM /ranking request shape:
|
|
|
|
|
// query is an object with a "text" field, passages is an array of
|
|
|
|
|
// objects each with a "text" field. truncate=END matches the Python
|
|
|
|
|
// NvidiaRerank reference at rag/llm/rerank_model.py.
|
|
|
|
|
type nvidiaRerankRequest struct {
|
|
|
|
|
Model string `json:"model"`
|
|
|
|
|
Query nvidiaRerankText `json:"query"`
|
|
|
|
|
Passages []nvidiaRerankText `json:"passages"`
|
|
|
|
|
Truncate string `json:"truncate,omitempty"`
|
|
|
|
|
TopN int `json:"top_n"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// nvidiaRerankText wraps a single string as a {"text": ...} JSON object.
// It is used for both the query and each passage of nvidiaRerankRequest.
type nvidiaRerankText struct {
	// Text is the raw string serialized under the "text" key.
	Text string `json:"text"`
}
|
|
|
|
|
|
|
|
|
|
// nvidiaRerankResponse maps the NIM rankings array. Each entry pairs
// the original passage index with a logit score; the caller uses the
// index to restore original input order.
type nvidiaRerankResponse struct {
	// Rankings is decoded from the "rankings" key of the response body.
	Rankings []struct {
		// Index is the position of the passage as reported under "index".
		Index int `json:"index"`
		// Logit is the score reported under "logit".
		Logit float64 `json:"logit"`
	} `json:"rankings"`
}
|
|
|
|
|
|
|
|
|
|
// Rerank scores documents against the query using an NVIDIA NIM
|
|
|
|
|
// reranking model. Mirrors the Python NvidiaRerank class in
|
|
|
|
|
// rag/llm/rerank_model.py for payload shape (passages/query/logit).
|
|
|
|
|
// Defaults top_n to len(documents) so the API returns a score per
|
|
|
|
|
// input; callers may shrink it via RerankConfig.TopN, in which case
|
|
|
|
|
// only the top RerankConfig.TopN entries come back. Returned
|
|
|
|
|
// RerankResult entries are in the API's ranking order; callers that
|
|
|
|
|
// need original-input order should sort by Index. Same return-shape
|
|
|
|
|
// contract as the Aliyun and ZhipuAI Rerank drivers.
|
2026-05-09 17:41:54 +08:00
|
|
|
func (n NvidiaModel) Rerank(modelName *string, query string, documents []string, apiConfig *APIConfig, rerankConfig *RerankConfig) (*RerankResponse, error) {
|
2026-06-04 17:50:22 +08:00
|
|
|
if err := n.baseModel.APIConfigCheck(apiConfig); err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-11 11:21:16 +02:00
|
|
|
if len(documents) == 0 {
|
|
|
|
|
return &RerankResponse{}, nil
|
|
|
|
|
}
|
|
|
|
|
if modelName == nil || *modelName == "" {
|
|
|
|
|
return nil, fmt.Errorf("model name is required")
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
resolvedBaseURL, err := n.baseModel.GetBaseURL(apiConfig)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
2026-05-11 11:21:16 +02:00
|
|
|
}
|
2026-06-04 17:50:22 +08:00
|
|
|
baseURL := resolvedBaseURL
|
2026-05-11 11:21:16 +02:00
|
|
|
if baseURL == "" {
|
2026-06-04 17:50:22 +08:00
|
|
|
baseURL = resolvedBaseURL
|
2026-05-11 11:21:16 +02:00
|
|
|
}
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
url := fmt.Sprintf("%s/%s", strings.TrimSuffix(baseURL, "/"), n.baseModel.URLSuffix.Rerank)
|
2026-05-11 11:21:16 +02:00
|
|
|
|
|
|
|
|
topN := len(documents)
|
|
|
|
|
if rerankConfig != nil && rerankConfig.TopN > 0 && rerankConfig.TopN < topN {
|
|
|
|
|
topN = rerankConfig.TopN
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
passages := make([]nvidiaRerankText, len(documents))
|
|
|
|
|
for i, doc := range documents {
|
|
|
|
|
passages[i] = nvidiaRerankText{Text: doc}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
reqBody := nvidiaRerankRequest{
|
|
|
|
|
Model: *modelName,
|
|
|
|
|
Query: nvidiaRerankText{Text: query},
|
|
|
|
|
Passages: passages,
|
|
|
|
|
Truncate: "END",
|
|
|
|
|
TopN: topN,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
jsonData, err := json.Marshal(reqBody)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-02 03:27:26 -04:00
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
|
2026-05-11 11:21:16 +02:00
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(jsonData))
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
resp, err := n.baseModel.httpClient.Do(req)
|
2026-05-11 11:21:16 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to send request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to read response: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
|
return nil, fmt.Errorf("Nvidia rerank API error: %s, body: %s", resp.Status, string(body))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var parsed nvidiaRerankResponse
|
|
|
|
|
if err = json.Unmarshal(body, &parsed); err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to parse response: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
rerankResponse := RerankResponse{Data: make([]RerankResult, 0, len(parsed.Rankings))}
|
|
|
|
|
for _, r := range parsed.Rankings {
|
|
|
|
|
if r.Index < 0 || r.Index >= len(documents) {
|
|
|
|
|
return nil, fmt.Errorf("unexpected rerank index %d for %d inputs", r.Index, len(documents))
|
|
|
|
|
}
|
|
|
|
|
rerankResponse.Data = append(rerankResponse.Data, RerankResult{
|
|
|
|
|
Index: r.Index,
|
|
|
|
|
RelevanceScore: r.Logit,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return &rerankResponse, nil
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
|
|
|
|
|
2026-05-12 17:17:44 +08:00
|
|
|
// TranscribeAudio transcribe audio
|
|
|
|
|
func (n *NvidiaModel) TranscribeAudio(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig) (*ASRResponse, error) {
|
|
|
|
|
return nil, fmt.Errorf("%s, no such method", n.Name())
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-03 14:09:07 +08:00
|
|
|
func (n *NvidiaModel) TranscribeAudioWithSender(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig, sender func(*string, *string) error) error {
|
|
|
|
|
return fmt.Errorf("%s, no such method", n.Name())
|
2026-05-12 17:17:44 +08:00
|
|
|
}
|
|
|
|
|
|
2026-05-15 18:41:43 +08:00
|
|
|
// AudioSpeech convert text to audio
|
|
|
|
|
func (n *NvidiaModel) AudioSpeech(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig) (*TTSResponse, error) {
|
2026-05-12 17:17:44 +08:00
|
|
|
return nil, fmt.Errorf("%s, no such method", n.Name())
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-03 14:09:07 +08:00
|
|
|
func (n *NvidiaModel) AudioSpeechWithSender(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig, sender func(*string, *string) error) error {
|
|
|
|
|
return fmt.Errorf("%s, no such method", n.Name())
|
2026-05-12 17:17:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// OCRFile OCR file
|
2026-06-03 14:09:07 +08:00
|
|
|
func (n *NvidiaModel) OCRFile(modelName *string, content []byte, url *string, apiConfig *APIConfig, ocrConfig *OCRConfig) (*OCRFileResponse, error) {
|
|
|
|
|
return nil, fmt.Errorf("%s, no such method", n.Name())
|
2026-05-12 17:17:44 +08:00
|
|
|
}
|
|
|
|
|
|
2026-05-15 12:29:52 +08:00
|
|
|
// ParseFile parse file
|
2026-06-03 14:09:07 +08:00
|
|
|
func (n *NvidiaModel) ParseFile(modelName *string, content []byte, url *string, apiConfig *APIConfig, parseFileConfig *ParseFileConfig) (*ParseFileResponse, error) {
|
|
|
|
|
return nil, fmt.Errorf("%s, no such method", n.Name())
|
2026-05-15 12:29:52 +08:00
|
|
|
}
|
|
|
|
|
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
// ListModels calls /v1/models on the configured NVIDIA NIM base URL
|
|
|
|
|
// and returns the list of available model ids. The endpoint is
|
|
|
|
|
// OpenAI-compatible, so the parsing follows the same shape used by
|
|
|
|
|
// the moonshot, xai, and openai drivers.
|
2026-05-07 14:17:57 +08:00
|
|
|
func (n NvidiaModel) ListModels(apiConfig *APIConfig) ([]string, error) {
|
2026-06-04 17:50:22 +08:00
|
|
|
if err := n.baseModel.APIConfigCheck(apiConfig); err != nil {
|
|
|
|
|
return nil, err
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
}
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
resolvedBaseURL, err := n.baseModel.GetBaseURL(apiConfig)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
}
|
2026-06-04 17:50:22 +08:00
|
|
|
baseURL := resolvedBaseURL
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
if baseURL == "" {
|
2026-06-04 17:50:22 +08:00
|
|
|
baseURL = resolvedBaseURL
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
}
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
url := fmt.Sprintf("%s/%s", baseURL, n.baseModel.URLSuffix.Models)
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
|
2026-06-02 03:27:26 -04:00
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
|
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
|
2026-06-04 17:50:22 +08:00
|
|
|
resp, err := n.baseModel.httpClient.Do(req)
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to send request: %w", err)
|
|
|
|
|
}
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to read response: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
|
return nil, fmt.Errorf("Nvidia models API error: %s, body: %s", resp.Status, string(body))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var result map[string]interface{}
|
|
|
|
|
if err = json.Unmarshal(body, &result); err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to parse response: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
data, ok := result["data"].([]interface{})
|
|
|
|
|
if !ok {
|
|
|
|
|
return nil, fmt.Errorf("invalid models list format")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
models := make([]string, 0, len(data))
|
|
|
|
|
for _, item := range data {
|
|
|
|
|
m, ok := item.(map[string]interface{})
|
|
|
|
|
if !ok {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
id, ok := m["id"].(string)
|
|
|
|
|
if !ok {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
models = append(models, id)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return models, nil
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (n NvidiaModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) {
|
|
|
|
|
return nil, fmt.Errorf("no such method")
|
|
|
|
|
}
|
|
|
|
|
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
// CheckConnection verifies that the configured NVIDIA NIM base URL
|
|
|
|
|
// is reachable and that the API key is accepted, by issuing a
|
|
|
|
|
// lightweight ListModels call. Mirrors the pattern used by the xai,
|
|
|
|
|
// moonshot, deepseek, aliyun, and gitee drivers.
|
2026-05-07 14:17:57 +08:00
|
|
|
func (n NvidiaModel) CheckConnection(apiConfig *APIConfig) error {
|
fix(go): implement ListModels and CheckConnection in NVIDIA driver (#14636)
### What problem does this PR solve?
The NVIDIA Go driver added in #14623 has a real chat path, but
\`ListModels\` and \`CheckConnection\` are stubs that always return \`no
such method\`. So:
- The model picker cannot auto-populate available NVIDIA NIM model ids.
Users have to type the full id by hand (e.g.
\`abacusai/dracarys-llama-3.1-70b-instruct\`).
- The "Check connection" button always fails for NVIDIA, even when the
base URL is reachable and the API key is accepted.
NVIDIA NIM is OpenAI-compatible. \`/v1/models\` works with the same
Bearer token used for chat. The
\`conf/models/nvidia.json\` file already wires the \`models\`
url_suffix, so no config change is needed.
### What this PR includes
- \`internal/entity/models/nvidia.go\`:
- \`ListModels\` now calls
\`GET ${BaseURL}/${URLSuffix.Models}\`, parses
\`response.data[*].id\`, and returns the list. Same shape
as the moonshot, xai, and openai drivers.
- \`CheckConnection\` now calls \`ListModels\` and returns its
error. Same pattern xai, moonshot, deepseek, aliyun, and
gitee already use.
\`Balance\`, \`Encode\`, and \`Rerank\` are still stubs in this PR and
can be added in follow-ups.
No JSON change. No factory change. No interface change.
### How the implementation works
- Region resolution falls back to \`default\` when the supplied region
is unknown, so a stray region value does not break a valid request.
- The Authorization header is only set when \`apiConfig\` and \`ApiKey\`
are non-nil and non-empty. This avoids a nil-pointer dereference and
lets self-hosted NIM deployments without a key still work.
- Non-200 responses propagate the upstream status line and body so the
user sees a real error message.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
### How was this tested?
- \`go build ./internal/entity/models/...\` in a clean go 1.25 image
(the go.mod minimum) returns exit 0.
- The full method set on \`NvidiaModel\` still matches the
\`ModelDriver\` interface.
- Pattern parity with the existing xai, moonshot, deepseek, aliyun,
gitee, and openai drivers.
Closes #14635
2026-05-08 06:04:28 +02:00
|
|
|
_, err := n.ListModels(apiConfig)
|
|
|
|
|
return err
|
2026-05-07 14:17:57 +08:00
|
|
|
}
|
2026-05-15 12:29:52 +08:00
|
|
|
|
2026-06-03 14:09:07 +08:00
|
|
|
func (n *NvidiaModel) ListTasks(apiConfig *APIConfig) ([]ListTaskStatus, error) {
|
|
|
|
|
return nil, fmt.Errorf("%s, no such method", n.Name())
|
2026-05-15 12:29:52 +08:00
|
|
|
}
|
|
|
|
|
|
2026-06-03 14:09:07 +08:00
|
|
|
func (n *NvidiaModel) ShowTask(taskID string, apiConfig *APIConfig) (*TaskResponse, error) {
|
|
|
|
|
return nil, fmt.Errorf("%s, no such method", n.Name())
|
2026-05-15 12:29:52 +08:00
|
|
|
}
|