Add TogetherAI chat provider (#14957)

## What

- Add TogetherAI as a chat provider backed by its OpenAI-compatible `/v1/chat/completions` API
- Register TogetherAI in the Go model factory and provider config
- Support non-streaming chat, SSE streaming chat, model listing, and connection checks

## Notes

- Uses the current TogetherAI OpenAI-compatible base URL `https://api.together.ai/v1`
- Forwards documented chat parameters from `ChatConfig`: `max_tokens`, `temperature`, `top_p`, `stop`, and GPT-OSS `reasoning_effort`
- Routes Together reasoning traces from `reasoning` / `reasoning_content` into `ReasonContent`

## Tests

- `go test -vet=off -run TestTogetherAI -count=1 ./internal/entity/models`
- `go test -vet=off -count=1 ./internal/entity/models`

Refs #14736
2026-07-04 09:39:32 +08:00 · 2026-05-18 21:10:42 -10:00
parent 09a06f1b00
commit 243d9ed281
4 changed files with 743 additions and 0 deletions
--- a/conf/models/togetherai.json
+++ b/conf/models/togetherai.json
@@ -0,0 +1,34 @@
+{
+  "name": "TogetherAI",
+  "url": {
+    "default": "https://api.together.ai/v1"
+  },
+  "url_suffix": {
+    "chat": "chat/completions",
+    "models": "models"
+  },
+  "class": "together",
+  "models": [
+    {
+      "name": "openai/gpt-oss-20b",
+      "max_tokens": 131072,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+      "max_tokens": 131072,
+      "model_types": [
+        "chat"
+      ]
+    },
+    {
+      "name": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
+      "max_tokens": 262144,
+      "model_types": [
+        "chat"
+      ]
+    }
+  ]
+}
--- a/internal/entity/models/factory.go
+++ b/internal/entity/models/factory.go
@@ -97,6 +97,8 @@ func (f *ModelFactory) CreateModelDriver(providerName string, baseURL map[string
 		return NewNovitaModel(baseURL, urlSuffix), nil
 	case "replicate":
 		return NewReplicateModel(baseURL, urlSuffix), nil
+	case "togetherai":
+		return NewTogetherAIModel(baseURL, urlSuffix), nil
 	case "voyage":
 		return NewVoyageModel(baseURL, urlSuffix), nil
 	case "paddleocr":
--- a/internal/entity/models/togetherai.go
+++ b/internal/entity/models/togetherai.go
@@ -0,0 +1,430 @@
+//
+//  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+//
+
+package models
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+)
+
+type TogetherAIModel struct {
+	BaseURL    map[string]string
+	URLSuffix  URLSuffix
+	httpClient *http.Client
+}
+
+func NewTogetherAIModel(baseURL map[string]string, urlSuffix URLSuffix) *TogetherAIModel {
+	transport := http.DefaultTransport.(*http.Transport).Clone()
+	transport.MaxIdleConns = 100
+	transport.MaxIdleConnsPerHost = 10
+	transport.IdleConnTimeout = 90 * time.Second
+	transport.DisableCompression = false
+	transport.ResponseHeaderTimeout = 60 * time.Second
+
+	return &TogetherAIModel{
+		BaseURL:   baseURL,
+		URLSuffix: urlSuffix,
+		httpClient: &http.Client{
+			Transport: transport,
+		},
+	}
+}
+
+func (t *TogetherAIModel) NewInstance(baseURL map[string]string) ModelDriver {
+	return NewTogetherAIModel(baseURL, t.URLSuffix)
+}
+
+func (t *TogetherAIModel) Name() string {
+	return "togetherai"
+}
+
+func (t *TogetherAIModel) baseURLForRegion(region string) (string, error) {
+	base, ok := t.BaseURL[region]
+	if !ok || base == "" {
+		return "", fmt.Errorf("togetherai: no base URL configured for region %q", region)
+	}
+	return strings.TrimSuffix(base, "/"), nil
+}
+
+type togetherAIReasoningOptions struct {
+	Enabled bool `json:"enabled"`
+}
+
+func (t *TogetherAIModel) chatPayload(modelName string, messages []Message, stream bool, chatModelConfig *ChatConfig) map[string]interface{} {
+	apiMessages := make([]map[string]interface{}, len(messages))
+	for i, msg := range messages {
+		apiMessages[i] = map[string]interface{}{
+			"role":    msg.Role,
+			"content": msg.Content,
+		}
+	}
+
+	reqBody := map[string]interface{}{
+		"model":    modelName,
+		"messages": apiMessages,
+		"stream":   stream,
+	}
+
+	if chatModelConfig != nil {
+		if chatModelConfig.MaxTokens != nil {
+			reqBody["max_tokens"] = *chatModelConfig.MaxTokens
+		}
+		if chatModelConfig.Temperature != nil {
+			reqBody["temperature"] = *chatModelConfig.Temperature
+		}
+		if chatModelConfig.TopP != nil {
+			reqBody["top_p"] = *chatModelConfig.TopP
+		}
+		if chatModelConfig.Stop != nil {
+			reqBody["stop"] = *chatModelConfig.Stop
+		}
+		if chatModelConfig.Thinking != nil {
+			reqBody["reasoning"] = togetherAIReasoningOptions{
+				Enabled: *chatModelConfig.Thinking,
+			}
+		}
+		if chatModelConfig.Effort != nil && strings.Contains(strings.ToLower(modelName), "gpt-oss") {
+			reqBody["reasoning_effort"] = *chatModelConfig.Effort
+		}
+	}
+
+	return reqBody
+}
+
+func (t *TogetherAIModel) chatURL(apiConfig *APIConfig) (string, error) {
+	region := "default"
+	if apiConfig != nil && apiConfig.Region != nil && *apiConfig.Region != "" {
+		region = *apiConfig.Region
+	}
+
+	baseURL, err := t.baseURLForRegion(region)
+	if err != nil {
+		return "", err
+	}
+	return fmt.Sprintf("%s/%s", baseURL, t.URLSuffix.Chat), nil
+}
+
+type togetherAIChatMessage struct {
+	Content          string `json:"content"`
+	ReasoningContent string `json:"reasoning_content"`
+	Reasoning        string `json:"reasoning"`
+}
+
+type togetherAIChatChoice struct {
+	Message      togetherAIChatMessage `json:"message"`
+	Delta        togetherAIChatMessage `json:"delta"`
+	FinishReason string                `json:"finish_reason"`
+}
+
+type togetherAIChatResponse struct {
+	Choices      []togetherAIChatChoice `json:"choices"`
+	Error        interface{}            `json:"error"`
+	FinishReason string                 `json:"finish_reason"`
+}
+
+func (t *TogetherAIModel) ChatWithMessages(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig) (*ChatResponse, error) {
+	if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
+		return nil, fmt.Errorf("api key is required")
+	}
+	if strings.TrimSpace(modelName) == "" {
+		return nil, fmt.Errorf("model name is required")
+	}
+	if len(messages) == 0 {
+		return nil, fmt.Errorf("messages is empty")
+	}
+
+	url, err := t.chatURL(apiConfig)
+	if err != nil {
+		return nil, err
+	}
+
+	jsonData, err := json.Marshal(t.chatPayload(modelName, messages, false, chatModelConfig))
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := t.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	var result togetherAIChatResponse
+	if err = json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+	if result.Error != nil {
+		return nil, fmt.Errorf("togetherai: upstream error: %v", result.Error)
+	}
+	if len(result.Choices) == 0 {
+		return nil, fmt.Errorf("no choices in response")
+	}
+
+	content := result.Choices[0].Message.Content
+	reasonContent := result.Choices[0].Message.ReasoningContent
+	if reasonContent == "" {
+		reasonContent = result.Choices[0].Message.Reasoning
+	}
+	return &ChatResponse{
+		Answer:        &content,
+		ReasonContent: &reasonContent,
+	}, nil
+}
+
+const togetherAIStreamTimeout = 10 * time.Minute
+
+// ChatStreamlyWithSender posts a streaming chat completion and relays each SSE
+// delta through sender: content text as the first argument, reasoning text as
+// the second. After a terminal event ([DONE] or a finish_reason) it sends a
+// final "[DONE]" content chunk. Invalid input, transport or HTTP failures, bad
+// events, sender errors, and streams that end without a terminator are errors.
+func (t *TogetherAIModel) ChatStreamlyWithSender(modelName string, messages []Message, apiConfig *APIConfig, chatModelConfig *ChatConfig, sender func(*string, *string) error) error {
+	if sender == nil {
+		return fmt.Errorf("sender is required")
+	}
+	if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
+		return fmt.Errorf("api key is required")
+	}
+	if strings.TrimSpace(modelName) == "" {
+		return fmt.Errorf("model name is required")
+	}
+	if len(messages) == 0 {
+		return fmt.Errorf("messages is empty")
+	}
+	if chatModelConfig != nil && chatModelConfig.Stream != nil && !*chatModelConfig.Stream {
+		return fmt.Errorf("stream must be true in ChatStreamlyWithSender")
+	}
+
+	url, err := t.chatURL(apiConfig)
+	if err != nil {
+		return err
+	}
+	jsonData, err := json.Marshal(t.chatPayload(modelName, messages, true, chatModelConfig))
+	if err != nil {
+		return fmt.Errorf("failed to marshal request: %w", err)
+	}
+	// ResponseHeaderTimeout caps the initial header wait. This context
+	// also caps the body-read phase so a stalled SSE stream cannot hold
+	// the caller's goroutine and connection indefinitely.
+	ctx, cancel := context.WithTimeout(context.Background(), togetherAIStreamTimeout)
+	defer cancel()
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return fmt.Errorf("failed to create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+	req.Header.Set("Accept", "text/event-stream")
+	resp, err := t.httpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	scanner := bufio.NewScanner(resp.Body)
+	scanner.Buffer(make([]byte, 64*1024), 1024*1024)
+	sawTerminal := false
+	for scanner.Scan() {
+		line := scanner.Text()
+		if !strings.HasPrefix(line, "data:") {
+			continue
+		}
+		data := strings.TrimSpace(line[5:])
+		if data == "[DONE]" {
+			sawTerminal = true
+			break
+		}
+
+		var event togetherAIChatResponse
+		if err = json.Unmarshal([]byte(data), &event); err != nil {
+			return fmt.Errorf("togetherai: invalid SSE event: %w", err)
+		}
+		if event.Error != nil {
+			return fmt.Errorf("togetherai: upstream stream error: %v", event.Error)
+		}
+		if len(event.Choices) == 0 {
+			continue
+		}
+		choice := event.Choices[0]
+		// The trace may arrive as reasoning_content or reasoning. Treat them as
+		// alternatives, in the same order ChatWithMessages uses, so a delta that
+		// carries both fields is not relayed to the caller twice.
+		reasoning := choice.Delta.ReasoningContent
+		if reasoning == "" {
+			reasoning = choice.Delta.Reasoning
+		}
+		if reasoning != "" {
+			if err := sender(nil, &reasoning); err != nil {
+				return err
+			}
+		}
+		if choice.Delta.Content != "" {
+			if err := sender(&choice.Delta.Content, nil); err != nil {
+				return err
+			}
+		}
+		if choice.FinishReason != "" || event.FinishReason != "" {
+			sawTerminal = true
+			break
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		return fmt.Errorf("failed to scan response body: %w", err)
+	}
+	if !sawTerminal {
+		return fmt.Errorf("togetherai: stream ended before [DONE] or finish_reason")
+	}
+	endOfStream := "[DONE]"
+	return sender(&endOfStream, nil)
+}
+
+type togetherAIModelInfo struct {
+	ID string `json:"id"`
+}
+
+func (t *TogetherAIModel) ListModels(apiConfig *APIConfig) ([]string, error) {
+	if apiConfig == nil || apiConfig.ApiKey == nil || *apiConfig.ApiKey == "" {
+		return nil, fmt.Errorf("api key is required")
+	}
+
+	region := "default"
+	if apiConfig.Region != nil && *apiConfig.Region != "" {
+		region = *apiConfig.Region
+	}
+
+	baseURL, err := t.baseURLForRegion(region)
+	if err != nil {
+		return nil, err
+	}
+	url := fmt.Sprintf("%s/%s", baseURL, t.URLSuffix.Models)
+
+	ctx, cancel := context.WithTimeout(context.Background(), nonStreamCallTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
+
+	resp, err := t.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	var result []togetherAIModelInfo
+	if err = json.Unmarshal(body, &result); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err)
+	}
+
+	models := make([]string, 0, len(result))
+	for _, model := range result {
+		if model.ID != "" {
+			models = append(models, model.ID)
+		}
+	}
+	return models, nil
+}
+
+func (t *TogetherAIModel) CheckConnection(apiConfig *APIConfig) error {
+	_, err := t.ListModels(apiConfig)
+	return err
+}
+
+func (t *TogetherAIModel) Embed(modelName *string, texts []string, apiConfig *APIConfig, embeddingConfig *EmbeddingConfig) ([]EmbeddingData, error) {
+	return nil, fmt.Errorf("%s, no such method", t.Name())
+}
+
+func (t *TogetherAIModel) Rerank(modelName *string, query string, documents []string, apiConfig *APIConfig, rerankConfig *RerankConfig) (*RerankResponse, error) {
+	return nil, fmt.Errorf("%s, no such method", t.Name())
+}
+
+func (t *TogetherAIModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error) {
+	return nil, fmt.Errorf("%s, no such method", t.Name())
+}
+
+func (t *TogetherAIModel) TranscribeAudio(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig) (*ASRResponse, error) {
+	return nil, fmt.Errorf("%s, no such method", t.Name())
+}
+
+func (t *TogetherAIModel) TranscribeAudioWithSender(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig, sender func(*string, *string) error) error {
+	return fmt.Errorf("%s, no such method", t.Name())
+}
+
+func (t *TogetherAIModel) AudioSpeech(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig) (*TTSResponse, error) {
+	return nil, fmt.Errorf("%s, no such method", t.Name())
+}
+
+func (t *TogetherAIModel) AudioSpeechWithSender(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig, sender func(*string, *string) error) error {
+	return fmt.Errorf("%s, no such method", t.Name())
+}
+
+func (t *TogetherAIModel) OCRFile(modelName *string, content []byte, url *string, apiConfig *APIConfig, ocrConfig *OCRConfig) (*OCRFileResponse, error) {
+	return nil, fmt.Errorf("%s, no such method", t.Name())
+}
+
+func (t *TogetherAIModel) ParseFile(modelName *string, content []byte, url *string, apiConfig *APIConfig, parseFileConfig *ParseFileConfig) (*ParseFileResponse, error) {
+	return nil, fmt.Errorf("%s, no such method", t.Name())
+}
+
+func (t *TogetherAIModel) ListTasks(apiConfig *APIConfig) ([]ListTaskStatus, error) {
+	return nil, fmt.Errorf("%s, no such method", t.Name())
+}
+
+func (t *TogetherAIModel) ShowTask(taskID string, apiConfig *APIConfig) (*TaskResponse, error) {
+	return nil, fmt.Errorf("%s, no such method", t.Name())
+}
--- a/internal/entity/models/togetherai_test.go
+++ b/internal/entity/models/togetherai_test.go
@@ -0,0 +1,277 @@
+package models
+
+import (
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+func newTogetherAIServer(t *testing.T, handler func(t *testing.T, r *http.Request, body map[string]interface{}, w http.ResponseWriter)) *httptest.Server {
+	t.Helper()
+	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if got := r.Header.Get("Authorization"); got != "Bearer test-key" {
+			t.Errorf("expected Authorization=Bearer test-key, got %q", got)
+			return
+		}
+		if got := r.Header.Get("Content-Type"); !strings.HasPrefix(got, "application/json") {
+			t.Errorf("expected Content-Type to start with application/json, got %q", got)
+			return
+		}
+		var body map[string]interface{}
+		if r.Method == http.MethodPost {
+			raw, err := io.ReadAll(r.Body)
+			if err != nil {
+				t.Errorf("read body: %v", err)
+				return
+			}
+			if err := json.Unmarshal(raw, &body); err != nil {
+				t.Errorf("unmarshal: %v\nraw=%s", err, string(raw))
+				return
+			}
+		}
+		handler(t, r, body, w)
+	}))
+}
+
+func newTogetherAIForTest(baseURL string) *TogetherAIModel {
+	return NewTogetherAIModel(
+		map[string]string{"default": baseURL},
+		URLSuffix{Chat: "chat/completions", Models: "models"},
+	)
+}
+
+func TestTogetherAIName(t *testing.T) {
+	if got := newTogetherAIForTest("http://unused").Name(); got != "togetherai" {
+		t.Errorf("Name()=%q", got)
+	}
+}
+
+func TestTogetherAIFactory(t *testing.T) {
+	driver, err := NewModelFactory().CreateModelDriver("TogetherAI", map[string]string{"default": "http://unused"}, URLSuffix{})
+	if err != nil {
+		t.Fatalf("CreateModelDriver: %v", err)
+	}
+	if _, ok := driver.(*TogetherAIModel); !ok {
+		t.Fatalf("driver type=%T, want *TogetherAIModel", driver)
+	}
+}
+
+func TestTogetherAIChatHappyPath(t *testing.T) {
+	srv := newTogetherAIServer(t, func(t *testing.T, r *http.Request, body map[string]interface{}, w http.ResponseWriter) {
+		if r.URL.Path != "/chat/completions" {
+			t.Errorf("path=%s", r.URL.Path)
+		}
+		if body["model"] != "openai/gpt-oss-20b" {
+			t.Errorf("model=%v", body["model"])
+		}
+		if body["stream"] != false {
+			t.Errorf("stream=%v want false", body["stream"])
+		}
+		if body["reasoning_effort"] != "high" {
+			t.Errorf("reasoning_effort=%v", body["reasoning_effort"])
+		}
+		_ = json.NewEncoder(w).Encode(map[string]interface{}{
+			"choices": []map[string]interface{}{{
+				"message": map[string]interface{}{
+					"content":   "pong",
+					"reasoning": "thinking",
+				},
+			}},
+		})
+	})
+	defer srv.Close()
+
+	apiKey := "test-key"
+	mt := 32
+	temp := 0.3
+	topP := 0.9
+	stop := []string{"END"}
+	effort := "high"
+	resp, err := newTogetherAIForTest(srv.URL).ChatWithMessages(
+		"openai/gpt-oss-20b",
+		[]Message{{Role: "user", Content: "ping"}},
+		&APIConfig{ApiKey: &apiKey},
+		&ChatConfig{MaxTokens: &mt, Temperature: &temp, TopP: &topP, Stop: &stop, Effort: &effort},
+	)
+	if err != nil {
+		t.Fatalf("ChatWithMessages: %v", err)
+	}
+	if *resp.Answer != "pong" {
+		t.Errorf("Answer=%q", *resp.Answer)
+	}
+	if *resp.ReasonContent != "thinking" {
+		t.Errorf("ReasonContent=%q", *resp.ReasonContent)
+	}
+}
+
+// TestTogetherAIChatForwardsReasoningEnabled checks Thinking is sent as reasoning.enabled without reasoning_effort.
+func TestTogetherAIChatForwardsReasoningEnabled(t *testing.T) {
+	srv := newTogetherAIServer(t, func(t *testing.T, r *http.Request, body map[string]interface{}, w http.ResponseWriter) {
+		if body["model"] != "Qwen/Qwen3.5-9B" {
+			t.Errorf("model=%v", body["model"])
+		}
+		reasoning, ok := body["reasoning"].(map[string]interface{})
+		if !ok {
+			// The handler runs off the test goroutine, where t.Fatalf (FailNow) must not be called.
+			t.Errorf("reasoning=%T, want object", body["reasoning"])
+		} else if reasoning["enabled"] != false {
+			t.Errorf("reasoning.enabled=%v, want false", reasoning["enabled"])
+		}
+		if _, ok := body["reasoning_effort"]; ok {
+			t.Errorf("reasoning_effort should not be sent for non-GPT-OSS model: %v", body["reasoning_effort"])
+		}
+		_ = json.NewEncoder(w).Encode(map[string]interface{}{
+			"choices": []map[string]interface{}{{
+				"message": map[string]interface{}{
+					"content": "pong",
+				},
+			}},
+		})
+	})
+	defer srv.Close()
+	apiKey := "test-key"
+	thinking := false
+	resp, err := newTogetherAIForTest(srv.URL).ChatWithMessages(
+		"Qwen/Qwen3.5-9B",
+		[]Message{{Role: "user", Content: "ping"}},
+		&APIConfig{ApiKey: &apiKey},
+		&ChatConfig{Thinking: &thinking},
+	)
+	if err != nil {
+		t.Fatalf("ChatWithMessages: %v", err)
+	}
+	if *resp.Answer != "pong" {
+		t.Errorf("Answer=%q", *resp.Answer)
+	}
+}
+
+func TestTogetherAIChatRequiresModelName(t *testing.T) {
+	apiKey := "test-key"
+	_, err := newTogetherAIForTest("http://unused").ChatWithMessages("", []Message{{Role: "user", Content: "x"}}, &APIConfig{ApiKey: &apiKey}, nil)
+	if err == nil || !strings.Contains(err.Error(), "model name is required") {
+		t.Errorf("expected model-name error, got %v", err)
+	}
+}
+
+func TestTogetherAIStreamHappyPath(t *testing.T) {
+	srv := newTogetherAIServer(t, func(t *testing.T, r *http.Request, body map[string]interface{}, w http.ResponseWriter) {
+		if r.URL.Path != "/chat/completions" {
+			t.Errorf("path=%s", r.URL.Path)
+		}
+		if body["stream"] != true {
+			t.Errorf("stream=%v want true", body["stream"])
+		}
+		if got := r.Header.Get("Accept"); got != "text/event-stream" {
+			t.Errorf("Accept=%q", got)
+		}
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = io.WriteString(w,
+			`data: {"choices":[{"delta":{"reasoning":"think "}}]}`+"\n"+
+				`data: {"choices":[{"delta":{"content":"Hello"}}]}`+"\n"+
+				`data: {"choices":[{"delta":{"content":" world"},"finish_reason":"stop"}]}`+"\n",
+		)
+	})
+	defer srv.Close()
+
+	apiKey := "test-key"
+	var content []string
+	var reasoning []string
+	err := newTogetherAIForTest(srv.URL).ChatStreamlyWithSender(
+		"meta-llama/Llama-3.3-70B-Instruct-Turbo",
+		[]Message{{Role: "user", Content: "hi"}},
+		&APIConfig{ApiKey: &apiKey}, nil,
+		func(c *string, r *string) error {
+			if c != nil {
+				content = append(content, *c)
+			}
+			if r != nil {
+				reasoning = append(reasoning, *r)
+			}
+			return nil
+		},
+	)
+	if err != nil {
+		t.Fatalf("ChatStreamlyWithSender: %v", err)
+	}
+	if strings.Join(content, "") != "Hello world[DONE]" {
+		t.Errorf("content=%q", strings.Join(content, ""))
+	}
+	if strings.Join(reasoning, "") != "think " {
+		t.Errorf("reasoning=%q", strings.Join(reasoning, ""))
+	}
+}
+
+func TestTogetherAIStreamStopsOnRootFinishReason(t *testing.T) {
+	srv := newTogetherAIServer(t, func(t *testing.T, r *http.Request, body map[string]interface{}, w http.ResponseWriter) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = io.WriteString(w,
+			`data: {"choices":[{"delta":{"content":"Done"}}],"finish_reason":"stop"}`+"\n",
+		)
+	})
+	defer srv.Close()
+
+	apiKey := "test-key"
+	var chunks []string
+	err := newTogetherAIForTest(srv.URL).ChatStreamlyWithSender(
+		"meta-llama/Llama-3.3-70B-Instruct-Turbo",
+		[]Message{{Role: "user", Content: "hi"}},
+		&APIConfig{ApiKey: &apiKey}, nil,
+		func(c *string, _ *string) error {
+			if c != nil {
+				chunks = append(chunks, *c)
+			}
+			return nil
+		},
+	)
+	if err != nil {
+		t.Fatalf("ChatStreamlyWithSender: %v", err)
+	}
+	if strings.Join(chunks, "") != "Done[DONE]" {
+		t.Errorf("chunks=%q", strings.Join(chunks, ""))
+	}
+}
+
+func TestTogetherAIListModelsAndCheckConnection(t *testing.T) {
+	srv := newTogetherAIServer(t, func(t *testing.T, r *http.Request, body map[string]interface{}, w http.ResponseWriter) {
+		if r.Method != http.MethodGet {
+			t.Errorf("method=%s", r.Method)
+		}
+		if r.URL.Path != "/models" {
+			t.Errorf("path=%s", r.URL.Path)
+		}
+		_ = json.NewEncoder(w).Encode([]map[string]interface{}{
+			{"id": "openai/gpt-oss-20b"},
+			{"id": "meta-llama/Llama-3.3-70B-Instruct-Turbo"},
+		})
+	})
+	defer srv.Close()
+
+	apiKey := "test-key"
+	model := newTogetherAIForTest(srv.URL)
+	models, err := model.ListModels(&APIConfig{ApiKey: &apiKey})
+	if err != nil {
+		t.Fatalf("ListModels: %v", err)
+	}
+	if strings.Join(models, ",") != "openai/gpt-oss-20b,meta-llama/Llama-3.3-70B-Instruct-Turbo" {
+		t.Errorf("models=%v", models)
+	}
+	if err := model.CheckConnection(&APIConfig{ApiKey: &apiKey}); err != nil {
+		t.Fatalf("CheckConnection: %v", err)
+	}
+}
+
+func TestTogetherAIUnsupportedMethods(t *testing.T) {
+	m := newTogetherAIForTest("http://unused")
+	if _, err := m.Embed(nil, nil, nil, nil); err == nil || !strings.Contains(err.Error(), "no such method") {
+		t.Errorf("Embed error=%v", err)
+	}
+	if _, err := m.Rerank(nil, "", nil, nil, nil); err == nil || !strings.Contains(err.Error(), "no such method") {
+		t.Errorf("Rerank error=%v", err)
+	}
+	if _, err := m.Balance(nil); err == nil || !strings.Contains(err.Error(), "no such method") {
+		t.Errorf("Balance error=%v", err)
+	}
+}