diff --git a/conf/models/ollama.json b/conf/models/ollama.json index 58adb17efe..9c1e460a82 100644 --- a/conf/models/ollama.json +++ b/conf/models/ollama.json @@ -1,9 +1,9 @@ { "name": "ollama", "url_suffix": { - "chat": "chat/completions", - "models": "models", - "embedding": "embeddings" + "chat": "api/chat", + "models": "api/ps", + "embedding": "api/embed" }, "class": "local" } \ No newline at end of file diff --git a/conf/models/xinference.json b/conf/models/xinference.json index bcb9ddc457..cf8fb61fa9 100644 --- a/conf/models/xinference.json +++ b/conf/models/xinference.json @@ -4,7 +4,9 @@ "chat": "v1/chat/completions", "embedding": "v1/embeddings", "models": "v1/models", - "rerank": "v1/rerank" + "rerank": "v1/rerank", + "asr": "v1/audio/transcriptions", + "tts": "v1/audio/speech" }, "class": "local" } diff --git a/internal/entity/models/ollama.go b/internal/entity/models/ollama.go index d95e9e8c73..bfeb26c865 100644 --- a/internal/entity/models/ollama.go +++ b/internal/entity/models/ollama.go @@ -8,7 +8,6 @@ import ( "fmt" "io" "net/http" - "ragflow/internal/common" "strings" "time" ) @@ -78,9 +77,15 @@ func (o *OllamaModel) ChatWithMessages(modelName string, messages []Message, api // Convert messages to API format apiMessages := make([]map[string]interface{}, len(messages)) for i, msg := range messages { + arr, _ := msg.Content.([]interface{}) + + first, _ := arr[0].(map[string]interface{}) + + text, _ := first["text"].(string) + apiMessages[i] = map[string]interface{}{ "role": msg.Role, - "content": msg.Content, + "content": text, } } @@ -113,15 +118,13 @@ func (o *OllamaModel) ChatWithMessages(modelName string, messages []Message, api reqBody["stop"] = *chatModelConfig.Stop } - if chatModelConfig.Thinking != nil { + if chatModelConfig.Effort != nil && *chatModelConfig.Effort != "" { + if strings.HasPrefix(strings.ToLower(modelName), "gpt-oss") { + reqBody["think"] = *chatModelConfig.Effort + } + } else if chatModelConfig.Thinking != nil { if *chatModelConfig.Thinking { - reqBody["thinking"] = map[string]interface{}{ - "type": "enabled", - } - } else { - reqBody["thinking"] = map[string]interface{}{ - "type": "disabled", - } + reqBody["think"] = true } } } @@ -137,7 +140,6 @@ func (o *OllamaModel) ChatWithMessages(modelName string, messages []Message, api } req.Header.Set("Content-Type", "application/json") - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) resp, err := o.httpClient.Do(req) if err != nil { @@ -160,35 +162,19 @@ func (o *OllamaModel) ChatWithMessages(modelName string, messages []Message, api return nil, fmt.Errorf("failed to parse response: %w", err) } - choices, ok := result["choices"].([]interface{}) - if !ok || len(choices) == 0 { - return nil, fmt.Errorf("no choices in response") - } - - firstChoice, ok := choices[0].(map[string]interface{}) + message, ok := result["message"].(map[string]interface{}) if !ok { - return nil, fmt.Errorf("invalid choice format") + return nil, fmt.Errorf("failed to parse response: message not found") } - messageMap, ok := firstChoice["message"].(map[string]interface{}) + content, ok := message["content"].(string) if !ok { - return nil, fmt.Errorf("invalid message format") + return nil, fmt.Errorf("failed to parse response: content not found") } - content, ok := messageMap["content"].(string) + reasonContent, ok := message["thinking"].(string) if !ok { - return nil, fmt.Errorf("invalid content format") - } - - var reasonContent string - if chatModelConfig != nil && chatModelConfig.Thinking != nil && *chatModelConfig.Thinking { - reasonContent, ok = messageMap["reasoning_content"].(string) - if !ok { - return nil, fmt.Errorf("invalid content format") - } - if reasonContent != "" && reasonContent[0] == '\n' { - reasonContent = reasonContent[1:] - } + return nil, fmt.Errorf("failed to parse response: thinking not found") } chatResponse := &ChatResponse{ @@ -218,9 +204,15 @@ func (o *OllamaModel) ChatStreamlyWithSender(modelName string, messages []Messag // Convert messages to API format (supporting multimodal content) apiMessages := make([]map[string]interface{}, len(messages)) for i, msg := range messages { + arr, _ := msg.Content.([]interface{}) + + first, _ := arr[0].(map[string]interface{}) + + text, _ := first["text"].(string) + apiMessages[i] = map[string]interface{}{ "role": msg.Role, - "content": msg.Content, + "content": text, } } @@ -255,15 +247,13 @@ func (o *OllamaModel) ChatStreamlyWithSender(modelName string, messages []Messag reqBody["stop"] = *modelConfig.Stop } - if modelConfig.Thinking != nil { + if modelConfig.Effort != nil && *modelConfig.Effort != "" { + if strings.HasPrefix(strings.ToLower(modelName), "gpt-oss") { + reqBody["think"] = *modelConfig.Effort + } + } else if modelConfig.Thinking != nil { if *modelConfig.Thinking { - reqBody["thinking"] = map[string]interface{}{ - "type": "enabled", - } - } else { - reqBody["thinking"] = map[string]interface{}{ - "type": "disabled", - } + reqBody["think"] = true } } @@ -278,7 +268,6 @@ func (o *OllamaModel) ChatStreamlyWithSender(modelName string, messages []Messag } req.Header.Set("Content-Type", "application/json") - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) resp, err := o.httpClient.Do(req) if err != nil { @@ -294,66 +283,40 @@ func (o *OllamaModel) ChatStreamlyWithSender(modelName string, messages []Messag // SSE parsing: read line by line scanner := bufio.NewScanner(resp.Body) for scanner.Scan() { - line := scanner.Text() - common.Info(line) + line := strings.TrimSpace(scanner.Text()) - // SSE data line starts with "data:" - if !strings.HasPrefix(line, "data:") { + // ignore the blank + if line == "" { continue } - // Extract JSON after "data:" - data := strings.TrimSpace(line[5:]) - - // [DONE] marks the end of stream - if data == "[DONE]" { - break - } - // Parse the JSON event var event map[string]interface{} - if err = json.Unmarshal([]byte(data), &event); err != nil { + if err = json.Unmarshal([]byte(line), &event); err != nil { continue } - choices, ok := event["choices"].([]interface{}) - if !ok || len(choices) == 0 { - continue - } - - firstChoice, ok := choices[0].(map[string]interface{}) - if !ok { - continue - } - - delta, ok := firstChoice["delta"].(map[string]interface{}) - if !ok { - continue - } - - reasoningContent, ok := delta["reasoning_content"].(string) - if ok && reasoningContent != "" { - if err := sender(nil, &reasoningContent); err != nil { - return err + if messageMap, ok := event["message"].(map[string]interface{}); ok { + if reasoningContent, exists := messageMap["thinking"].(string); exists && reasoningContent != "" { + if err := sender(nil, &reasoningContent); err != nil { + return err + } + } + if content, exists := messageMap["content"].(string); exists && content != "" { + if err := sender(&content, nil); err != nil { + return err + } } } - content, ok := delta["content"].(string) - if ok && content != "" { - if err := sender(&content, nil); err != nil { - return err - } - } - - finishReason, ok := firstChoice["finish_reason"].(string) - if ok && finishReason != "" { + if done, ok := event["done"].(bool); ok && done { break } } - // Send [DONE] marker for OpenAI compatibility + // Send [DONE] marker for OpenAI compatibility with RAGFlow frontend endOfStream := "[DONE]" - if err = sender(&endOfStream, nil); err != nil { + if err := sender(&endOfStream, nil); err != nil { return err } @@ -425,17 +388,30 @@ func (o *OllamaModel) Embed(modelName *string, texts []string, apiConfig *APICon return nil, fmt.Errorf("Ollama embeddings API error: %s, body: %s", resp.Status, string(body)) } - var parsed openaiEmbeddingResponse - if err = json.Unmarshal(body, &parsed); err != nil { - return nil, fmt.Errorf("failed to parse response: %w", err) + var embedResp struct { + Model string `json:"model"` + Embeddings [][]float64 `json:"embeddings"` } - var embeddings []EmbeddingData - for _, dataElem := range parsed.Data { - var embeddingData EmbeddingData - embeddingData.Embedding = dataElem.Embedding - embeddingData.Index = dataElem.Index - embeddings = append(embeddings, embeddingData) + if err = json.Unmarshal(body, &embedResp); err != nil { + return nil, fmt.Errorf("failed to unmarshal response: %w", err) + } + + if len(embedResp.Embeddings) == 0 { + return nil, fmt.Errorf("no embeddings returned") + } + + embeddings := make([]EmbeddingData, 0, len(embedResp.Embeddings)) + + for i, emb := range embedResp.Embeddings { + if len(emb) == 0 { + return nil, fmt.Errorf("empty embedding at index %d", i) + } + + embeddings = append(embeddings, EmbeddingData{ + Embedding: emb, + Index: i, + }) } return embeddings, nil @@ -489,7 +465,6 @@ func (o *OllamaModel) ListModels(apiConfig *APIConfig) ([]string, error) { } url := fmt.Sprintf("%s/%s", baseURL, o.URLSuffix.Models) - reqBody := map[string]interface{}{} jsonData, err := json.Marshal(reqBody) @@ -503,12 +478,6 @@ func (o *OllamaModel) ListModels(apiConfig *APIConfig) ([]string, error) { } req.Header.Set("Content-Type", "application/json") - // Ollama is a local provider and the API key is optional. Only set - // the Authorization header when a non-empty key was supplied. This - // also avoids a nil-pointer dereference on apiConfig or ApiKey. - if apiConfig != nil && apiConfig.ApiKey != nil && *apiConfig.ApiKey != "" { - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) - } resp, err := o.httpClient.Do(req) if err != nil { @@ -533,9 +502,9 @@ func (o *OllamaModel) ListModels(apiConfig *APIConfig) ([]string, error) { // convert result["data"] to []map[string]interface{} models := make([]string, 0) - for _, model := range result["data"].([]interface{}) { + for _, model := range result["models"].([]interface{}) { modelMap := model.(map[string]interface{}) - modelName := modelMap["id"].(string) + modelName := modelMap["name"].(string) models = append(models, modelName) }