mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Go: implement reasoning_chat, TTS, ASR for Groq (#15153)
### What problem does this PR solve?
Go: implement reasoning chat (`reasoning_chat`), text-to-speech (TTS), and automatic speech recognition (ASR) for the Groq model provider.
**Verify from CLI**
```
RAGFlow(user)> think chat with 'qwen/qwen3-32b@test@groq' message 'who r u'
Thinking: Okay, the user asked, who r u. I need to determine what the user is asking. They may be asking about my identity. I should introduce my name and basic functions. The user might want to know what I can do, so I should list some common use cases, such as answering questions, creating writing, coding, and expressing opinions. The user may be curious about how they can interact with me, so they can be advised to ask any questions or provide instructions. Keep your answers conversational, avoid overly technical terms, keep answers concise, and encourage further interaction. Check if there's any ambiguity in the answer and make sure it's accurate and meets the user's needs. Also consider if there are other aspects the user may be interested in, such as my training data or performance. But since the question is basic, I'll focus on the essentials first and invite the user to ask more. In summary, respond to the user's questions by introducing yourself, your functions, and encouraging further interaction.
Answer: Hello! I'm Qwen. I am a large-scale language model developed by Tongyi Lab, designed to assist you in various ways, such as answering questions, creating text, logical reasoning, programming, and more. I aim to provide clear, accurate, and helpful information and support. How can I assist you today? Feel free to ask any questions or give me tasks! 😊
Time: 2.199908
RAGFlow(user)> stream think chat with 'openai/gpt-oss-20b@test@groq' message 'who r u'
Thinking: to respond politely.
Answer: ’m ChatGPT—an AI language model created by OpenAI. I’m here to answer questions, offer explanations, and help with a wide range of topics. How can I assist you today?
RAGFlow(user)> tts with 'canopylabs/orpheus-arabic-saudi@test@groq' text 'hello? show yourself' play format 'wav' param '{"voice": "fahad"}'
SUCCESS
RAGFlow(user)> asr with 'whisper-large-v3-turbo@test@groq' audio './internal/test.wav' param '{"language": "en"}'
+----------------------------------------------------------------------------------------------------------------------+
| text |
+----------------------------------------------------------------------------------------------------------------------+
| The examination and testimony of the experts enabled the Commission to conclude that five shots may have been fired |
+----------------------------------------------------------------------------------------------------------------------+
```
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@@ -5,7 +5,9 @@
|
||||
},
|
||||
"url_suffix": {
|
||||
"chat": "chat/completions",
|
||||
"models": "models"
|
||||
"models": "models",
|
||||
"asr": "audio/transcriptions",
|
||||
"tts": "audio/speech"
|
||||
},
|
||||
"class": "groq",
|
||||
"models": [
|
||||
@@ -51,6 +53,13 @@
|
||||
"chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "openai/gpt-oss-20b",
|
||||
"max_tokens": 131072,
|
||||
"model_types": [
|
||||
"chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "meta-llama/llama-4-scout-17b-16e-instruct",
|
||||
"max_tokens": 131072,
|
||||
@@ -64,6 +73,30 @@
|
||||
"model_types": [
|
||||
"chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "canopylabs/orpheus-v1-english",
|
||||
"model_types": [
|
||||
"tts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "canopylabs/orpheus-arabic-saudi",
|
||||
"model_types": [
|
||||
"tts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "whisper-large-v3-turbo",
|
||||
"model_types": [
|
||||
"asr"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "whisper-large-v3",
|
||||
"model_types": [
|
||||
"asr"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -23,7 +23,11 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
@@ -113,6 +117,16 @@ func groqChatPayload(modelName string, messages []Message, stream bool, chatMode
|
||||
"stream": stream,
|
||||
}
|
||||
|
||||
modelLower := strings.ToLower(modelName)
|
||||
if strings.Contains(modelLower, "gpt-oss") {
|
||||
reqBody["include_reasoning"] = true
|
||||
if chatModelConfig.Effort != nil {
|
||||
reqBody["reasoning_effort"] = chatModelConfig.Effort
|
||||
}
|
||||
} else if strings.Contains(modelLower, "qwen") || strings.Contains(modelLower, "deepseek") {
|
||||
reqBody["reasoning_format"] = "parsed"
|
||||
}
|
||||
|
||||
if chatModelConfig != nil {
|
||||
if chatModelConfig.MaxTokens != nil {
|
||||
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
|
||||
@@ -126,6 +140,7 @@ func groqChatPayload(modelName string, messages []Message, stream bool, chatMode
|
||||
if chatModelConfig.Stop != nil {
|
||||
reqBody["stop"] = *chatModelConfig.Stop
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return reqBody
|
||||
@@ -403,7 +418,115 @@ func (g *GroqModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error
|
||||
}
|
||||
|
||||
func (g *GroqModel) TranscribeAudio(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig) (*ASRResponse, error) {
|
||||
return nil, fmt.Errorf("%s, no such method", g.Name())
|
||||
if file == nil || *file == "" {
|
||||
return nil, fmt.Errorf("file is missing")
|
||||
}
|
||||
|
||||
region := "default"
|
||||
if apiConfig != nil && apiConfig.Region != nil && *apiConfig.Region != "" {
|
||||
region = *apiConfig.Region
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/%s", g.BaseURL[region], g.URLSuffix.ASR)
|
||||
|
||||
// multipart body
|
||||
var body bytes.Buffer
|
||||
writer := multipart.NewWriter(&body)
|
||||
|
||||
// open audio file
|
||||
audioFile, err := os.Open(*file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open audio file: %w", err)
|
||||
}
|
||||
defer audioFile.Close()
|
||||
|
||||
// create multipart file field
|
||||
part, err := writer.CreateFormFile(
|
||||
"file",
|
||||
filepath.Base(*file),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create multipart file: %w", err)
|
||||
}
|
||||
|
||||
// copy file content
|
||||
if _, err = io.Copy(part, audioFile); err != nil {
|
||||
return nil, fmt.Errorf("failed to copy audio data: %w", err)
|
||||
}
|
||||
|
||||
// model field
|
||||
if err := writer.WriteField("model", *modelName); err != nil {
|
||||
return nil, fmt.Errorf("failed to write model field: %w", err)
|
||||
}
|
||||
|
||||
// extra params
|
||||
if asrConfig != nil && asrConfig.Params != nil {
|
||||
for key, value := range asrConfig.Params {
|
||||
|
||||
var val string
|
||||
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
val = v
|
||||
case bool:
|
||||
val = strconv.FormatBool(v)
|
||||
case int:
|
||||
val = strconv.Itoa(v)
|
||||
case int64:
|
||||
val = strconv.FormatInt(v, 10)
|
||||
case float32:
|
||||
val = strconv.FormatFloat(float64(v), 'f', -1, 32)
|
||||
case float64:
|
||||
val = strconv.FormatFloat(v, 'f', -1, 64)
|
||||
default:
|
||||
val = fmt.Sprintf("%v", v)
|
||||
}
|
||||
|
||||
if err = writer.WriteField(key, val); err != nil {
|
||||
return nil, fmt.Errorf("failed to write field %s: %w", key, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err = writer.Close(); err != nil {
|
||||
return nil, fmt.Errorf("failed to close multipart writer: %w", err)
|
||||
}
|
||||
|
||||
// build request
|
||||
req, err := http.NewRequest("POST", url, &body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
|
||||
req.Header.Set("Content-Type", writer.FormDataContentType())
|
||||
|
||||
// send request
|
||||
resp, err := g.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
respBody, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("Groq ASR error: %s - %s", resp.Status, string(respBody))
|
||||
}
|
||||
|
||||
// response
|
||||
var result struct {
|
||||
Text string `json:"text"`
|
||||
}
|
||||
|
||||
if err = json.Unmarshal(respBody, &result); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal response: %w, body=%s", err, string(respBody))
|
||||
}
|
||||
|
||||
return &ASRResponse{Text: result.Text}, nil
|
||||
}
|
||||
|
||||
func (g *GroqModel) TranscribeAudioWithSender(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig, sender func(*string, *string) error) error {
|
||||
@@ -411,7 +534,60 @@ func (g *GroqModel) TranscribeAudioWithSender(modelName *string, file *string, a
|
||||
}
|
||||
|
||||
func (g *GroqModel) AudioSpeech(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig) (*TTSResponse, error) {
|
||||
return nil, fmt.Errorf("%s, no such method", g.Name())
|
||||
if audioContent == nil || *audioContent == "" {
|
||||
return nil, fmt.Errorf("audio content is empty")
|
||||
}
|
||||
|
||||
var region = "default"
|
||||
if apiConfig != nil && apiConfig.Region != nil && *apiConfig.Region != "" {
|
||||
region = *apiConfig.Region
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/%s", g.BaseURL[region], g.URLSuffix.TTS)
|
||||
|
||||
reqBody := map[string]interface{}{
|
||||
"model": *modelName,
|
||||
"input": *audioContent,
|
||||
}
|
||||
|
||||
if ttsConfig != nil && ttsConfig.Params != nil {
|
||||
for key, value := range ttsConfig.Params {
|
||||
reqBody[key] = value
|
||||
}
|
||||
}
|
||||
if ttsConfig != nil && ttsConfig.Format != "" {
|
||||
reqBody["response_format"] = ttsConfig.Format
|
||||
}
|
||||
|
||||
jsonData, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
|
||||
|
||||
resp, err := g.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("%s - %s", resp.Status, string(body))
|
||||
}
|
||||
|
||||
return &TTSResponse{Audio: body}, nil
|
||||
}
|
||||
|
||||
func (g *GroqModel) AudioSpeechWithSender(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig, sender func(*string, *string) error) error {
|
||||
|
||||
Reference in New Issue
Block a user