Go: implement reasoning_chat, TTS, ASR for Groq (#15153)

### What problem does this PR solve?

Go: implement reasoning_chat, TTS, ASR for Groq

**Verify from CLI**
```
RAGFlow(user)> think chat with 'qwen/qwen3-32b@test@groq' message 'who r u'
Thinking: Okay, the user asked, who r u. I need to determine what the user is asking. They may be asking about my identity. I should introduce my name and basic functions. The user might want to know what I can do, so I should list some common use cases, such as answering questions, creating writing, coding, and expressing opinions. The user may be curious about how they can interact with me, so they can be advised to ask any questions or provide instructions. Keep your answers conversational, avoid overly technical terms, keep answers concise, and encourage further interaction. Check if there's any ambiguity in the answer and make sure it's accurate and meets the user's needs. Also consider if there are other aspects the user may be interested in, such as my training data or performance. But since the question is basic, I'll focus on the essentials first and invite the user to ask more. In summary, respond to the user's questions by introducing yourself, your functions, and encouraging further interaction.

Answer: Hello! I'm Qwen. I am a large-scale language model developed by Tongyi Lab, designed to assist you in various ways, such as answering questions, creating text, logical reasoning, programming, and more. I aim to provide clear, accurate, and helpful information and support. How can I assist you today? Feel free to ask any questions or give me tasks! 😊
Time: 2.199908


RAGFlow(user)> stream think chat with 'openai/gpt-oss-20b@test@groq' message 'who r u'
Thinking:  to respond politely.
Answer: ’m ChatGPT—an AI language model created by OpenAI. I’m here to answer questions, offer explanations, and help with a wide range of topics. How can I assist you today?


RAGFlow(user)> tts with 'canopylabs/orpheus-arabic-saudi@test@groq' text 'hello? show yourself' play format 'wav' param '{"voice": "fahad"}'
SUCCESS


RAGFlow(user)> asr with 'whisper-large-v3-turbo@test@groq' audio './internal/test.wav' param '{"language": "en"}'
+----------------------------------------------------------------------------------------------------------------------+
| text                                                                                                                 |
+----------------------------------------------------------------------------------------------------------------------+
|  The examination and testimony of the experts enabled the Commission to conclude that five shots may have been fired |
+----------------------------------------------------------------------------------------------------------------------+
```

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Tohka
2026-05-22 18:02:30 +08:00
committed by GitHub
parent 3f02ca7ba1
commit 302f97de50
2 changed files with 212 additions and 3 deletions

View File

@@ -5,7 +5,9 @@
},
"url_suffix": {
"chat": "chat/completions",
"models": "models"
"models": "models",
"asr": "audio/transcriptions",
"tts": "audio/speech"
},
"class": "groq",
"models": [
@@ -51,6 +53,13 @@
"chat"
]
},
{
"name": "openai/gpt-oss-20b",
"max_tokens": 131072,
"model_types": [
"chat"
]
},
{
"name": "meta-llama/llama-4-scout-17b-16e-instruct",
"max_tokens": 131072,
@@ -64,6 +73,30 @@
"model_types": [
"chat"
]
},
{
"name": "canopylabs/orpheus-v1-english",
"model_types": [
"tts"
]
},
{
"name": "canopylabs/orpheus-arabic-saudi",
"model_types": [
"tts"
]
},
{
"name": "whisper-large-v3-turbo",
"model_types": [
"asr"
]
},
{
"name": "whisper-large-v3",
"model_types": [
"asr"
]
}
]
}

View File

@@ -23,7 +23,11 @@ import (
"encoding/json"
"fmt"
"io"
"mime/multipart"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
"time"
)
@@ -113,6 +117,16 @@ func groqChatPayload(modelName string, messages []Message, stream bool, chatMode
"stream": stream,
}
modelLower := strings.ToLower(modelName)
if strings.Contains(modelLower, "gpt-oss") {
reqBody["include_reasoning"] = true
if chatModelConfig.Effort != nil {
reqBody["reasoning_effort"] = chatModelConfig.Effort
}
} else if strings.Contains(modelLower, "qwen") || strings.Contains(modelLower, "deepseek") {
reqBody["reasoning_format"] = "parsed"
}
if chatModelConfig != nil {
if chatModelConfig.MaxTokens != nil {
reqBody["max_tokens"] = *chatModelConfig.MaxTokens
@@ -126,6 +140,7 @@ func groqChatPayload(modelName string, messages []Message, stream bool, chatMode
if chatModelConfig.Stop != nil {
reqBody["stop"] = *chatModelConfig.Stop
}
}
return reqBody
@@ -403,7 +418,115 @@ func (g *GroqModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error
}
func (g *GroqModel) TranscribeAudio(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig) (*ASRResponse, error) {
return nil, fmt.Errorf("%s, no such method", g.Name())
if file == nil || *file == "" {
return nil, fmt.Errorf("file is missing")
}
region := "default"
if apiConfig != nil && apiConfig.Region != nil && *apiConfig.Region != "" {
region = *apiConfig.Region
}
url := fmt.Sprintf("%s/%s", g.BaseURL[region], g.URLSuffix.ASR)
// multipart body
var body bytes.Buffer
writer := multipart.NewWriter(&body)
// open audio file
audioFile, err := os.Open(*file)
if err != nil {
return nil, fmt.Errorf("failed to open audio file: %w", err)
}
defer audioFile.Close()
// create multipart file field
part, err := writer.CreateFormFile(
"file",
filepath.Base(*file),
)
if err != nil {
return nil, fmt.Errorf("failed to create multipart file: %w", err)
}
// copy file content
if _, err = io.Copy(part, audioFile); err != nil {
return nil, fmt.Errorf("failed to copy audio data: %w", err)
}
// model field
if err := writer.WriteField("model", *modelName); err != nil {
return nil, fmt.Errorf("failed to write model field: %w", err)
}
// extra params
if asrConfig != nil && asrConfig.Params != nil {
for key, value := range asrConfig.Params {
var val string
switch v := value.(type) {
case string:
val = v
case bool:
val = strconv.FormatBool(v)
case int:
val = strconv.Itoa(v)
case int64:
val = strconv.FormatInt(v, 10)
case float32:
val = strconv.FormatFloat(float64(v), 'f', -1, 32)
case float64:
val = strconv.FormatFloat(v, 'f', -1, 64)
default:
val = fmt.Sprintf("%v", v)
}
if err = writer.WriteField(key, val); err != nil {
return nil, fmt.Errorf("failed to write field %s: %w", key, err)
}
}
}
if err = writer.Close(); err != nil {
return nil, fmt.Errorf("failed to close multipart writer: %w", err)
}
// build request
req, err := http.NewRequest("POST", url, &body)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
req.Header.Set("Content-Type", writer.FormDataContentType())
// send request
resp, err := g.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
respBody, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("Groq ASR error: %s - %s", resp.Status, string(respBody))
}
// response
var result struct {
Text string `json:"text"`
}
if err = json.Unmarshal(respBody, &result); err != nil {
return nil, fmt.Errorf("failed to unmarshal response: %w, body=%s", err, string(respBody))
}
return &ASRResponse{Text: result.Text}, nil
}
func (g *GroqModel) TranscribeAudioWithSender(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig, sender func(*string, *string) error) error {
@@ -411,7 +534,60 @@ func (g *GroqModel) TranscribeAudioWithSender(modelName *string, file *string, a
}
func (g *GroqModel) AudioSpeech(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig) (*TTSResponse, error) {
return nil, fmt.Errorf("%s, no such method", g.Name())
if audioContent == nil || *audioContent == "" {
return nil, fmt.Errorf("audio content is empty")
}
var region = "default"
if apiConfig != nil && apiConfig.Region != nil && *apiConfig.Region != "" {
region = *apiConfig.Region
}
url := fmt.Sprintf("%s/%s", g.BaseURL[region], g.URLSuffix.TTS)
reqBody := map[string]interface{}{
"model": *modelName,
"input": *audioContent,
}
if ttsConfig != nil && ttsConfig.Params != nil {
for key, value := range ttsConfig.Params {
reqBody[key] = value
}
}
if ttsConfig != nil && ttsConfig.Format != "" {
reqBody["response_format"] = ttsConfig.Format
}
jsonData, err := json.Marshal(reqBody)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey))
resp, err := g.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("%s - %s", resp.Status, string(body))
}
return &TTSResponse{Audio: body}, nil
}
func (g *GroqModel) AudioSpeechWithSender(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig, sender func(*string, *string) error) error {