diff --git a/conf/models/groq.json b/conf/models/groq.json index 827e3829ef..4ec32c6d2c 100644 --- a/conf/models/groq.json +++ b/conf/models/groq.json @@ -5,7 +5,9 @@ }, "url_suffix": { "chat": "chat/completions", - "models": "models" + "models": "models", + "asr": "audio/transcriptions", + "tts": "audio/speech" }, "class": "groq", "models": [ @@ -51,6 +53,13 @@ "chat" ] }, + { + "name": "openai/gpt-oss-20b", + "max_tokens": 131072, + "model_types": [ + "chat" + ] + }, { "name": "meta-llama/llama-4-scout-17b-16e-instruct", "max_tokens": 131072, @@ -64,6 +73,30 @@ "model_types": [ "chat" ] + }, + { + "name": "canopylabs/orpheus-v1-english", + "model_types": [ + "tts" + ] + }, + { + "name": "canopylabs/orpheus-arabic-saudi", + "model_types": [ + "tts" + ] + }, + { + "name": "whisper-large-v3-turbo", + "model_types": [ + "asr" + ] + }, + { + "name": "whisper-large-v3", + "model_types": [ + "asr" + ] } ] } diff --git a/internal/entity/models/groq.go b/internal/entity/models/groq.go index 87072c95ac..fdf3d0652d 100644 --- a/internal/entity/models/groq.go +++ b/internal/entity/models/groq.go @@ -23,7 +23,11 @@ import ( "encoding/json" "fmt" "io" + "mime/multipart" "net/http" + "os" + "path/filepath" + "strconv" "strings" "time" ) @@ -113,6 +117,16 @@ func groqChatPayload(modelName string, messages []Message, stream bool, chatMode "stream": stream, } + modelLower := strings.ToLower(modelName) + if strings.Contains(modelLower, "gpt-oss") { + reqBody["include_reasoning"] = true + if chatModelConfig.Effort != nil { + reqBody["reasoning_effort"] = chatModelConfig.Effort + } + } else if strings.Contains(modelLower, "qwen") || strings.Contains(modelLower, "deepseek") { + reqBody["reasoning_format"] = "parsed" + } + if chatModelConfig != nil { if chatModelConfig.MaxTokens != nil { reqBody["max_tokens"] = *chatModelConfig.MaxTokens @@ -126,6 +140,7 @@ func groqChatPayload(modelName string, messages []Message, stream bool, chatMode if chatModelConfig.Stop != nil { reqBody["stop"] = *chatModelConfig.Stop } + } return reqBody @@ -403,7 +418,115 @@ func (g *GroqModel) Balance(apiConfig *APIConfig) (map[string]interface{}, error } func (g *GroqModel) TranscribeAudio(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig) (*ASRResponse, error) { - return nil, fmt.Errorf("%s, no such method", g.Name()) + if file == nil || *file == "" { + return nil, fmt.Errorf("file is missing") + } + + region := "default" + if apiConfig != nil && apiConfig.Region != nil && *apiConfig.Region != "" { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/%s", g.BaseURL[region], g.URLSuffix.ASR) + + // multipart body + var body bytes.Buffer + writer := multipart.NewWriter(&body) + + // open audio file + audioFile, err := os.Open(*file) + if err != nil { + return nil, fmt.Errorf("failed to open audio file: %w", err) + } + defer audioFile.Close() + + // create multipart file field + part, err := writer.CreateFormFile( + "file", + filepath.Base(*file), + ) + if err != nil { + return nil, fmt.Errorf("failed to create multipart file: %w", err) + } + + // copy file content + if _, err = io.Copy(part, audioFile); err != nil { + return nil, fmt.Errorf("failed to copy audio data: %w", err) + } + + // model field + if err := writer.WriteField("model", *modelName); err != nil { + return nil, fmt.Errorf("failed to write model field: %w", err) + } + + // extra params + if asrConfig != nil && asrConfig.Params != nil { + for key, value := range asrConfig.Params { + + var val string + + switch v := value.(type) { + case string: + val = v + case bool: + val = strconv.FormatBool(v) + case int: + val = strconv.Itoa(v) + case int64: + val = strconv.FormatInt(v, 10) + case float32: + val = strconv.FormatFloat(float64(v), 'f', -1, 32) + case float64: + val = strconv.FormatFloat(v, 'f', -1, 64) + default: + val = fmt.Sprintf("%v", v) + } + + if err = writer.WriteField(key, val); err != nil { + return nil, fmt.Errorf("failed to write field %s: %w", key, err) + } + } + } + + if err = writer.Close(); err != nil { + return nil, fmt.Errorf("failed to close multipart writer: %w", err) + } + + // build request + req, err := http.NewRequest("POST", url, &body) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + req.Header.Set("Content-Type", writer.FormDataContentType()) + + // send request + resp, err := g.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("Groq ASR error: %s - %s", resp.Status, string(respBody)) + } + + // response + var result struct { + Text string `json:"text"` + } + + if err = json.Unmarshal(respBody, &result); err != nil { + return nil, fmt.Errorf("failed to unmarshal response: %w, body=%s", err, string(respBody)) + } + + return &ASRResponse{Text: result.Text}, nil } func (g *GroqModel) TranscribeAudioWithSender(modelName *string, file *string, apiConfig *APIConfig, asrConfig *ASRConfig, sender func(*string, *string) error) error { @@ -411,7 +534,60 @@ func (g *GroqModel) TranscribeAudioWithSender(modelName *string, file *string, a } func (g *GroqModel) AudioSpeech(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig) (*TTSResponse, error) { - return nil, fmt.Errorf("%s, no such method", g.Name()) + if audioContent == nil || *audioContent == "" { + return nil, fmt.Errorf("audio content is empty") + } + + var region = "default" + if apiConfig != nil && apiConfig.Region != nil && *apiConfig.Region != "" { + region = *apiConfig.Region + } + + url := fmt.Sprintf("%s/%s", g.BaseURL[region], g.URLSuffix.TTS) + + reqBody := map[string]interface{}{ + "model": *modelName, + "input": *audioContent, + } + + if ttsConfig != nil && ttsConfig.Params != nil { + for key, value := range ttsConfig.Params { + reqBody[key] = value + } + } + if ttsConfig != nil && ttsConfig.Format != "" { + reqBody["response_format"] = ttsConfig.Format + } + + jsonData, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("failed to marshal request: %w", err) + } + + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiConfig.ApiKey)) + + resp, err := g.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("%s - %s", resp.Status, string(body)) + } + + return &TTSResponse{Audio: body}, nil } func (g *GroqModel) AudioSpeechWithSender(modelName *string, audioContent *string, apiConfig *APIConfig, ttsConfig *TTSConfig, sender func(*string, *string) error) error {