mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-01 16:25:44 +08:00
Replaces the Python agent canvas runtime with a Go implementation that runs inside `cmd/server_main`. The canvas compiles into an eino Workflow that pauses on wait-for-user via native Interrupt/Resume (no sentinel flag) and resumes from a Redis-backed CheckPointStore. All 21 Python agent components and ~35 tools are ported with functional parity. Sandbox providers now read their JSON config from the admin-panel system_settings table with env fallback. 234 files / +35,413 / -6,111. All Go files are gofmt-clean (CI gate added); drops the v2 DSL E2E step and the gap-analysis plan (both redundant after the port ships). ## Type of change - [x] Refactoring - [x] New feature - [x] Bug fix 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude <noreply@anthropic.com>
150 lines
5.4 KiB
Go
150 lines
5.4 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
// tts_dispatch.go — TTS dispatcher interface for the audio package.
|
|
//
|
|
// The audio package's ModelProviderFunc contract (see
|
|
// model_provider_synthesizer.go) is a function-typed seam that the
|
|
// production boot (cmd/server_main.go) plugs in. This file extracts
|
|
// the dispatch logic into a small `TTSDispatcher` interface so the
|
|
// dispatch can be unit-tested without the audio package depending on
|
|
// internal/service. The interface is the minimum surface the audio
|
|
// package needs:
|
|
//
|
|
// - AudioSpeech: a single method that the model's audio driver
|
|
// actually exposes (see internal/entity/models/types.go:32-33
|
|
// BaseModel.AudioSpeech); everything else (provider lookup,
|
|
// tenant resolution, fallback model selection) is the model's
|
|
// own internal responsibility.
|
|
//
|
|
// The audio package does not import internal/service directly; it
|
|
// takes a TTSDispatcher (typically the *service.ModelProviderService
|
|
// instance installed at boot). The function returns a non-nil
|
|
// SynthesizeResponse on success and a non-nil error on every
|
|
// failure path; the audio package's caller (modelProviderSynthesizer)
|
|
// maps nil-error-with-empty-audio to ErrSynthesizeEmpty and nil-
|
|
// error-with-non-empty-audio to a clean pass.
|
|
|
|
package audio
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"ragflow/internal/common"
|
|
modelModule "ragflow/internal/entity/models"
|
|
)
|
|
|
|
// TTSDispatcher is the minimum interface the audio package needs
|
|
// from the project's model provider service. It mirrors the
|
|
// *service.ModelProviderService.AudioSpeech method shape so the
|
|
// production wiring is a one-line cast. Tests can substitute a
|
|
// stub without spinning up a real model driver.
|
|
//
|
|
// The signature matches the real AudioSpeech exactly (including
|
|
// the common.ErrorCode return) so no adapter wrapper is needed
|
|
// at the call site. A non-CodeSuccess return is treated as an
|
|
// error; the audio package propagates the error to the SSE
|
|
// consumer.
|
|
type TTSDispatcher interface {
|
|
AudioSpeech(
|
|
providerName, instanceName, modelName, modelID *string,
|
|
userID string,
|
|
audioContent *string,
|
|
apiConfig *modelModule.APIConfig,
|
|
modelConfig *modelModule.TTSConfig,
|
|
) (*modelModule.TTSResponse, common.ErrorCode, error)
|
|
}
|
|
|
|
// NewTTSDispatchFunc returns an audio.ModelProviderFunc that
|
|
// dispatches a SynthesizeRequest to the supplied TTSDispatcher.
|
|
//
|
|
// Field mapping (audio.SynthesizeRequest → model dispatch):
|
|
//
|
|
// - ModelProviderRequest.ModelName (from req.Engine) → modelName
|
|
// The Engine field is repurposed as a model identifier hint
|
|
// in the audio package's contract. Empty falls through to the
|
|
// model's default TTS model.
|
|
// - Text → audioContent
|
|
// - Voice → TTSConfig.Params["voice"]
|
|
// - Lang → TTSConfig.Params["lang"]
|
|
//
|
|
// Error contract: a non-nil error short-circuits the audio
|
|
// package's cache (no write) and surfaces to the caller as a
|
|
// failed Synthesize. A nil error with nil TTSResponse or empty
|
|
// audio is also an error (the audio package treats it as
|
|
// "model produced no audio"); we surface that as
|
|
// ErrSynthesizeEmpty so the failure is observable in logs.
|
|
func NewTTSDispatchFunc(d TTSDispatcher) ModelProviderFunc {
|
|
if d == nil {
|
|
return nil
|
|
}
|
|
return func(ctx context.Context, req ModelProviderRequest) (*SynthesizeResponse, error) {
|
|
// ModelName / Engine may both be empty; both are legal —
|
|
// the model dispatcher will fall back to the tenant's
|
|
// default TTS model in that case.
|
|
var modelName *string
|
|
if req.ModelName != "" {
|
|
mn := req.ModelName
|
|
modelName = &mn
|
|
}
|
|
|
|
// We don't have a per-request APIConfig; leave nil so
|
|
// the model's default credentials / base URL take effect.
|
|
var apiConfig *modelModule.APIConfig
|
|
|
|
// Build a TTSConfig from the request's voice + lang so
|
|
// the model driver can select a voice variant when the
|
|
// provider supports it (e.g. OpenAI's alloy/echo fable,
|
|
// edge-tts' voice short-name).
|
|
ttsConfig := &modelModule.TTSConfig{Params: map[string]any{}}
|
|
if req.Voice != "" {
|
|
ttsConfig.Params["voice"] = req.Voice
|
|
}
|
|
if req.Lang != "" {
|
|
ttsConfig.Params["lang"] = req.Lang
|
|
}
|
|
if len(ttsConfig.Params) == 0 {
|
|
ttsConfig.Params = nil
|
|
}
|
|
|
|
text := req.Text
|
|
resp, code, err := d.AudioSpeech(
|
|
nil, // providerName — let the dispatcher resolve by name
|
|
nil, // instanceName — same
|
|
modelName,
|
|
nil, // modelID — look up by (provider, instance, model)
|
|
req.TenantID,
|
|
&text,
|
|
apiConfig,
|
|
ttsConfig,
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("audio: TTS model-provider dispatch: %w", err)
|
|
}
|
|
if code != common.CodeSuccess {
|
|
return nil, fmt.Errorf("audio: TTS model-provider dispatch: code=%d", code)
|
|
}
|
|
if resp == nil || len(resp.Audio) == 0 {
|
|
return nil, ErrSynthesizeEmpty
|
|
}
|
|
return &SynthesizeResponse{
|
|
Audio: resp.Audio,
|
|
MediaType: "audio/mpeg",
|
|
}, nil
|
|
}
|
|
}
|