Files
ragflow/internal/agent/audio/tts_dispatch.go
Zhichang Yu e45659868a feat(agent): ship the Go agent canvas port — eino interrupt/resume + Redis check-pointing (#16035)
Replaces the Python agent canvas runtime with a Go implementation that
runs inside `cmd/server_main`.

The canvas compiles into an eino Workflow that pauses on wait-for-user
via native Interrupt/Resume (no sentinel flag) and resumes from a
Redis-backed CheckPointStore.

All 21 Python agent components and ~35 tools are ported with functional
parity.

Sandbox providers now read their JSON config from the admin-panel
system_settings table with env fallback.

234 files / +35,413 / -6,111. All Go files are gofmt-clean (CI gate
added); drops the v2 DSL E2E step and the gap-analysis plan (both
redundant after the port ships).

## Type of change

- [x] Refactoring
- [x] New feature
- [x] Bug fix

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
2026-06-17 13:24:03 +08:00

150 lines
5.4 KiB
Go

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// tts_dispatch.go — TTS dispatcher interface for the audio package.
//
// The audio package's ModelProviderFunc contract (see
// model_provider_synthesizer.go) is a function-typed seam that the
// production boot (cmd/server_main.go) plugs in. This file extracts
// the dispatch logic into a small `TTSDispatcher` interface so the
// dispatch can be unit-tested without the audio package depending on
// internal/service. The interface is the minimum surface the audio
// package needs:
//
// - AudioSpeech: the single method that the model's audio driver
// actually exposes (see internal/entity/models/types.go:32-33
// BaseModel.AudioSpeech); everything else (provider lookup,
// tenant resolution, fallback model selection) is the model's
// own internal responsibility.
//
// The audio package does not import internal/service directly; it
// takes a TTSDispatcher (typically the *service.ModelProviderService
// instance installed at boot). The function returns a non-nil
// SynthesizeResponse on success and a non-nil error on every
// failure path; the audio package's caller (modelProviderSynthesizer)
// maps nil-error-with-empty-audio to ErrSynthesizeEmpty and nil-
// error-with-non-empty-audio to a clean pass.
package audio
import (
"context"
"fmt"
"ragflow/internal/common"
modelModule "ragflow/internal/entity/models"
)
// TTSDispatcher is the narrow seam between the audio package and
// the project's model provider service. Its single method has the
// same shape as *service.ModelProviderService.AudioSpeech —
// including the common.ErrorCode result — so production wiring can
// hand the service over without an adapter wrapper, while tests
// can supply a stub instead of spinning up a real model driver.
//
// Within this package any code other than common.CodeSuccess is
// treated as a failure and propagated as an error to the SSE
// consumer.
type TTSDispatcher interface {
	AudioSpeech(
		providerName *string,
		instanceName *string,
		modelName *string,
		modelID *string,
		userID string,
		audioContent *string,
		apiConfig *modelModule.APIConfig,
		modelConfig *modelModule.TTSConfig,
	) (*modelModule.TTSResponse, common.ErrorCode, error)
}
// NewTTSDispatchFunc returns an audio.ModelProviderFunc that
// dispatches a ModelProviderRequest to the supplied TTSDispatcher.
// It returns nil when d is nil.
//
// Field mapping (audio.ModelProviderRequest → model dispatch):
//
//   - ModelName → modelName
//     Per the audio package's contract this carries the request's
//     Engine value, repurposed as a model identifier hint. When it
//     is empty a nil pointer is passed so the dispatcher can fall
//     back to its default TTS model.
//   - TenantID → userID
//   - Text → audioContent
//   - Voice → TTSConfig.Params["voice"] (omitted when empty)
//   - Lang → TTSConfig.Params["lang"] (omitted when empty)
//
// providerName, instanceName, modelID and apiConfig are always
// passed as nil.
//
// Error contract: a non-nil error short-circuits the audio
// package's cache (no write) and surfaces to the caller as a
// failed Synthesize. The returned function fails when
//
//   - ctx is already done before dispatch (the error wraps
//     ctx.Err(), so errors.Is against context.Canceled /
//     context.DeadlineExceeded works),
//   - the dispatcher returns a non-nil error (wrapped with %w),
//   - the dispatcher returns a code other than common.CodeSuccess,
//   - the response is nil or carries no audio, which is reported
//     as ErrSynthesizeEmpty so the failure is observable in logs.
//
// On success the response audio is returned with MediaType
// "audio/mpeg".
func NewTTSDispatchFunc(d TTSDispatcher) ModelProviderFunc {
	if d == nil {
		return nil
	}
	return func(ctx context.Context, req ModelProviderRequest) (*SynthesizeResponse, error) {
		// AudioSpeech takes no context, so cancellation cannot be
		// forwarded into the dispatch itself. The best we can do
		// is refuse to start a synthesis for a caller that has
		// already gone away.
		if ctx != nil {
			if err := ctx.Err(); err != nil {
				return nil, fmt.Errorf("audio: TTS model-provider dispatch: %w", err)
			}
		}

		// An empty ModelName is legal: passing nil lets the model
		// dispatcher fall back to the tenant's default TTS model.
		var modelName *string
		if req.ModelName != "" {
			mn := req.ModelName
			modelName = &mn
		}

		// Build a TTSConfig from the request's voice + lang so the
		// model driver can select a voice variant when the provider
		// supports it (e.g. OpenAI's alloy/echo/fable, edge-tts'
		// voice short-name). Params stays nil when neither is set.
		ttsConfig := &modelModule.TTSConfig{}
		if req.Voice != "" || req.Lang != "" {
			params := make(map[string]any, 2)
			if req.Voice != "" {
				params["voice"] = req.Voice
			}
			if req.Lang != "" {
				params["lang"] = req.Lang
			}
			ttsConfig.Params = params
		}

		text := req.Text
		resp, code, err := d.AudioSpeech(
			nil, // providerName — let the dispatcher resolve by name
			nil, // instanceName — same
			modelName,
			nil, // modelID — look up by (provider, instance, model)
			req.TenantID,
			&text,
			nil, // apiConfig — no per-request config; model defaults apply
			ttsConfig,
		)
		if err != nil {
			return nil, fmt.Errorf("audio: TTS model-provider dispatch: %w", err)
		}
		if code != common.CodeSuccess {
			return nil, fmt.Errorf("audio: TTS model-provider dispatch: code=%d", code)
		}
		if resp == nil || len(resp.Audio) == 0 {
			return nil, ErrSynthesizeEmpty
		}
		return &SynthesizeResponse{
			Audio:     resp.Audio,
			MediaType: "audio/mpeg",
		}, nil
	}
}