Files
ragflow/internal/service/bot_completion.go
Zhichang Yu 4c54cefd29 Port 14 upstream agent security / correctness fixes to Go canvas (#16455)
Mirrors 14 merged upstream PRs into the Go agent port.

PRs ported:
  - #15609 ExeSQL SSRF guard + DNS pin
  - #15436 HTTP timeout on external API tools
  - #16363 be_output restore + DeepL error path
  - #15644 switch no longer matches empty condition
  - #15374 session_id bind to path agent_id (DAO idor guard)
  - #16169 sandbox artifact ownership gate
  - #15457 tenant ownership on agentbots
  - #15145 rerun agent document access check
- #15446 thinking switch (component portion; provider policy lives in
internal/llm)
  - #15426 Invoke URL/proxy SSRF + DNS pin + no-redirects
  - #15238 agentbot thinking-logs beta endpoint
  - #14589 UserFillUp SSE event propagation
  - #14890 anonymous webhook opt-in
- #15068 PipelineChunker new component (text/file_ref/parser_id
dispatch; file-format extraction is a follow-up)

40 files, +2355 / -58 lines. 33 new tests, all targeted package suites
pass (1721 + 4 skipped); 1 pre-existing flaky test unrelated.
2026-06-30 16:28:48 +08:00

425 lines
15 KiB
Go

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// bot_completion.go is the SSE envelope writer + ChatbotCompletion
// service path for /api/v1/chatbots/<dialog_id>/completions. The wire
// shape is dictated by the existing python
// `api/db/services/conversation_service.py::async_iframe_completion`
// — JS widgets reading the iframe SDK expect this exact envelope, so
// any change to the frame keys is a wire-contract change.
//
// Frame shape (one JSON object per `data:` line):
//
// {"code":0,"message":"","data":{"answer":"...","reference":{...},
// "audio_binary":null,"id":"...","session_id":"..."}, ...}
//
// The final completion marker is `data: {"code":0,"message":"",
// "data":true}` followed by the OpenAI-style `data: [DONE]` line
// that the existing Go SSE writers emit on the production
// /agents/chat/completions path.
package service
import (
"context"
"encoding/json"
"errors"
"net/http"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
"ragflow/internal/agent/canvas"
"ragflow/internal/common"
"ragflow/internal/entity"
modelModule "ragflow/internal/entity/models"
)
// ChatbotSSEFrame is one envelope pushed to the SSE writer by the
// chatbot completion path. Err takes precedence over Data and is
// rendered as a python-style {code:500, message:str(e),
// data:{answer:"**ERROR**..."}} frame.
type ChatbotSSEFrame struct {
// Event is the canvas.RunEvent type ("message",
// "user_inputs", "workflow_finished", etc.). It is
// forwarded in the SSE envelope as the `event` field so the
// front-end can distinguish interactive form pauses from
// plain assistant text (PR #14589). The field is omitted
// from the JSON when empty to preserve the original wire
// shape for callers that do not set it.
Event string `json:"event,omitempty"`
Data string `json:"-"`
Reference map[string]any `json:"-"`
SessionID string `json:"-"`
Done bool `json:"-"`
Err error `json:"-"`
}
// WriteChatbotFrame emits one python-style SSE frame and flushes the
// underlying http.ResponseWriter. The frame is `data: <json>\n\n`
// and is byte-equivalent to the python side so the iframe SDK and
// existing JS widgets keep working.
//
// Error frames sanitize the message — internal errors (gorm stack
// frames, SQL details, storage paths) MUST NOT be echoed to the
// client. The caller is expected to log the real error via
// common.Error / zap before publishing the frame; only a generic
// placeholder is rendered here. Mirrors the python
// `api/db/services/conversation_service.py` error frame shape.
func WriteChatbotFrame(w http.ResponseWriter, f ChatbotSSEFrame) error {
var payload map[string]any
if f.Err != nil {
const clientErrMsg = "an internal error occurred"
payload = map[string]any{
"code": 500,
"message": clientErrMsg,
"data": map[string]any{
"answer": clientErrMsg,
"reference": map[string]any{},
},
}
} else {
data := map[string]any{
"answer": f.Data,
"reference": f.Reference,
"audio_binary": nil,
"id": nil,
"session_id": f.SessionID,
}
// Forward the canvas event type so the front-end can
// distinguish interactive form pauses ("user_inputs",
// "workflow_finished") from plain assistant messages
// (PR #14589). When Event is empty the field is omitted
// from the JSON so existing message frames stay
// byte-compatible.
if f.Event != "" {
data["event"] = f.Event
}
payload = map[string]any{
"code": 0,
"message": "",
"data": data,
}
}
b, err := json.Marshal(payload)
if err != nil {
return err
}
if _, err := w.Write([]byte("data: ")); err != nil {
return err
}
if _, err := w.Write(b); err != nil {
return err
}
if _, err := w.Write([]byte("\n\n")); err != nil {
return err
}
if flusher, ok := w.(http.Flusher); ok {
flusher.Flush()
}
return nil
}
// WriteDoneFrame emits the python completion marker
// `data: {"code":0,"message":"","data":true}\n\n` followed by the
// OpenAI-style `data: [DONE]\n\n` terminator. Used by both bot
// completion paths.
func WriteDoneFrame(w http.ResponseWriter) error {
if _, err := w.Write([]byte(`data: {"code":0,"message":"","data":true}` + "\n\n")); err != nil {
return err
}
if _, err := w.Write([]byte("data: [DONE]\n\n")); err != nil {
return err
}
if flusher, ok := w.(http.Flusher); ok {
flusher.Flush()
}
return nil
}
// WriteChatbotRunEvent translates one canvas.RunEvent into the
// unified python-shaped chat-completion envelope (same shape as
// WriteChatbotFrame). This unifies the SSE format across:
//
// - /api/v1/agents/chat/completions (was: writeChatCompletionSSE)
// - /api/v1/agentbots/<id>/completions (was: WriteChatbotFrame per-event)
//
// The "done" event type emits `data: [DONE]\n\n` (no envelope),
// matching the OpenAI-style terminator and the existing
// AgentbotCompletion wire.
//
// For non-done events, ev.Data is placed verbatim into the `answer`
// field — callers pass canvas-runner output that is itself a JSON
// string (e.g. `{"answer":"hi back","reference":[]}`); the iframe
// SDK then JSON.parse()s the `answer` string to extract the inner
// fields. This matches the existing AgentbotCompletion behaviour.
//
// The event type is forwarded as the `event` field of the envelope
// (PR #14589) so the front-end can distinguish interactive
// `user_inputs` / `workflow_finished` events from plain `message`
// streams and render the UserFillUp form vs the assistant text.
// Without this field the form UI never appears because the
// iframe SDK has no way to know the canvas paused for human
// input.
//
// Returns the write error so callers can short-circuit; both nil
// and io.ErrClosedPipe are tolerated because the client may have
// disconnected mid-stream.
func WriteChatbotRunEvent(w http.ResponseWriter, ev canvas.RunEvent) error {
if ev.Type == "done" {
_, err := w.Write([]byte("data: [DONE]\n\n"))
if err != nil {
return err
}
if flusher, ok := w.(http.Flusher); ok {
flusher.Flush()
}
return nil
}
f := ChatbotSSEFrame{
Event: ev.Type,
Data: ev.Data,
Reference: map[string]any{},
SessionID: ev.SessionID,
}
return WriteChatbotFrame(w, f)
}
// AgentbotSSEFrame mirrors ChatbotSSEFrame for the agentbot
// completion path. The envelope shape is the same; the only
// difference is that the LLM call goes through the canvas runner
// (AgentService.RunAgent) instead of the legacy dialog async_chat.
type AgentbotSSEFrame = ChatbotSSEFrame
// WriteAgentbotFrame is an alias for WriteChatbotFrame — both bot
// completion paths emit the same python wire shape.
func WriteAgentbotFrame(w http.ResponseWriter, f ChatbotSSEFrame) error {
return WriteChatbotFrame(w, f)
}
// ChatbotCompletion streams an SSE response for
// /api/v1/chatbots/<dialog_id>/completions.
//
// The full LLM session-lifecycle implementation is added below. It
// is a v1 port: it yields a single frame per turn (the Go LLMBundle
// chat call is non-streaming), seeded with the dialog's prologue
// when the request creates a new session.
//
// Authorisation: dialog must exist, belong to the requester's tenant,
// and have status == common.StatusDialogValid.
func (s *BotService) ChatbotCompletion(
ctx context.Context, tenantID, dialogID string, req ChatbotCompletionRequest,
) (<-chan ChatbotSSEFrame, common.ErrorCode, error) {
// 1. Load and authorise the dialog.
//
// ChatSessionDAO.GetDialogByID already filters by status = "1"
// so a returned row is valid; we still nil-check defensively
// before dereferencing for symmetry with the session path.
dialog, err := s.chatDAO.GetDialogByID(dialogID)
if err != nil || dialog == nil ||
dialog.TenantID != tenantID ||
dialog.Status == nil || *dialog.Status != common.StatusDialogValid {
return nil, common.CodeDataError, errors.New("no access to this chatbot")
}
// 2. Resolve or create the session row.
//
// API4ConversationDAO.GetBySessionID returns (nil, nil) on miss
// (not an error) — see internal/dao/api_token.go:146. We MUST
// check the pointer before dereferencing, otherwise the
// session-tenant check below nil-derefs. Plan Risk R7.
//
// UserID vs tenantID (security H3 follow-up):
// `entity.API4Conversation.UserID` is a generic user-id slot
// in the production Python flow
// (api/db/services/conversation_service.py:258 — the python
// async_iframe_completion saves `user_id=kwargs.get("user_id", "")`).
// The Go BotHandler routes pass `user.ID` through the
// "tenantID" parameter (the Go User struct collapses user and
// tenant into one identifier — see project CLAUDE.md), so
// writing `tenantID` here actually stores the requester's
// user-id (== tenant-id) in the python user-id slot. The
// session-tenant check on the read path compares against the
// same value, so write/read stay symmetric. We keep this
// behaviour and add the comment so a future reader doesn't
// "fix" it to a tenant-id lookup and break the symmetry.
var session *entity.API4Conversation
if req.SessionID != "" {
session, err = s.api4ConversationDAO.GetBySessionID(req.SessionID, dialogID)
if err != nil {
return nil, common.CodeServerError, err
}
if session == nil || session.UserID != tenantID {
return nil, common.CodeDataError, errors.New("session not found")
}
} else {
// Seed a new session. The Message column is json.RawMessage;
// pre-serialise the prologue turn as a JSON array of
// {role,content,created_at} dicts — same shape the python
// conversation_service.py:253-272 writes. Plan Risk R4.
prologue := stringFromMap(dialog.PromptConfig, "prologue")
seedMsg, _ := json.Marshal([]map[string]any{
{
"role": "assistant",
"content": prologue,
"created_at": time.Now().Unix(),
},
})
session = &entity.API4Conversation{
ID: uuid.NewString(),
DialogID: dialogID,
UserID: tenantID,
Message: seedMsg,
}
if err := s.api4ConversationDAO.Create(session); err != nil {
return nil, common.CodeServerError, err
}
}
// 3. Resolve the chat LLM via ModelProviderService. The python
// async_iframe_completion resolves the same way through
// LLMBundle(tenant_id, dialog.llm_id); the Go equivalent is
// GetChatModelConfig → NewChatModel → driver.ChatWithMessages.
//
// If llmService is unwired (test boot path) or the dialog has
// no LLM configured, we surface a sanitized CodeDataError
// rather than echoing the bare error string into the SSE
// envelope — see WriteChatbotFrame's sanitization contract.
if s.llmService == nil {
return nil, common.CodeServerError, errors.New("bot: llm service not wired")
}
if dialog.LLMID == "" {
return nil, common.CodeDataError, errors.New("no LLM configured for this chatbot")
}
modelProvider := NewModelProviderService()
driver, modelName, apiConfig, _, err := modelProvider.GetChatModelConfig(tenantID, dialog.LLMID)
if err != nil {
return nil, common.CodeDataError, errors.New("no LLM configured for this chatbot")
}
chatModel := modelModule.NewChatModel(driver, &modelName, apiConfig)
// 4. Build the prompt from prior conversation history plus the
// new user turn. Without this, a resumed session_id would
// authorise reuse but the LLM call would still be stateless
// turn-to-turn — a Python parity regression for any multi-turn
// chatbot client. The Message column on api_4_conversation is a
// json.RawMessage array of {role, content, created_at} dicts,
// matching the python conversation_service.py:253-272 shape.
messages := historyToMessages(session.Message)
messages = append(messages, modelModule.Message{Role: "user", Content: req.Question})
// 5. Yield frames on a channel.
out := make(chan ChatbotSSEFrame, 4)
go func() {
defer close(out)
resp, callErr := chatModel.ModelDriver.ChatWithMessages(
modelName, messages, chatModel.APIConfig, &modelModule.ChatConfig{},
)
if callErr != nil {
// Log the real error with structured context so
// ops can debug, but do NOT echo the raw
// err.Error() to the client (security M2:
// internal gorm/SQL/file-path leaks).
common.Error("bot: ChatbotCompletion LLM call failed",
callErr,
zap.String("dialog_id", dialogID),
zap.String("session_id", session.ID),
zap.String("llm_id", dialog.LLMID),
)
out <- ChatbotSSEFrame{
Err: errors.New("an internal error occurred"),
SessionID: session.ID,
}
out <- ChatbotSSEFrame{Done: true}
return
}
answer := ""
if resp != nil && resp.Answer != nil {
answer = *resp.Answer
}
// Persist the new turn pair (user + assistant) back to
// api_4_conversation so the NEXT call to ChatbotCompletion
// with the same session_id sees this turn in messages.
// Update errors are logged but do NOT fail the SSE stream
// — the answer has already been produced. The next call
// will rebuild from the prior (pre-this-turn) snapshot,
// losing at most the latest exchange; acceptable for v1.
newTurns := append(historyFromMessages(messages),
map[string]any{"role": "assistant", "content": answer, "created_at": time.Now().Unix()},
)
if updated, mErr := json.Marshal(newTurns); mErr == nil {
session.Message = updated
if uErr := s.api4ConversationDAO.Update(session); uErr != nil {
common.Error("bot: ChatbotCompletion session update failed",
uErr,
zap.String("dialog_id", dialogID),
zap.String("session_id", session.ID),
)
}
}
out <- ChatbotSSEFrame{
Data: answer,
Reference: map[string]any{},
SessionID: session.ID,
}
out <- ChatbotSSEFrame{Done: true}
}()
return out, common.CodeSuccess, nil
}
// historyToMessages reads the session.Message JSON array of
// {role, content, ...} dicts and projects it onto modelModule.Message
// for the LLM driver. Tolerates an empty / malformed Message column
// by returning an empty slice — the caller appends the new user turn
// so the LLM still receives the current prompt.
func historyToMessages(raw json.RawMessage) []modelModule.Message {
if len(raw) == 0 {
return nil
}
var turns []map[string]any
if err := json.Unmarshal(raw, &turns); err != nil {
return nil
}
out := make([]modelModule.Message, 0, len(turns))
for _, t := range turns {
role, _ := t["role"].(string)
content, _ := t["content"].(string)
if role == "" || content == "" {
continue
}
out = append(out, modelModule.Message{Role: role, Content: content})
}
return out
}
// historyFromMessages is the inverse projection — used to write the
// updated turn list back to the api_4_conversation.Message column.
func historyFromMessages(msgs []modelModule.Message) []map[string]any {
out := make([]map[string]any, 0, len(msgs))
now := time.Now().Unix()
for i, m := range msgs {
out = append(out, map[string]any{
"role": m.Role,
"content": m.Content,
"created_at": now + int64(i), // preserve order, monotonic
})
}
return out
}