mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Ports the agent canvas subsystem from Python to Go.
## What's included
### Canvas Engine (Phase 0/1)
- State engine, scheduler, variable resolver, Redis checkpoint store,
cancel protocol
- **209 tests** across canvas / component / io packages
### 22 Components (P0–P4)
| Tier | Components |
|---|---|
| P0 T1+T2+T3 | LLM, Agent, ExitLoop, Switch, Categorize, Begin,
Message, Invoke |
| P1 T3 | VariableAggregator, VariableAssigner, StringTransform,
ListOperations, DataOperations |
| P2 T3 | Iteration, IterationItem, Loop, LoopItem |
| P3 T3 | UserFillUp, Fillup |
| P4 T5 | Browser, ExcelProcessor, DocsGenerator |
### DSL v2 Schema (Phase 2.5)
- Typed v2 in-memory model with v1-to-v2 auto-detect converter
- v1 legacy field stripping per plan §2.11.7
### HTTP Endpoints & Bug Fixes (Plans PR1–PR3)
- **DELETE SQL bug fix**: gorm v2 `Where("id = ?", id).Delete(...)`
pattern
- **CreateAgent validation**: title/DSL required, duplicate check, 103
envelope
- **13 new endpoints**: templates, prompts, tags, sessions CRUD,
chat/completions (SSE + non-stream stubs), rerun, test_db_connection,
logs, webhook/logs
- **756 Go unit tests** (745 → 756, +18)
- **17 → 0 Python integration test failures** (test_agents.py +
test_session_management/)
### Tools
21 eino tools: HTTPHelper, search tools, financial/data tools, mandatory
stubs
### Infrastructure
OTel observability, NATS message queue, DeepDoc gRPC client, SSRF
guards, IDOR mitigation
184 lines
5.7 KiB
Go
184 lines
5.7 KiB
Go
//
|
|
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
package io
|
|
|
|
import (
|
|
"archive/zip"
|
|
"bytes"
|
|
"io"
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
// TestDOCXWriter_MinimalDocument: the smallest possible DOCX — no
|
|
// header, no footer, no watermark, no page numbers. The output must be
|
|
// a valid ZIP starting with the PK magic and contain a document.xml
|
|
// with the source text.
|
|
func TestDOCXWriter_MinimalDocument(t *testing.T) {
|
|
doc, err := WriteDOCX("Hello", DOCXOptions{})
|
|
if err != nil {
|
|
t.Fatalf("WriteDOCX: %v", err)
|
|
}
|
|
if len(doc) < 4 {
|
|
t.Fatalf("doc too small: %d bytes", len(doc))
|
|
}
|
|
if !bytes.HasPrefix(doc, []byte{'P', 'K', 0x03, 0x04}) {
|
|
t.Fatalf("doc does not start with ZIP magic; first 4 bytes: % x", doc[:4])
|
|
}
|
|
zr, err := zip.NewReader(bytes.NewReader(doc), int64(len(doc)))
|
|
if err != nil {
|
|
t.Fatalf("zip.NewReader: %v", err)
|
|
}
|
|
body, ok := readZipFile(t, zr, "word/document.xml")
|
|
if !ok {
|
|
t.Fatal("word/document.xml not found in zip")
|
|
}
|
|
if !strings.Contains(body, "Hello") {
|
|
t.Errorf("document.xml missing source text; first 200 chars:\n%s", truncate(body, 200))
|
|
}
|
|
// The static parts should always be present.
|
|
if _, ok := readZipFile(t, zr, "[Content_Types].xml"); !ok {
|
|
t.Error("[Content_Types].xml missing")
|
|
}
|
|
if _, ok := readZipFile(t, zr, "_rels/.rels"); !ok {
|
|
t.Error("_rels/.rels missing")
|
|
}
|
|
}
|
|
|
|
// TestDOCXWriter_WithHeader: when HeaderText is set, the produced
|
|
// zip must contain word/header1.xml with the header text and a
|
|
// corresponding relationship entry in document.xml.rels.
|
|
func TestDOCXWriter_WithHeader(t *testing.T) {
|
|
doc, err := WriteDOCX("X", DOCXOptions{HeaderText: "TOP"})
|
|
if err != nil {
|
|
t.Fatalf("WriteDOCX: %v", err)
|
|
}
|
|
zr, err := zip.NewReader(bytes.NewReader(doc), int64(len(doc)))
|
|
if err != nil {
|
|
t.Fatalf("zip.NewReader: %v", err)
|
|
}
|
|
hdr, ok := readZipFile(t, zr, "word/header1.xml")
|
|
if !ok {
|
|
t.Fatal("word/header1.xml missing when HeaderText set")
|
|
}
|
|
if !strings.Contains(hdr, "TOP") {
|
|
t.Errorf("header1.xml missing 'TOP':\n%s", truncate(hdr, 200))
|
|
}
|
|
rels, ok := readZipFile(t, zr, "word/_rels/document.xml.rels")
|
|
if !ok {
|
|
t.Fatal("word/_rels/document.xml.rels missing")
|
|
}
|
|
if !strings.Contains(rels, "rIdHeader1") || !strings.Contains(rels, "header1.xml") {
|
|
t.Errorf("document.xml.rels missing header relationship:\n%s", truncate(rels, 200))
|
|
}
|
|
}
|
|
|
|
// TestDOCXWriter_XMLEscape: source content with <, >, &, " must be
|
|
// XML-escaped in the produced document.xml — the writer must never
|
|
// let raw user content break the OOXML topology.
|
|
func TestDOCXWriter_XMLEscape(t *testing.T) {
|
|
in := `A < B & C > D "quoted"`
|
|
doc, err := WriteDOCX(in, DOCXOptions{})
|
|
if err != nil {
|
|
t.Fatalf("WriteDOCX: %v", err)
|
|
}
|
|
zr, err := zip.NewReader(bytes.NewReader(doc), int64(len(doc)))
|
|
if err != nil {
|
|
t.Fatalf("zip.NewReader: %v", err)
|
|
}
|
|
body, ok := readZipFile(t, zr, "word/document.xml")
|
|
if !ok {
|
|
t.Fatal("word/document.xml missing")
|
|
}
|
|
// Escaped forms must appear. html.EscapeString produces the
|
|
// standard XML entity set: < / > / & / " (numeric
|
|
// for the double-quote, matching Go's stdlib contract).
|
|
want := "A < B & C > D "quoted""
|
|
if !strings.Contains(body, want) {
|
|
t.Errorf("expected XML-escaped content %q, got:\n%s", want, truncate(body, 400))
|
|
}
|
|
// Raw < and & must NOT appear inside the <w:t> text run.
|
|
if strings.Contains(body, "A < B &") {
|
|
t.Errorf("raw 'A < B &' leaked into document.xml")
|
|
}
|
|
}
|
|
|
|
// TestDOCXWriter_Watermark: setting WatermarkText should produce a
|
|
// header with the VML watermark shape, and the document.xml.rels
|
|
// should still include the header reference.
|
|
func TestDOCXWriter_Watermark(t *testing.T) {
|
|
doc, err := WriteDOCX("body", DOCXOptions{WatermarkText: "DRAFT"})
|
|
if err != nil {
|
|
t.Fatalf("WriteDOCX: %v", err)
|
|
}
|
|
zr, err := zip.NewReader(bytes.NewReader(doc), int64(len(doc)))
|
|
if err != nil {
|
|
t.Fatalf("zip.NewReader: %v", err)
|
|
}
|
|
hdr, ok := readZipFile(t, zr, "word/header1.xml")
|
|
if !ok {
|
|
t.Fatal("header1.xml missing when WatermarkText set")
|
|
}
|
|
if !strings.Contains(hdr, "DRAFT") {
|
|
t.Errorf("header1.xml missing watermark text 'DRAFT'")
|
|
}
|
|
if !strings.Contains(hdr, "v:textpath") {
|
|
t.Errorf("header1.xml missing v:textpath (VML watermark shape)")
|
|
}
|
|
}
|
|
|
|
// TestDOCXWriter_EmptyContent: an empty content string should still
|
|
// produce a valid DOCX (one empty paragraph).
|
|
func TestDOCXWriter_EmptyContent(t *testing.T) {
|
|
doc, err := WriteDOCX("", DOCXOptions{})
|
|
if err != nil {
|
|
t.Fatalf("WriteDOCX: %v", err)
|
|
}
|
|
if len(doc) < 4 || !bytes.HasPrefix(doc, []byte{'P', 'K', 0x03, 0x04}) {
|
|
t.Fatalf("expected ZIP magic, got: % x", doc[:4])
|
|
}
|
|
}
|
|
|
|
// readZipFile looks up the entry whose name equals name in zr and returns its
// entire contents as a string. It returns ("", false) when no entry matches.
// If a matching entry cannot be opened or read, the calling test is aborted
// via t.Fatalf.
func readZipFile(t *testing.T, zr *zip.Reader, name string) (string, bool) {
	t.Helper()

	// Locate the first entry with the requested name.
	var entry *zip.File
	for _, candidate := range zr.File {
		if candidate.Name == name {
			entry = candidate
			break
		}
	}
	if entry == nil {
		return "", false
	}

	src, err := entry.Open()
	if err != nil {
		t.Fatalf("open %s: %v", name, err)
	}
	defer src.Close()

	data, err := io.ReadAll(src)
	if err != nil {
		t.Fatalf("read %s: %v", name, err)
	}
	return string(data), true
}
|
|
|
|
// truncate shortens s for use in diagnostic messages. When s is at most n
// bytes long it is returned unchanged; otherwise the result is a prefix of at
// most n bytes followed by "...". The cut point is moved back to the nearest
// UTF-8 rune boundary so the prefix never ends in a partial multi-byte
// character, and a negative n is treated as 0 rather than panicking.
func truncate(s string, n int) string {
	if n < 0 {
		n = 0
	}
	if len(s) <= n {
		return s
	}
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes. Step back
	// until the cut lands on the first byte of a rune (n < len(s) here, so
	// s[n] is always in range).
	for n > 0 && s[n]&0xC0 == 0x80 {
		n--
	}
	return s[:n] + "..."
}
|