Files
ragflow/internal/agent/component/io/docx_writer_test.go
Zhichang Yu 3fa15c0e2f feat(agent): Go port — canvas engine, 22 components, DSL v2, 13 endpoints (#15952)
Ports the agent canvas subsystem from Python to Go.

## What's included

### Canvas Engine (Phase 0/1)
- State engine, scheduler, variable resolver, Redis checkpoint store,
cancel protocol
- **209 tests** across canvas / component / io packages

### 22 Components (P0–P4)
| Tier | Components |
|---|---|
| P0 T1+T2+T3 | LLM, Agent, ExitLoop, Switch, Categorize, Begin,
Message, Invoke |
| P1 T3 | VariableAggregator, VariableAssigner, StringTransform,
ListOperations, DataOperations |
| P2 T3 | Iteration, IterationItem, Loop, LoopItem |
| P3 T3 | UserFillUp, Fillup |
| P4 T5 | Browser, ExcelProcessor, DocsGenerator |

### DSL v2 Schema (Phase 2.5)
- Typed v2 in-memory model with v1-to-v2 auto-detect converter
- v1 legacy field stripping per plan §2.11.7

### HTTP Endpoints & Bug Fixes (Plans PR1–PR3)
- **DELETE SQL bug fix**: gorm v2 `Where("id = ?", id).Delete(...)`
pattern
- **CreateAgent validation**: title/DSL required, duplicate check, 103
envelope
- **13 new endpoints**: templates, prompts, tags, sessions CRUD,
chat/completions (SSE + non-stream stubs), rerun, test_db_connection,
logs, webhook/logs
- **756 Go unit tests** (745 → 756, +18)
- **17 → 0 Python integration test failures** (test_agents.py +
test_session_management/)

### Tools
21 eino tools: HTTPHelper, search tools, financial/data tools, mandatory
stubs

### Infrastructure
OTel observability, NATS message queue, DeepDoc gRPC client, SSRF
guards, IDOR mitigation
2026-06-12 22:58:28 +08:00

184 lines
5.7 KiB
Go

//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package io
import (
"archive/zip"
"bytes"
"io"
"strings"
"testing"
)
// TestDOCXWriter_MinimalDocument: the smallest possible DOCX — no
// header, no footer, no watermark, no page numbers. The output must be
// a valid ZIP starting with the PK magic and contain a document.xml
// with the source text.
func TestDOCXWriter_MinimalDocument(t *testing.T) {
doc, err := WriteDOCX("Hello", DOCXOptions{})
if err != nil {
t.Fatalf("WriteDOCX: %v", err)
}
if len(doc) < 4 {
t.Fatalf("doc too small: %d bytes", len(doc))
}
if !bytes.HasPrefix(doc, []byte{'P', 'K', 0x03, 0x04}) {
t.Fatalf("doc does not start with ZIP magic; first 4 bytes: % x", doc[:4])
}
zr, err := zip.NewReader(bytes.NewReader(doc), int64(len(doc)))
if err != nil {
t.Fatalf("zip.NewReader: %v", err)
}
body, ok := readZipFile(t, zr, "word/document.xml")
if !ok {
t.Fatal("word/document.xml not found in zip")
}
if !strings.Contains(body, "Hello") {
t.Errorf("document.xml missing source text; first 200 chars:\n%s", truncate(body, 200))
}
// The static parts should always be present.
if _, ok := readZipFile(t, zr, "[Content_Types].xml"); !ok {
t.Error("[Content_Types].xml missing")
}
if _, ok := readZipFile(t, zr, "_rels/.rels"); !ok {
t.Error("_rels/.rels missing")
}
}
// TestDOCXWriter_WithHeader: when HeaderText is set, the produced
// zip must contain word/header1.xml with the header text and a
// corresponding relationship entry in document.xml.rels.
func TestDOCXWriter_WithHeader(t *testing.T) {
doc, err := WriteDOCX("X", DOCXOptions{HeaderText: "TOP"})
if err != nil {
t.Fatalf("WriteDOCX: %v", err)
}
zr, err := zip.NewReader(bytes.NewReader(doc), int64(len(doc)))
if err != nil {
t.Fatalf("zip.NewReader: %v", err)
}
hdr, ok := readZipFile(t, zr, "word/header1.xml")
if !ok {
t.Fatal("word/header1.xml missing when HeaderText set")
}
if !strings.Contains(hdr, "TOP") {
t.Errorf("header1.xml missing 'TOP':\n%s", truncate(hdr, 200))
}
rels, ok := readZipFile(t, zr, "word/_rels/document.xml.rels")
if !ok {
t.Fatal("word/_rels/document.xml.rels missing")
}
if !strings.Contains(rels, "rIdHeader1") || !strings.Contains(rels, "header1.xml") {
t.Errorf("document.xml.rels missing header relationship:\n%s", truncate(rels, 200))
}
}
// TestDOCXWriter_XMLEscape: source content with <, >, &, " must be
// XML-escaped in the produced document.xml — the writer must never
// let raw user content break the OOXML topology.
func TestDOCXWriter_XMLEscape(t *testing.T) {
in := `A < B & C > D "quoted"`
doc, err := WriteDOCX(in, DOCXOptions{})
if err != nil {
t.Fatalf("WriteDOCX: %v", err)
}
zr, err := zip.NewReader(bytes.NewReader(doc), int64(len(doc)))
if err != nil {
t.Fatalf("zip.NewReader: %v", err)
}
body, ok := readZipFile(t, zr, "word/document.xml")
if !ok {
t.Fatal("word/document.xml missing")
}
// Escaped forms must appear. html.EscapeString produces the
// standard XML entity set: &lt; / &gt; / &amp; / &#34; (numeric
// for the double-quote, matching Go's stdlib contract).
want := "A &lt; B &amp; C &gt; D &#34;quoted&#34;"
if !strings.Contains(body, want) {
t.Errorf("expected XML-escaped content %q, got:\n%s", want, truncate(body, 400))
}
// Raw < and & must NOT appear inside the <w:t> text run.
if strings.Contains(body, "A < B &") {
t.Errorf("raw 'A < B &' leaked into document.xml")
}
}
// TestDOCXWriter_Watermark: setting WatermarkText should produce a
// header with the VML watermark shape, and the document.xml.rels
// should still include the header reference.
func TestDOCXWriter_Watermark(t *testing.T) {
doc, err := WriteDOCX("body", DOCXOptions{WatermarkText: "DRAFT"})
if err != nil {
t.Fatalf("WriteDOCX: %v", err)
}
zr, err := zip.NewReader(bytes.NewReader(doc), int64(len(doc)))
if err != nil {
t.Fatalf("zip.NewReader: %v", err)
}
hdr, ok := readZipFile(t, zr, "word/header1.xml")
if !ok {
t.Fatal("header1.xml missing when WatermarkText set")
}
if !strings.Contains(hdr, "DRAFT") {
t.Errorf("header1.xml missing watermark text 'DRAFT'")
}
if !strings.Contains(hdr, "v:textpath") {
t.Errorf("header1.xml missing v:textpath (VML watermark shape)")
}
}
// TestDOCXWriter_EmptyContent: an empty content string should still
// produce a valid DOCX (one empty paragraph).
func TestDOCXWriter_EmptyContent(t *testing.T) {
doc, err := WriteDOCX("", DOCXOptions{})
if err != nil {
t.Fatalf("WriteDOCX: %v", err)
}
if len(doc) < 4 || !bytes.HasPrefix(doc, []byte{'P', 'K', 0x03, 0x04}) {
t.Fatalf("expected ZIP magic, got: % x", doc[:4])
}
}
// readZipFile returns the file body as a string, or ("", false) if the
// file is not present.
func readZipFile(t *testing.T, zr *zip.Reader, name string) (string, bool) {
t.Helper()
for _, f := range zr.File {
if f.Name != name {
continue
}
rc, err := f.Open()
if err != nil {
t.Fatalf("open %s: %v", name, err)
}
defer rc.Close()
b, err := io.ReadAll(rc)
if err != nil {
t.Fatalf("read %s: %v", name, err)
}
return string(b), true
}
return "", false
}
func truncate(s string, n int) string {
if len(s) <= n {
return s
}
return s[:n] + "..."
}