2026-06-25 20:16:16 +08:00
|
|
|
|
package parser
|
|
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
|
"context"
|
|
|
|
|
|
"image"
|
|
|
|
|
|
"strings"
|
|
|
|
|
|
"testing"
|
2026-06-29 18:46:41 +08:00
|
|
|
|
|
|
|
|
|
|
pdf "ragflow/internal/deepdoc/parser/pdf/type"
|
2026-06-25 20:16:16 +08:00
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
// TestTableSection_TextFromTSR verifies that table Sections carry
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// TSR-structured text (from pdf.TableItem.Rows) rather than raw char text.
|
2026-06-25 20:16:16 +08:00
|
|
|
|
// Python _parse_loaded_window_into_bboxes runs _extract_table_figure
|
|
|
|
|
|
// which pops table boxes and replaces them with consolidated table
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// entries. Go backfills pdf.Section.Text from pdf.TableItem.Rows after
|
2026-06-25 20:16:16 +08:00
|
|
|
|
// linkTableSections.
|
|
|
|
|
|
func TestTableSection_TextFromTSR(t *testing.T) {
|
|
|
|
|
|
eng := &mockEngine{
|
|
|
|
|
|
pageCount: 1,
|
|
|
|
|
|
renderW: 900, // 300pt at 3x = 900px (216 DPI)
|
|
|
|
|
|
renderH: 600,
|
2026-06-29 18:46:41 +08:00
|
|
|
|
chars: map[int][]pdf.TextChar{0: {
|
2026-06-25 20:16:16 +08:00
|
|
|
|
// PDF space (72 DPI): well inside DLA region
|
|
|
|
|
|
{X0: 50, X1: 70, Top: 40, Bottom: 55, Text: "姓"},
|
|
|
|
|
|
{X0: 80, X1: 100, Top: 40, Bottom: 55, Text: "名"},
|
|
|
|
|
|
}},
|
|
|
|
|
|
}
|
|
|
|
|
|
mock := &MockDocAnalyzer{
|
|
|
|
|
|
Healthy: true,
|
|
|
|
|
|
// DLA table region in pixel space (216 DPI).
|
|
|
|
|
|
// PDF space: x0=100/3≈33, y0=80/3≈27, x1=500/3≈167, y1=300/3≈100.
|
2026-06-29 18:46:41 +08:00
|
|
|
|
DLARegions: []pdf.DLARegion{
|
2026-06-25 20:16:16 +08:00
|
|
|
|
{X0: 100, Y0: 80, X1: 500, Y1: 300, Label: "table", Confidence: 0.9},
|
|
|
|
|
|
},
|
|
|
|
|
|
// TSR returns structured 2x2 cells with text.
|
|
|
|
|
|
// Pixel space (relative to cropped region).
|
2026-06-29 18:46:41 +08:00
|
|
|
|
TSRCells: []pdf.TSRCell{
|
2026-06-25 20:16:16 +08:00
|
|
|
|
{X0: 0, Y0: 0, X1: 200, Y1: 100, Text: "姓名", Label: "table column header"},
|
|
|
|
|
|
{X0: 200, Y0: 0, X1: 460, Y1: 100, Text: "年龄", Label: "table column header"},
|
|
|
|
|
|
{X0: 0, Y0: 100, X1: 200, Y1: 220, Text: "张三", Label: "table row"},
|
|
|
|
|
|
{X0: 200, Y0: 100, X1: 460, Y1: 220, Text: "25", Label: "table row"},
|
|
|
|
|
|
},
|
|
|
|
|
|
}
|
2026-06-29 18:46:41 +08:00
|
|
|
|
p := NewParser(pdf.DefaultParserConfig(), mock)
|
2026-06-25 20:16:16 +08:00
|
|
|
|
|
|
|
|
|
|
result, err := p.Parse(context.Background(), eng)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Fatalf("Parse: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ── Assert 1: Tables exist (Cells are filled by constructTable later) ──
|
|
|
|
|
|
if len(result.Tables) == 0 {
|
2026-06-29 18:46:41 +08:00
|
|
|
|
t.Fatal("expected at least 1 pdf.TableItem")
|
2026-06-25 20:16:16 +08:00
|
|
|
|
}
|
|
|
|
|
|
tbl := result.Tables[0]
|
|
|
|
|
|
if len(tbl.Cells) == 0 {
|
2026-06-29 18:46:41 +08:00
|
|
|
|
t.Fatal("expected TSR cells in pdf.TableItem")
|
2026-06-25 20:16:16 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ── Assert 2: A table section exists with HTML output ──
|
2026-06-29 18:46:41 +08:00
|
|
|
|
var tableSections []pdf.Section
|
2026-06-25 20:16:16 +08:00
|
|
|
|
for _, s := range result.Sections {
|
|
|
|
|
|
if s.LayoutType == "table" {
|
|
|
|
|
|
tableSections = append(tableSections, s)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if len(tableSections) == 0 {
|
|
|
|
|
|
t.Fatal("expected at least 1 section with LayoutType=='table'")
|
|
|
|
|
|
}
|
|
|
|
|
|
ts := tableSections[0]
|
|
|
|
|
|
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// ── Assert 3: pdf.Section.Text is HTML table from constructTable ──
|
2026-06-25 20:16:16 +08:00
|
|
|
|
if !strings.HasPrefix(ts.Text, "<table>") {
|
2026-06-29 18:46:41 +08:00
|
|
|
|
t.Errorf("table pdf.Section.Text = %q, want HTML <table>", ts.Text)
|
2026-06-25 20:16:16 +08:00
|
|
|
|
}
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// OSS pipeline: TSR cell text is not preserved in the grid (OSS
|
|
|
|
|
|
// GroupCells creates new cells from row×column cross product).
|
|
|
|
|
|
// Cell text comes from fillCellTextFromBoxes matching PDF chars,
|
|
|
|
|
|
// not from pre-filled TSR cell text (EE feature).
|
2026-06-25 20:16:16 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// TestEnrichWithDeepDoc_ImageOnlyPage verifies that enrichWithDeepDoc
|
|
|
|
|
|
// runs DLA on pages that have images but zero embedded chars (boxes).
|
|
|
|
|
|
// Regression test for test.pdf (Go 0 tables, Py 1 table).
|
|
|
|
|
|
func TestEnrichWithDeepDoc_ImageOnlyPage(t *testing.T) {
|
|
|
|
|
|
mock := &MockDocAnalyzer{
|
|
|
|
|
|
Healthy: true,
|
2026-06-29 18:46:41 +08:00
|
|
|
|
DLARegions: []pdf.DLARegion{
|
2026-06-25 20:16:16 +08:00
|
|
|
|
{X0: 54, Y0: 100, X1: 846, Y1: 500, Label: "table", Confidence: 0.95},
|
|
|
|
|
|
},
|
2026-06-29 18:46:41 +08:00
|
|
|
|
TSRCells: []pdf.TSRCell{
|
2026-06-25 20:16:16 +08:00
|
|
|
|
{X0: 0, Y0: 0, X1: 200, Y1: 100, Text: "A", Label: "table row"},
|
|
|
|
|
|
},
|
|
|
|
|
|
}
|
2026-06-29 18:46:41 +08:00
|
|
|
|
p := NewParser(pdf.DefaultParserConfig(), mock)
|
2026-06-25 20:16:16 +08:00
|
|
|
|
|
|
|
|
|
|
// 0 text boxes, but page 0 has a rendered image.
|
2026-06-29 18:46:41 +08:00
|
|
|
|
boxes := []pdf.TextBox{}
|
2026-06-25 20:16:16 +08:00
|
|
|
|
dummyImg := image.NewRGBA(image.Rect(0, 0, 900, 600))
|
|
|
|
|
|
pageImages := map[int]image.Image{0: dummyImg}
|
|
|
|
|
|
|
2026-06-29 18:46:41 +08:00
|
|
|
|
tables := p.enrichWithDeepDoc(context.Background(), nil, nil, boxes, pageImages)
|
2026-06-25 20:16:16 +08:00
|
|
|
|
if len(tables) == 0 {
|
|
|
|
|
|
t.Fatal("enrichWithDeepDoc: expected at least 1 table from DLA on page with image but no boxes, got 0")
|
|
|
|
|
|
}
|
|
|
|
|
|
if len(tables[0].Cells) == 0 {
|
|
|
|
|
|
t.Fatal("enrichWithDeepDoc: expected TSR cells in table")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// TestFigureCaption_MergedIntoFigure verifies that "figure caption" text
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// is merged into the nearest "figure" pdf.Section and the caption pdf.Section is
|
2026-06-25 20:16:16 +08:00
|
|
|
|
// removed. Matches Python _extract_table_figure caption matching.
|
|
|
|
|
|
func TestFigureCaption_MergedIntoFigure(t *testing.T) {
|
|
|
|
|
|
eng := &mockEngine{
|
|
|
|
|
|
pageCount: 1,
|
|
|
|
|
|
renderW: 1800, renderH: 2400,
|
2026-06-29 18:46:41 +08:00
|
|
|
|
chars: map[int][]pdf.TextChar{0: {
|
2026-06-25 20:16:16 +08:00
|
|
|
|
// Figure text — overlaps DLA figure region (pixel Y=80-300 → PDF 27-100).
|
|
|
|
|
|
{X0: 40, X1: 60, Top: 30, Bottom: 45, Text: "F"},
|
|
|
|
|
|
// Caption text — overlaps DLA figure caption region (pixel Y=310-340 → PDF 103-113).
|
|
|
|
|
|
{X0: 40, X1: 60, Top: 104, Bottom: 112, Text: "C"},
|
|
|
|
|
|
}},
|
|
|
|
|
|
}
|
|
|
|
|
|
mock := &MockDocAnalyzer{
|
|
|
|
|
|
Healthy: true,
|
2026-06-29 18:46:41 +08:00
|
|
|
|
DLARegions: []pdf.DLARegion{
|
2026-06-25 20:16:16 +08:00
|
|
|
|
{X0: 100, Y0: 80, X1: 500, Y1: 300, Label: "figure", Confidence: 0.9},
|
|
|
|
|
|
// Caption is below the figure.
|
|
|
|
|
|
{X0: 100, Y0: 310, X1: 500, Y1: 340, Label: "figure caption", Confidence: 0.9},
|
|
|
|
|
|
},
|
|
|
|
|
|
}
|
2026-06-29 18:46:41 +08:00
|
|
|
|
p := NewParser(pdf.DefaultParserConfig(), mock)
|
2026-06-25 20:16:16 +08:00
|
|
|
|
|
|
|
|
|
|
result, err := p.Parse(context.Background(), eng)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Fatalf("Parse: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// Assert 1: figure caption pdf.Section removed.
|
2026-06-25 20:16:16 +08:00
|
|
|
|
for _, s := range result.Sections {
|
|
|
|
|
|
if s.LayoutType == "figure caption" {
|
2026-06-29 18:46:41 +08:00
|
|
|
|
t.Errorf("figure caption pdf.Section should be removed after mergeCaptions, got %q", s.Text)
|
2026-06-25 20:16:16 +08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// Assert 2: figure pdf.Section exists and has caption text appended.
|
|
|
|
|
|
var fig *pdf.Section
|
2026-06-25 20:16:16 +08:00
|
|
|
|
for i := range result.Sections {
|
|
|
|
|
|
if result.Sections[i].LayoutType == "figure" {
|
|
|
|
|
|
fig = &result.Sections[i]
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if fig == nil {
|
2026-06-29 18:46:41 +08:00
|
|
|
|
t.Fatal("expected a figure pdf.Section")
|
2026-06-25 20:16:16 +08:00
|
|
|
|
}
|
|
|
|
|
|
if !strings.Contains(fig.Text, "C") {
|
|
|
|
|
|
t.Errorf("figure Text should contain caption text 'C', got %q", fig.Text)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// Assert 3: figure is in result.Figures().
|
|
|
|
|
|
if len(result.Figures()) == 0 {
|
|
|
|
|
|
t.Error("expected at least 1 entry in result.Figures()")
|
2026-06-25 20:16:16 +08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// TestTableCaption_MergedIntoTable verifies that "table caption" text
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// is merged into the nearest table pdf.Section and the caption is removed.
|
2026-06-25 20:16:16 +08:00
|
|
|
|
func TestTableCaption_MergedIntoTable(t *testing.T) {
|
|
|
|
|
|
eng := &mockEngine{
|
|
|
|
|
|
pageCount: 1,
|
|
|
|
|
|
renderW: 1800, renderH: 2400,
|
2026-06-29 18:46:41 +08:00
|
|
|
|
chars: map[int][]pdf.TextChar{0: {
|
2026-06-25 20:16:16 +08:00
|
|
|
|
// Table text — overlaps DLA table region (pixel Y=80-300 → PDF 27-100).
|
|
|
|
|
|
{X0: 40, X1: 60, Top: 30, Bottom: 45, Text: "T"},
|
|
|
|
|
|
// Caption text — overlaps DLA table caption region (pixel Y=310-340 → PDF 103-113).
|
|
|
|
|
|
{X0: 40, X1: 60, Top: 104, Bottom: 112, Text: "C"},
|
|
|
|
|
|
}},
|
|
|
|
|
|
}
|
|
|
|
|
|
mock := &MockDocAnalyzer{
|
|
|
|
|
|
Healthy: true,
|
2026-06-29 18:46:41 +08:00
|
|
|
|
DLARegions: []pdf.DLARegion{
|
2026-06-25 20:16:16 +08:00
|
|
|
|
{X0: 100, Y0: 80, X1: 500, Y1: 300, Label: "table", Confidence: 0.9},
|
|
|
|
|
|
{X0: 100, Y0: 310, X1: 500, Y1: 340, Label: "table caption", Confidence: 0.9},
|
|
|
|
|
|
},
|
2026-06-29 18:46:41 +08:00
|
|
|
|
TSRCells: []pdf.TSRCell{
|
2026-06-25 20:16:16 +08:00
|
|
|
|
{X0: 0, Y0: 0, X1: 200, Y1: 100, Text: "A", Label: "table row"},
|
|
|
|
|
|
{X0: 200, Y0: 0, X1: 460, Y1: 100, Text: "B", Label: "table row"},
|
|
|
|
|
|
},
|
|
|
|
|
|
}
|
2026-06-29 18:46:41 +08:00
|
|
|
|
p := NewParser(pdf.DefaultParserConfig(), mock)
|
2026-06-25 20:16:16 +08:00
|
|
|
|
|
|
|
|
|
|
result, err := p.Parse(context.Background(), eng)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Fatalf("Parse: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// Assert: table caption pdf.Section removed, text merged into table pdf.Section.
|
2026-06-25 20:16:16 +08:00
|
|
|
|
for _, s := range result.Sections {
|
|
|
|
|
|
if s.LayoutType == "table caption" {
|
2026-06-29 18:46:41 +08:00
|
|
|
|
t.Errorf("table caption pdf.Section should be removed, got %q", s.Text)
|
2026-06-25 20:16:16 +08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-06-29 18:46:41 +08:00
|
|
|
|
var tbl *pdf.Section
|
2026-06-25 20:16:16 +08:00
|
|
|
|
for i := range result.Sections {
|
|
|
|
|
|
if result.Sections[i].LayoutType == "table" {
|
|
|
|
|
|
tbl = &result.Sections[i]
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if tbl == nil {
|
2026-06-29 18:46:41 +08:00
|
|
|
|
t.Fatal("expected a table pdf.Section")
|
2026-06-25 20:16:16 +08:00
|
|
|
|
}
|
|
|
|
|
|
if !strings.Contains(tbl.Text, "C") {
|
|
|
|
|
|
t.Errorf("table Text should contain caption text 'C', got %q", tbl.Text)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// TestTextSectionsInsideTableRegion_Suppressed verifies that Sections
|
|
|
|
|
|
// whose positions fall inside a table region are suppressed even when
|
|
|
|
|
|
// DLA labeled them as "text". Python _extract_table_figure pops ALL
|
|
|
|
|
|
// boxes overlapping a table region, regardless of their DLA label.
|
|
|
|
|
|
// This is the #1 cause of Go vs Python discrepancy on table-heavy PDFs.
|
|
|
|
|
|
func TestTextSectionsInsideTableRegion_Suppressed(t *testing.T) {
|
|
|
|
|
|
eng := &mockEngine{
|
|
|
|
|
|
pageCount: 1,
|
|
|
|
|
|
renderW: 1800, renderH: 2400,
|
2026-06-29 18:46:41 +08:00
|
|
|
|
chars: map[int][]pdf.TextChar{0: {
|
2026-06-25 20:16:16 +08:00
|
|
|
|
// Box A: inside DLA table region, labeled as "text" by DLA.
|
|
|
|
|
|
{X0: 50, X1: 100, Top: 40, Bottom: 55, Text: "碎片文字"},
|
|
|
|
|
|
// Box B: inside DLA table region, same situation.
|
|
|
|
|
|
{X0: 120, X1: 160, Top: 40, Bottom: 55, Text: "垃圾"},
|
|
|
|
|
|
}},
|
|
|
|
|
|
}
|
|
|
|
|
|
// DLA returns a "table" region AND a "text" sub-region inside it.
|
|
|
|
|
|
// Real DLA often splits large table regions this way.
|
|
|
|
|
|
mock := &MockDocAnalyzer{
|
|
|
|
|
|
Healthy: true,
|
2026-06-29 18:46:41 +08:00
|
|
|
|
DLARegions: []pdf.DLARegion{
|
2026-06-25 20:16:16 +08:00
|
|
|
|
{X0: 100, Y0: 80, X1: 500, Y1: 300, Label: "table", Confidence: 0.9},
|
|
|
|
|
|
{X0: 120, Y0: 100, X1: 180, Y1: 140, Label: "text", Confidence: 0.8},
|
|
|
|
|
|
},
|
2026-06-29 18:46:41 +08:00
|
|
|
|
TSRCells: []pdf.TSRCell{
|
2026-06-25 20:16:16 +08:00
|
|
|
|
{X0: 0, Y0: 0, X1: 200, Y1: 100, Text: "姓名", Label: "table row"},
|
|
|
|
|
|
{X0: 200, Y0: 0, X1: 460, Y1: 100, Text: "年龄", Label: "table row"},
|
|
|
|
|
|
},
|
|
|
|
|
|
}
|
2026-06-29 18:46:41 +08:00
|
|
|
|
p := NewParser(pdf.DefaultParserConfig(), mock)
|
2026-06-25 20:16:16 +08:00
|
|
|
|
|
|
|
|
|
|
result, err := p.Parse(context.Background(), eng)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Fatalf("Parse: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-29 18:46:41 +08:00
|
|
|
|
// Assert 1: table pdf.Section exists with structured text.
|
2026-06-25 20:16:16 +08:00
|
|
|
|
var hasTable bool
|
|
|
|
|
|
for _, s := range result.Sections {
|
|
|
|
|
|
if s.LayoutType == "table" && s.Text != "" {
|
|
|
|
|
|
hasTable = true
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if !hasTable {
|
2026-06-29 18:46:41 +08:00
|
|
|
|
t.Fatal("expected a table pdf.Section with structured text")
|
2026-06-25 20:16:16 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Assert 2: NO "text" fragment sections remain — they were inside
|
|
|
|
|
|
// the table region and should be suppressed (Python pops them).
|
|
|
|
|
|
for _, s := range result.Sections {
|
|
|
|
|
|
if s.LayoutType != "table" && strings.Contains(s.Text, "碎片") {
|
|
|
|
|
|
t.Errorf("text fragment %q inside table region should be suppressed, got %q",
|
|
|
|
|
|
s.Text, s.LayoutType)
|
|
|
|
|
|
}
|
|
|
|
|
|
if s.LayoutType != "table" && strings.Contains(s.Text, "垃圾") {
|
|
|
|
|
|
t.Errorf("text fragment %q inside table region should be suppressed, got %q",
|
|
|
|
|
|
s.Text, s.LayoutType)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
sectionCount := len(result.Sections)
|
|
|
|
|
|
if sectionCount > 3 {
|
|
|
|
|
|
t.Errorf("expected ≤3 sections (table + outside fragments), got %d", sectionCount)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// TestEmptyDoc_NoCrash verifies Parse handles edge cases gracefully.
|
|
|
|
|
|
func TestEmptyDoc_NoCrash(t *testing.T) {
|
|
|
|
|
|
eng := &mockEngine{pageCount: 0}
|
2026-06-29 18:46:41 +08:00
|
|
|
|
p := NewParser(pdf.DefaultParserConfig(), &MockDocAnalyzer{Healthy: true})
|
2026-06-25 20:16:16 +08:00
|
|
|
|
result, err := p.Parse(context.Background(), eng)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Fatalf("Parse: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
if len(result.Sections) != 0 {
|
|
|
|
|
|
t.Errorf("expected 0 sections for empty doc, got %d", len(result.Sections))
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// TestNilChars_handled verifies zero-chars pages don't crash.
|
|
|
|
|
|
func TestNilChars_Handled(t *testing.T) {
|
|
|
|
|
|
eng := &mockEngine{pageCount: 1, renderW: 200, renderH: 200}
|
2026-06-29 18:46:41 +08:00
|
|
|
|
p := NewParser(pdf.DefaultParserConfig(), &MockDocAnalyzer{Healthy: true})
|
2026-06-25 20:16:16 +08:00
|
|
|
|
result, err := p.Parse(context.Background(), eng)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
t.Fatalf("Parse: %v", err)
|
|
|
|
|
|
}
|
|
|
|
|
|
if len(result.Sections) != 0 && p.DeepDoc != nil {
|
|
|
|
|
|
t.Logf("nil chars + DeepDoc: sections=%d (may trigger OCR path)", len(result.Sections))
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|