mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-03 01:01:56 +08:00
### What problem does this PR solve? Package refactor and PDF post process. ### Type of change - [x] Refactoring --------- Co-authored-by: Claude <noreply@anthropic.com>
41 lines
1.1 KiB
Go
41 lines
1.1 KiB
Go
package layout
|
|
|
|
import (
|
|
pdf "ragflow/internal/deepdoc/parser/pdf/type"
|
|
"testing"
|
|
)
|
|
|
|
func TestMergeSameBullet(t *testing.T) {
|
|
boxes := []pdf.TextBox{
|
|
{Text: "* item 1", Top: 100, Bottom: 112, X0: 50, X1: 200},
|
|
{Text: "* item 2", Top: 114, Bottom: 126, X0: 50, X1: 200},
|
|
}
|
|
result := MergeSameBullet(boxes, nil)
|
|
if len(result) != 1 {
|
|
t.Errorf("expected 1 merged box, got %d", len(result))
|
|
}
|
|
}
|
|
|
|
func TestMergeSameBulletNoMerge(t *testing.T) {
|
|
boxes := []pdf.TextBox{
|
|
{Text: "A item", Top: 100, Bottom: 112, X0: 50, X1: 200},
|
|
{Text: "B item", Top: 114, Bottom: 126, X0: 50, X1: 200},
|
|
}
|
|
result := MergeSameBullet(boxes, nil)
|
|
if len(result) != 2 {
|
|
t.Error("different first chars should not merge")
|
|
}
|
|
}
|
|
|
|
func TestMergeSameBulletChinese(t *testing.T) {
|
|
// Chinese chars start, should not merge via bullet rule
|
|
boxes := []pdf.TextBox{
|
|
{Text: "测试文本", Top: 100, Bottom: 112, X0: 50, X1: 200},
|
|
{Text: "测试内容", Top: 114, Bottom: 126, X0: 50, X1: 200},
|
|
}
|
|
result := MergeSameBullet(boxes, nil)
|
|
if len(result) != 2 {
|
|
t.Error("Chinese chars should not merge via bullet rule")
|
|
}
|
|
}
|