mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
196 lines
6.0 KiB
Go
196 lines
6.0 KiB
Go
|
|
//go:build cgo
|
||
|
|
|
||
|
|
package parser
|
||
|
|
|
||
|
|
import (
|
||
|
|
"context"
|
||
|
|
"errors"
|
||
|
|
"image"
|
||
|
|
"testing"
|
||
|
|
)
|
||
|
|
|
||
|
|
func TestOCRRecognizeBatch_EmptyList(t *testing.T) {
|
||
|
|
mock := &MockDocAnalyzer{Healthy: true}
|
||
|
|
results, errs := mock.OCRRecognizeBatch(context.Background(), nil)
|
||
|
|
if len(results) != 0 {
|
||
|
|
t.Errorf("nil input: expected 0 results, got %d", len(results))
|
||
|
|
}
|
||
|
|
if len(errs) != 0 {
|
||
|
|
t.Errorf("nil input: expected 0 errs, got %d", len(errs))
|
||
|
|
}
|
||
|
|
results, errs = mock.OCRRecognizeBatch(context.Background(), []image.Image{})
|
||
|
|
if len(results) != 0 || len(errs) != 0 {
|
||
|
|
t.Error("empty input: expected 0 results/errs")
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestOCRRecognizeBatch_SingleImage(t *testing.T) {
|
||
|
|
mock := &MockDocAnalyzer{
|
||
|
|
Healthy: true,
|
||
|
|
OCRTexts: []OCRText{{Text: "hello", Confidence: 0.9}},
|
||
|
|
}
|
||
|
|
dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))
|
||
|
|
results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy})
|
||
|
|
if len(results) != 1 {
|
||
|
|
t.Fatalf("expected 1 result, got %d", len(results))
|
||
|
|
}
|
||
|
|
if len(results[0]) != 1 || results[0][0].Text != "hello" {
|
||
|
|
t.Errorf("expected 'hello', got %v", results[0])
|
||
|
|
}
|
||
|
|
if errs[0] != nil {
|
||
|
|
t.Errorf("expected nil err, got %v", errs[0])
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestOCRRecognizeBatch_MultipleImages(t *testing.T) {
|
||
|
|
mock := &MockDocAnalyzer{
|
||
|
|
Healthy: true,
|
||
|
|
OCRBatchTexts: [][]OCRText{
|
||
|
|
{{Text: "img0", Confidence: 0.9}},
|
||
|
|
{{Text: "img1", Confidence: 0.8}},
|
||
|
|
{{Text: "img2", Confidence: 0.7}},
|
||
|
|
},
|
||
|
|
}
|
||
|
|
dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))
|
||
|
|
results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy, dummy, dummy})
|
||
|
|
if len(results) != 3 {
|
||
|
|
t.Fatalf("expected 3 results, got %d", len(results))
|
||
|
|
}
|
||
|
|
for i, want := range []string{"img0", "img1", "img2"} {
|
||
|
|
if len(results[i]) != 1 || results[i][0].Text != want {
|
||
|
|
t.Errorf("image[%d]: expected %q, got %v", i, want, results[i])
|
||
|
|
}
|
||
|
|
if errs[i] != nil {
|
||
|
|
t.Errorf("image[%d]: expected nil err, got %v", i, errs[i])
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestOCRRecognizeBatch_NilImage(t *testing.T) {
|
||
|
|
mock := &MockDocAnalyzer{
|
||
|
|
Healthy: true,
|
||
|
|
OCRTexts: []OCRText{{Text: "ok", Confidence: 0.9}},
|
||
|
|
}
|
||
|
|
dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))
|
||
|
|
results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy, nil, dummy})
|
||
|
|
if len(results) != 3 {
|
||
|
|
t.Fatalf("expected 3 results, got %d", len(results))
|
||
|
|
}
|
||
|
|
if len(results[0]) == 0 || results[0][0].Text != "ok" {
|
||
|
|
t.Errorf("image[0]: expected 'ok', got %v", results[0])
|
||
|
|
}
|
||
|
|
if results[1] != nil {
|
||
|
|
t.Errorf("image[1]: nil image should get nil result, got %v", results[1])
|
||
|
|
}
|
||
|
|
if errs[1] == nil {
|
||
|
|
t.Error("image[1]: nil image should get error")
|
||
|
|
}
|
||
|
|
if len(results[2]) == 0 || results[2][0].Text != "ok" {
|
||
|
|
t.Errorf("image[2]: expected 'ok' after nil, got %v", results[2])
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestOCRRecognizeBatch_ErrorHandling(t *testing.T) {
|
||
|
|
mock := &MockDocAnalyzer{
|
||
|
|
Healthy: true,
|
||
|
|
OCRTexts: []OCRText{{Text: "ok", Confidence: 0.9}},
|
||
|
|
OCRBatchErr: func(i int) error {
|
||
|
|
if i == 1 {
|
||
|
|
return errors.New("simulated error")
|
||
|
|
}
|
||
|
|
return nil
|
||
|
|
},
|
||
|
|
}
|
||
|
|
dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))
|
||
|
|
results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy, dummy, dummy})
|
||
|
|
if len(results) != 3 {
|
||
|
|
t.Fatalf("expected 3 results, got %d", len(results))
|
||
|
|
}
|
||
|
|
// Image 0: OK
|
||
|
|
if errs[0] != nil {
|
||
|
|
t.Errorf("image[0]: expected nil err, got %v", errs[0])
|
||
|
|
}
|
||
|
|
// Image 1: error
|
||
|
|
if errs[1] == nil {
|
||
|
|
t.Error("image[1]: expected error")
|
||
|
|
}
|
||
|
|
// Image 2: OK (error only for index 1)
|
||
|
|
if errs[2] != nil {
|
||
|
|
t.Errorf("image[2]: expected nil err, got %v", errs[2])
|
||
|
|
}
|
||
|
|
// Results should still be returned alongside errors
|
||
|
|
if results[0] == nil || results[0][0].Text != "ok" {
|
||
|
|
t.Error("image[0]: result should be returned despite error on other image")
|
||
|
|
}
|
||
|
|
if results[2] == nil || results[2][0].Text != "ok" {
|
||
|
|
t.Error("image[2]: result should be returned despite error on other image")
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestOCRRecognizeBatch_EmptyText(t *testing.T) {
|
||
|
|
mock := &MockDocAnalyzer{
|
||
|
|
Healthy: true,
|
||
|
|
OCRTexts: []OCRText{}, // empty — simulate no text recognized
|
||
|
|
}
|
||
|
|
dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))
|
||
|
|
results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy})
|
||
|
|
if len(results) != 1 {
|
||
|
|
t.Fatalf("expected 1 result, got %d", len(results))
|
||
|
|
}
|
||
|
|
if len(results[0]) != 0 {
|
||
|
|
t.Errorf("expected empty texts, got %v", results[0])
|
||
|
|
}
|
||
|
|
if errs[0] != nil {
|
||
|
|
t.Errorf("expected nil err for empty text, got %v", errs[0])
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestOCRRecognizeBatch_FallbackToOCRTexts(t *testing.T) {
|
||
|
|
// When OCRBatchTexts is nil, fall back to OCRTexts for every image.
|
||
|
|
mock := &MockDocAnalyzer{
|
||
|
|
Healthy: true,
|
||
|
|
OCRTexts: []OCRText{{Text: "default", Confidence: 0.5}},
|
||
|
|
}
|
||
|
|
dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))
|
||
|
|
results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy, dummy, dummy})
|
||
|
|
if len(results) != 3 {
|
||
|
|
t.Fatalf("expected 3 results, got %d", len(results))
|
||
|
|
}
|
||
|
|
for i := 0; i < 3; i++ {
|
||
|
|
if len(results[i]) != 1 || results[i][0].Text != "default" {
|
||
|
|
t.Errorf("image[%d]: expected 'default', got %v", i, results[i])
|
||
|
|
}
|
||
|
|
if errs[i] != nil {
|
||
|
|
t.Errorf("image[%d]: expected nil err, got %v", i, errs[i])
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestOCRRecognizeBatch_PartialBatchTexts(t *testing.T) {
|
||
|
|
// OCRBatchTexts shorter than images — remaining fall back to OCRTexts.
|
||
|
|
mock := &MockDocAnalyzer{
|
||
|
|
Healthy: true,
|
||
|
|
OCRTexts: []OCRText{{Text: "fallback", Confidence: 0.5}},
|
||
|
|
OCRBatchTexts: [][]OCRText{
|
||
|
|
{{Text: "custom0", Confidence: 0.9}},
|
||
|
|
},
|
||
|
|
}
|
||
|
|
dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))
|
||
|
|
results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy, dummy, dummy})
|
||
|
|
if len(results) != 3 {
|
||
|
|
t.Fatalf("expected 3 results, got %d", len(results))
|
||
|
|
}
|
||
|
|
if results[0][0].Text != "custom0" {
|
||
|
|
t.Errorf("image[0]: expected 'custom0', got %q", results[0][0].Text)
|
||
|
|
}
|
||
|
|
if results[1][0].Text != "fallback" {
|
||
|
|
t.Errorf("image[1]: expected 'fallback', got %q", results[1][0].Text)
|
||
|
|
}
|
||
|
|
if results[2][0].Text != "fallback" {
|
||
|
|
t.Errorf("image[2]: expected 'fallback', got %q", results[2][0].Text)
|
||
|
|
}
|
||
|
|
if errs[0] != nil || errs[1] != nil || errs[2] != nil {
|
||
|
|
t.Error("all errors should be nil")
|
||
|
|
}
|
||
|
|
}
|