Files
ragflow/internal/deepdoc/parser/pdf/ocr_recognize_batch_test.go

196 lines
6.0 KiB
Go
Raw Normal View History

//go:build cgo
package parser
import (
"context"
"errors"
"image"
"testing"
)
// TestOCRRecognizeBatch_EmptyList checks that the mock's OCRRecognizeBatch
// yields no results and no errors for both a nil and a zero-length image slice.
func TestOCRRecognizeBatch_EmptyList(t *testing.T) {
	ctx := context.Background()
	analyzer := &MockDocAnalyzer{Healthy: true}

	// Nil slice: each output is reported separately.
	gotTexts, gotErrs := analyzer.OCRRecognizeBatch(ctx, nil)
	if n := len(gotTexts); n != 0 {
		t.Errorf("nil input: expected 0 results, got %d", n)
	}
	if n := len(gotErrs); n != 0 {
		t.Errorf("nil input: expected 0 errs, got %d", n)
	}

	// Non-nil but empty slice: both outputs are checked together.
	gotTexts, gotErrs = analyzer.OCRRecognizeBatch(ctx, []image.Image{})
	if !(len(gotTexts) == 0 && len(gotErrs) == 0) {
		t.Error("empty input: expected 0 results/errs")
	}
}
// TestOCRRecognizeBatch_SingleImage checks that a batch of one image produces
// exactly one result holding the mock's configured OCRTexts, with a nil error.
func TestOCRRecognizeBatch_SingleImage(t *testing.T) {
	mock := &MockDocAnalyzer{
		Healthy:  true,
		OCRTexts: []OCRText{{Text: "hello", Confidence: 0.9}},
	}
	dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))

	results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy})
	if len(results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(results))
	}
	// Stop here on a length mismatch: indexing errs[0] below would otherwise
	// panic and abort the whole test binary instead of failing this test.
	if len(errs) != 1 {
		t.Fatalf("expected 1 err entry, got %d", len(errs))
	}

	if len(results[0]) != 1 || results[0][0].Text != "hello" {
		t.Errorf("expected 'hello', got %v", results[0])
	}
	if errs[0] != nil {
		t.Errorf("expected nil err, got %v", errs[0])
	}
}
// TestOCRRecognizeBatch_MultipleImages checks that, when OCRBatchTexts has one
// entry per image, each image receives its own entry in order and a nil error.
func TestOCRRecognizeBatch_MultipleImages(t *testing.T) {
	mock := &MockDocAnalyzer{
		Healthy: true,
		OCRBatchTexts: [][]OCRText{
			{{Text: "img0", Confidence: 0.9}},
			{{Text: "img1", Confidence: 0.8}},
			{{Text: "img2", Confidence: 0.7}},
		},
	}
	dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))

	results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy, dummy, dummy})
	if len(results) != 3 {
		t.Fatalf("expected 3 results, got %d", len(results))
	}
	// Stop here on a length mismatch so errs[i] below cannot panic.
	if len(errs) != 3 {
		t.Fatalf("expected 3 err entries, got %d", len(errs))
	}

	for i, want := range []string{"img0", "img1", "img2"} {
		if len(results[i]) != 1 || results[i][0].Text != want {
			t.Errorf("image[%d]: expected %q, got %v", i, want, results[i])
		}
		if errs[i] != nil {
			t.Errorf("image[%d]: expected nil err, got %v", i, errs[i])
		}
	}
}
// TestOCRRecognizeBatch_NilImage checks that a nil image in the middle of a
// batch gets a nil result and a non-nil error, while the images before and
// after it still receive the configured OCRTexts.
func TestOCRRecognizeBatch_NilImage(t *testing.T) {
	mock := &MockDocAnalyzer{
		Healthy:  true,
		OCRTexts: []OCRText{{Text: "ok", Confidence: 0.9}},
	}
	dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))

	results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy, nil, dummy})
	if len(results) != 3 {
		t.Fatalf("expected 3 results, got %d", len(results))
	}
	// Stop here on a length mismatch so errs[1] below cannot panic.
	if len(errs) != 3 {
		t.Fatalf("expected 3 err entries, got %d", len(errs))
	}

	if len(results[0]) == 0 || results[0][0].Text != "ok" {
		t.Errorf("image[0]: expected 'ok', got %v", results[0])
	}
	if results[1] != nil {
		t.Errorf("image[1]: nil image should get nil result, got %v", results[1])
	}
	if errs[1] == nil {
		t.Error("image[1]: nil image should get error")
	}
	// The entry after the nil image must still be processed.
	if len(results[2]) == 0 || results[2][0].Text != "ok" {
		t.Errorf("image[2]: expected 'ok' after nil, got %v", results[2])
	}
}
// TestOCRRecognizeBatch_ErrorHandling checks that an error injected for one
// image (index 1, via OCRBatchErr) is reported only at that index, and that
// the other images still get a nil error and the configured OCRTexts.
func TestOCRRecognizeBatch_ErrorHandling(t *testing.T) {
	mock := &MockDocAnalyzer{
		Healthy:  true,
		OCRTexts: []OCRText{{Text: "ok", Confidence: 0.9}},
		OCRBatchErr: func(i int) error {
			if i == 1 {
				return errors.New("simulated error")
			}
			return nil
		},
	}
	dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))

	results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy, dummy, dummy})
	if len(results) != 3 {
		t.Fatalf("expected 3 results, got %d", len(results))
	}
	// Stop here on a length mismatch so the errs[i] accesses below cannot panic.
	if len(errs) != 3 {
		t.Fatalf("expected 3 err entries, got %d", len(errs))
	}

	// Image 0: OK
	if errs[0] != nil {
		t.Errorf("image[0]: expected nil err, got %v", errs[0])
	}
	// Image 1: error
	if errs[1] == nil {
		t.Error("image[1]: expected error")
	}
	// Image 2: OK (error only for index 1)
	if errs[2] != nil {
		t.Errorf("image[2]: expected nil err, got %v", errs[2])
	}

	// Results for the healthy images should still be returned. Check the
	// length rather than nil-ness: a non-nil empty slice would otherwise
	// panic on the [0] index.
	if len(results[0]) == 0 || results[0][0].Text != "ok" {
		t.Error("image[0]: result should be returned despite error on other image")
	}
	if len(results[2]) == 0 || results[2][0].Text != "ok" {
		t.Error("image[2]: result should be returned despite error on other image")
	}
}
// TestOCRRecognizeBatch_EmptyText checks that an image for which no text is
// configured yields one empty result and a nil error (no text is not an error).
func TestOCRRecognizeBatch_EmptyText(t *testing.T) {
	mock := &MockDocAnalyzer{
		Healthy:  true,
		OCRTexts: []OCRText{}, // empty — simulate no text recognized
	}
	dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))

	results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy})
	if len(results) != 1 {
		t.Fatalf("expected 1 result, got %d", len(results))
	}
	// Stop here on a length mismatch so errs[0] below cannot panic.
	if len(errs) != 1 {
		t.Fatalf("expected 1 err entry, got %d", len(errs))
	}

	if len(results[0]) != 0 {
		t.Errorf("expected empty texts, got %v", results[0])
	}
	if errs[0] != nil {
		t.Errorf("expected nil err for empty text, got %v", errs[0])
	}
}
// TestOCRRecognizeBatch_FallbackToOCRTexts checks that, when OCRBatchTexts is
// left unset, every image in the batch receives the shared OCRTexts value and
// a nil error.
func TestOCRRecognizeBatch_FallbackToOCRTexts(t *testing.T) {
	// When OCRBatchTexts is nil, fall back to OCRTexts for every image.
	mock := &MockDocAnalyzer{
		Healthy:  true,
		OCRTexts: []OCRText{{Text: "default", Confidence: 0.5}},
	}
	dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))

	results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy, dummy, dummy})
	if len(results) != 3 {
		t.Fatalf("expected 3 results, got %d", len(results))
	}
	// Stop here on a length mismatch so errs[i] below cannot panic.
	if len(errs) != 3 {
		t.Fatalf("expected 3 err entries, got %d", len(errs))
	}

	for i := range results {
		if len(results[i]) != 1 || results[i][0].Text != "default" {
			t.Errorf("image[%d]: expected 'default', got %v", i, results[i])
		}
		if errs[i] != nil {
			t.Errorf("image[%d]: expected nil err, got %v", i, errs[i])
		}
	}
}
// TestOCRRecognizeBatch_PartialBatchTexts checks that, when OCRBatchTexts has
// fewer entries than there are images, the covered images use their batch
// entry and the remaining images fall back to OCRTexts, all with nil errors.
func TestOCRRecognizeBatch_PartialBatchTexts(t *testing.T) {
	// OCRBatchTexts shorter than images — remaining fall back to OCRTexts.
	mock := &MockDocAnalyzer{
		Healthy:  true,
		OCRTexts: []OCRText{{Text: "fallback", Confidence: 0.5}},
		OCRBatchTexts: [][]OCRText{
			{{Text: "custom0", Confidence: 0.9}},
		},
	}
	dummy := image.NewRGBA(image.Rect(0, 0, 10, 10))

	results, errs := mock.OCRRecognizeBatch(context.Background(), []image.Image{dummy, dummy, dummy})
	if len(results) != 3 {
		t.Fatalf("expected 3 results, got %d", len(results))
	}
	// Stop here on a length mismatch so errs[i] below cannot panic.
	if len(errs) != 3 {
		t.Fatalf("expected 3 err entries, got %d", len(errs))
	}

	for i, want := range []string{"custom0", "fallback", "fallback"} {
		// Report an empty result as a failure instead of panicking on [0].
		if len(results[i]) == 0 {
			t.Errorf("image[%d]: expected '%s', got no texts", i, want)
		} else if got := results[i][0].Text; got != want {
			t.Errorf("image[%d]: expected '%s', got %q", i, want, got)
		}
		// Report errors per image so a failure identifies the offending index.
		if errs[i] != nil {
			t.Errorf("image[%d]: expected nil err, got %v", i, errs[i])
		}
	}
}