Files
ragflow/internal/deepdoc/parser/pdf/renderer_pdfium.go
Jack 304d9e02bb Refactor: migrate pdf_parser.py to golang (#16323)
### What problem does this PR solve?

HTTP API based on an ONNX model.
Migrate pdf_parser.py to Go.

### Type of change

- [x] Refactoring
2026-06-25 20:16:16 +08:00

36 lines
874 B
Go

//go:build cgo
package parser
import (
"image"
"ragflow/internal/deepdoc/parser/pdf/pdfium"
)
// pdfiumRender rasterises page pageNum of the engine's PDF through the pdfium
// C library. pdfium is preferred for its higher-quality output (AA, hinting),
// which is essential for downstream OCR/DLA accuracy on scanned or
// low-quality PDFs.
//
// Outcomes:
//   - engine.RawData() is nil: the call is delegated to fallbackRender and its
//     result is returned unchanged.
//   - pdfium.RenderPage reports an error: that error is returned as-is.
//   - pdfium.RenderPage yields a nil image without an error: ErrNoPDFData.
//   - otherwise: the rendered image and a nil error.
func pdfiumRender(engine PDFEngine, pageNum int) (image.Image, error) {
	// Third argument handed to pdfium.RenderPage.
	// NOTE(review): presumably the render resolution in DPI (72 x 3) —
	// confirm against pdfium.RenderPage's signature.
	const resolution = 216

	pdfBytes := engine.RawData()
	if pdfBytes == nil {
		// PythonCharEngine and mocks don't carry PDF bytes, so let the
		// engine produce the page image through its own RenderPageImage.
		return fallbackRender(engine, pageNum)
	}

	page, renderErr := pdfium.RenderPage(pdfBytes, pageNum, resolution)
	switch {
	case renderErr != nil:
		return nil, renderErr
	case page == nil:
		// Guard against typed nil: (*image.RGBA)(nil) wrapped as a non-nil
		// interface would panic on downstream .Bounds() / .At() calls.
		// NOTE(review): this comparison only catches a nil pointer if
		// pdfium.RenderPage returns a concrete pointer type; if it returns
		// image.Image, an already-wrapped nil pointer would slip through —
		// confirm the return type.
		return nil, ErrNoPDFData
	}
	return page, nil
}
// init installs pdfiumRender as the package's render function by assigning it
// to renderFn at package initialisation. This file carries a cgo build
// constraint, so the assignment only happens in cgo-enabled builds.
// NOTE(review): renderFn is declared elsewhere in the package; presumably it
// is the hook the parser calls to rasterise pages and has a non-cgo default —
// confirm.
func init() {
renderFn = pdfiumRender
}