mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
### What problem does this PR solve? Http API based on onnx model. pdf_parser.py to golang ### Type of change - [x] Refactoring
36 lines
874 B
Go
36 lines
874 B
Go
//go:build cgo
|
|
|
|
package parser
|
|
|
|
import (
|
|
"image"
|
|
|
|
"ragflow/internal/deepdoc/parser/pdf/pdfium"
|
|
)
|
|
|
|
// pdfiumRender uses the pdfium C library for higher-quality rasterisation
|
|
// (AA, hinting) which is essential for downstream OCR/DLA accuracy on
|
|
// scanned or low-quality PDFs.
|
|
func pdfiumRender(engine PDFEngine, pageNum int) (image.Image, error) {
|
|
raw := engine.RawData()
|
|
if raw == nil {
|
|
// PythonCharEngine and mocks don't carry PDF bytes —
|
|
// fall back to the engine's own RenderPageImage.
|
|
return fallbackRender(engine, pageNum)
|
|
}
|
|
// Guard against typed nil: (*image.RGBA)(nil) wrapped as non-nil interface
|
|
// would panic on downstream .Bounds() / .At() calls.
|
|
img, err := pdfium.RenderPage(raw, pageNum, 216)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if img == nil {
|
|
return nil, ErrNoPDFData
|
|
}
|
|
return img, nil
|
|
}
|
|
|
|
func init() {
|
|
renderFn = pdfiumRender
|
|
}
|