mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Refactor: migrate pdf_parser.py to golang (#16323)
### What problem does this PR solve?

HTTP API based on an ONNX model. Migrates pdf_parser.py to Go.

### Type of change

- [x] Refactoring
This commit is contained in:
38
internal/deepdoc/parser/pdf/renderer.go
Normal file
38
internal/deepdoc/parser/pdf/renderer.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"image"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// renderFn is the active page-rendering function. It defaults to
|
||||
// fallbackRender (pure Go, engine-provided RenderPageImage). When
|
||||
// pdfium is available (*_cgo build), renderer_pdfium.go replaces it
|
||||
// with pdfiumRender via its init().
|
||||
// renderFn has type func(PDFEngine, int) (image.Image, error), inferred from
// fallbackRender; any replacement assigned to it must have that signature.
var renderFn = fallbackRender
|
||||
|
||||
// renderPageToImage renders a page at 216 DPI for downstream DLA/TSR/OCR.
|
||||
func renderPageToImage(engine PDFEngine, pageNum int) (image.Image, error) {
|
||||
return renderFn(engine, pageNum)
|
||||
}
|
||||
|
||||
// fallbackRender uses the engine's own RenderPageImage (no C dependency).
|
||||
func fallbackRender(engine PDFEngine, pageNum int) (image.Image, error) {
|
||||
img, err := engine.RenderPageImage(pageNum, dlaDPI)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Guard against typed-nil (e.g. (*image.RGBA)(nil) returned as non-nil
|
||||
// interface). The plain img==nil check misses that case.
|
||||
if img == nil || reflect.ValueOf(img).IsNil() {
|
||||
return nil, ErrNoPDFData
|
||||
}
|
||||
return img, nil
|
||||
}
|
||||
|
||||
// ErrNoPDFData is returned when the engine has no raw PDF bytes to render.
|
||||
var ErrNoPDFData = &pdfError{"engine has no raw PDF data"}
|
||||
|
||||
// pdfError is a minimal error type that carries a fixed message.
type pdfError struct {
	msg string
}

// Error implements the error interface by returning the stored message.
func (e *pdfError) Error() string {
	return e.msg
}
|
||||
Reference in New Issue
Block a user