Refactor: migrate pdf_parser.py to golang (#16323)

### What problem does this PR solve?

Provides an HTTP API based on an ONNX model.
Migrates pdf_parser.py to Go.

### Type of change

- [x] Refactoring
This commit is contained in:
Jack
2026-06-25 20:16:16 +08:00
committed by GitHub
parent c7052f4dd1
commit 304d9e02bb
98 changed files with 24591 additions and 8 deletions

View File

@@ -0,0 +1,38 @@
package parser
import (
"image"
"reflect"
)
// renderFn is the active page-rendering function; renderPageToImage
// dispatches every call through it. It defaults to fallbackRender
// (pure Go, engine-provided RenderPageImage). When pdfium is available
// (*_cgo build), renderer_pdfium.go replaces it with pdfiumRender via
// its init().
//
// NOTE(review): this is a plain package-level variable with no
// synchronization visible in this file; presumably it is only reassigned
// during package initialization — confirm before mutating it elsewhere.
var renderFn = fallbackRender
// renderPageToImage produces the raster image of page pageNum that the
// downstream DLA/TSR/OCR stages consume. It performs no work of its own:
// the call is forwarded to whichever renderer is currently installed in
// renderFn, and that renderer's image and error are returned unchanged.
// The resolution is decided by the active renderer (nominally 216 DPI).
func renderPageToImage(engine PDFEngine, pageNum int) (image.Image, error) {
	rendered, renderErr := renderFn(engine, pageNum)
	return rendered, renderErr
}
// fallbackRender renders page pageNum through the engine's own
// RenderPageImage at dlaDPI (no C dependency).
//
// If the engine reports an error, that error is returned unchanged with a
// nil image. If the engine reports success but yields no image — either an
// untyped nil or a typed nil such as (*image.RGBA)(nil) wrapped in a
// non-nil interface — ErrNoPDFData is returned instead.
func fallbackRender(engine PDFEngine, pageNum int) (image.Image, error) {
	img, err := engine.RenderPageImage(pageNum, dlaDPI)
	if err != nil {
		return nil, err
	}
	if img == nil {
		return nil, ErrNoPDFData
	}
	// Guard against a typed nil, which the plain img == nil check misses.
	// reflect.Value.IsNil panics for kinds that cannot be nil, and
	// value-typed images are legal (image.Rectangle implements
	// image.Image), so only call IsNil on nilable kinds.
	switch v := reflect.ValueOf(img); v.Kind() {
	case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice:
		if v.IsNil() {
			return nil, ErrNoPDFData
		}
	}
	return img, nil
}
// pdfError is a minimal error implementation that carries a fixed message.
// It is the concrete type behind ErrNoPDFData.
type pdfError struct{ msg string }

// Error implements the error interface by returning the stored message.
func (e *pdfError) Error() string {
	return e.msg
}

// ErrNoPDFData is the sentinel error reported when the engine has no raw
// PDF bytes to render.
var ErrNoPDFData = &pdfError{msg: "engine has no raw PDF data"}