diff --git a/deepdoc/parser/mineru_parser.py b/deepdoc/parser/mineru_parser.py index d63a04c843..e334995a21 100644 --- a/deepdoc/parser/mineru_parser.py +++ b/deepdoc/parser/mineru_parser.py @@ -340,6 +340,11 @@ class MinerUParser(RAGFlowPdfParser): pn = [bx["page_idx"] + 1] positions = bx.get("bbox", (0, 0, 0, 0)) x0, top, x1, bott = positions + # Normalize flipped coordinates (MinerU may report inverted bbox for flipped images) + if x0 > x1: + x0, x1 = x1, x0 + if top > bott: + top, bott = bott, top if hasattr(self, "page_images") and self.page_images and len(self.page_images) > bx["page_idx"]: page_width, page_height = self.page_images[bx["page_idx"]].size @@ -429,6 +434,12 @@ class MinerUParser(RAGFlowPdfParser): img0 = self.page_images[pns[0]] x0, y0, x1, y1 = int(left), int(top), int(right), int(min(bottom, img0.size[1])) + if x0 > x1: + x0, x1 = x1, x0 + if y0 > y1: + y0, y1 = y1, y0 + if x1 <= x0 or y1 <= y0: + continue crop0 = img0.crop((x0, y0, x1, y1)) imgs.append(crop0) if 0 < ii < len(poss) - 1: @@ -442,6 +453,13 @@ class MinerUParser(RAGFlowPdfParser): continue page = self.page_images[pn] x0, y0, x1, y1 = int(left), 0, int(right), int(min(bottom, page.size[1])) + if x0 > x1: + x0, x1 = x1, x0 + if y0 > y1: + y0, y1 = y1, y0 + if x1 <= x0 or y1 <= y0: + bottom -= page.size[1] + continue cimgp = page.crop((x0, y0, x1, y1)) imgs.append(cimgp) if 0 < ii < len(poss) - 1: