refactor: let excel use lazy image loader (#13558)

### What problem does this PR solve?

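Let the Excel parser use the lazy image loader: embedded images are now wrapped in `LazyImage` (built from the raw image bytes) instead of being converted to a PIL image up front, and the DOCX parser now uses the same `LazyImage` class in place of `LazyDocxImage`.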

### Type of change

- [x] Refactoring

---------

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
This commit is contained in:
Stephen Hu
2026-03-23 21:24:40 +08:00
committed by GitHub
parent f991cd362e
commit d32967eda8
7 changed files with 25 additions and 23 deletions

View File

@@ -26,7 +26,7 @@ from docx.image.exceptions import (
UnexpectedEndOfFileError,
UnrecognizedImageError,
)
from rag.utils.lazy_image import LazyDocxImage
from rag.utils.lazy_image import LazyImage
class RAGFlowDocxParser:
def get_picture(self, document, paragraph):
@@ -66,7 +66,7 @@ class RAGFlowDocxParser:
image_blobs.append(image_blob)
if not image_blobs:
return None
return LazyDocxImage(image_blobs)
return LazyImage(image_blobs)
def __extract_table_content(self, tb):

View File

@@ -18,9 +18,9 @@ from io import BytesIO
import pandas as pd
from openpyxl import Workbook, load_workbook
from PIL import Image
from rag.nlp import find_codec
from rag.utils.lazy_image import LazyImage
# copied from `/openpyxl/cell/cell.py`
ILLEGAL_CHARACTERS_RE = re.compile(r"[\000-\010]|[\013-\014]|[\016-\037]")
@@ -122,7 +122,7 @@ class RAGFlowExcelParser:
for img in images:
try:
img_bytes = img._data()
pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
lazy_img = LazyImage([img_bytes])
anchor = img.anchor
if hasattr(anchor, "_from") and hasattr(anchor, "_to"):
@@ -139,7 +139,7 @@ class RAGFlowExcelParser:
item = {
"sheet": sheetname or ws.title,
"image": pil_img,
"image": lazy_img,
"image_description": "",
"row_from": r1,
"col_from": c1,

View File

@@ -75,7 +75,7 @@ def vision_figure_parser_figure_xlsx_wrapper(images,callback=None, **kwargs):
vision_model = None
if vision_model:
figures_data = [((
img["image"], # Image.Image
img["image"], # Image.Image or LazyImage (converted by ensure_pil_image)
[img["image_description"]] # description list (must be list)
),
[