mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
refactor: let excel use lazy image loader (#13558)
### What problem does this PR solve?

Let the Excel parser use the lazy image loader.

### Type of change

- [x] Refactoring

---------

Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
This commit is contained in:
@@ -26,7 +26,7 @@ from docx.image.exceptions import (
|
||||
UnexpectedEndOfFileError,
|
||||
UnrecognizedImageError,
|
||||
)
|
||||
from rag.utils.lazy_image import LazyDocxImage
|
||||
from rag.utils.lazy_image import LazyImage
|
||||
|
||||
class RAGFlowDocxParser:
|
||||
def get_picture(self, document, paragraph):
|
||||
@@ -66,7 +66,7 @@ class RAGFlowDocxParser:
|
||||
image_blobs.append(image_blob)
|
||||
if not image_blobs:
|
||||
return None
|
||||
return LazyDocxImage(image_blobs)
|
||||
return LazyImage(image_blobs)
|
||||
|
||||
|
||||
def __extract_table_content(self, tb):
|
||||
|
||||
@@ -18,9 +18,9 @@ from io import BytesIO
|
||||
|
||||
import pandas as pd
|
||||
from openpyxl import Workbook, load_workbook
|
||||
from PIL import Image
|
||||
|
||||
from rag.nlp import find_codec
|
||||
from rag.utils.lazy_image import LazyImage
|
||||
|
||||
# copied from `/openpyxl/cell/cell.py`
|
||||
ILLEGAL_CHARACTERS_RE = re.compile(r"[\000-\010]|[\013-\014]|[\016-\037]")
|
||||
@@ -122,7 +122,7 @@ class RAGFlowExcelParser:
|
||||
for img in images:
|
||||
try:
|
||||
img_bytes = img._data()
|
||||
pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
|
||||
lazy_img = LazyImage([img_bytes])
|
||||
|
||||
anchor = img.anchor
|
||||
if hasattr(anchor, "_from") and hasattr(anchor, "_to"):
|
||||
@@ -139,7 +139,7 @@ class RAGFlowExcelParser:
|
||||
|
||||
item = {
|
||||
"sheet": sheetname or ws.title,
|
||||
"image": pil_img,
|
||||
"image": lazy_img,
|
||||
"image_description": "",
|
||||
"row_from": r1,
|
||||
"col_from": c1,
|
||||
|
||||
@@ -75,7 +75,7 @@ def vision_figure_parser_figure_xlsx_wrapper(images,callback=None, **kwargs):
|
||||
vision_model = None
|
||||
if vision_model:
|
||||
figures_data = [((
|
||||
img["image"], # Image.Image
|
||||
img["image"], # Image.Image or LazyImage (converted by ensure_pil_image)
|
||||
[img["image_description"]] # description list (must be list)
|
||||
),
|
||||
[
|
||||
|
||||
Reference in New Issue
Block a user