#
#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import importlib.util
import os
import sys
from io import BytesIO
from unittest import mock

import pytest

# Import RAGFlowExcelParser directly by file path to avoid triggering
# deepdoc/parser/__init__.py and rag.nlp, which pull in heavy dependencies.
# Modules that are not already imported are replaced with MagicMock stubs;
# modules already present in sys.modules are left untouched.
for _m in ["pandas", "rag.nlp", "rag.utils", "rag.utils.lazy_image"]:
    if _m not in sys.modules:
        sys.modules[_m] = mock.MagicMock()


def _find_project_root(marker="pyproject.toml"):
    """Return the nearest ancestor directory of this file containing *marker*.

    The search starts at the directory holding this file and walks upward,
    stopping once the filesystem root is reached (the root itself is not
    inspected).

    Args:
        marker: File name whose presence identifies the project root.

    Returns:
        The absolute path of the first matching directory, or ``None`` when
        no ancestor contains *marker*.
    """
    d = os.path.dirname(os.path.abspath(__file__))
    # dirname() of the filesystem root is the root itself, which ends the walk.
    while d != os.path.dirname(d):
        if os.path.exists(os.path.join(d, marker)):
            return d
        d = os.path.dirname(d)
    return None


_PROJECT_ROOT = _find_project_root()
if _PROJECT_ROOT is None:
    # Fail with an actionable message instead of the opaque TypeError that
    # os.path.join(None, ...) would raise below.
    raise RuntimeError(
        "Could not locate the project root: no 'pyproject.toml' found in any "
        f"parent directory of {os.path.abspath(__file__)}"
    )

_spec = importlib.util.spec_from_file_location(
    "deepdoc.parser.excel_parser",
    os.path.join(_PROJECT_ROOT, "deepdoc", "parser", "excel_parser.py"),
)
_mod = importlib.util.module_from_spec(_spec)
# The module is registered under its dotted name before its code is executed.
sys.modules["deepdoc.parser.excel_parser"] = _mod
_spec.loader.exec_module(_mod)
RAGFlowExcelParser = _mod.RAGFlowExcelParser


def _make_xlsx(n_data_rows):
    """Build an in-memory workbook and return its serialized bytes.

    The active sheet gets the header row ``["H1", "H2"]`` followed by
    *n_data_rows* rows of the form ``["a<i>", "b<i>"]``.

    Args:
        n_data_rows: Number of data rows appended after the header row.

    Returns:
        The bytes written by ``Workbook.save``.
    """
    # Imported lazily so that openpyxl is only needed when a workbook is built.
    from openpyxl import Workbook

    wb = Workbook()
    ws = wb.active
    ws.append(["H1", "H2"])
    for i in range(n_data_rows):
        ws.append([f"a{i}", f"b{i}"])
    buf = BytesIO()
    wb.save(buf)
    return buf.getvalue()


# NOTE(review): the definition below is cut off at the end of the visible
# source (inside a string literal). It is kept verbatim and has NOT been
# reconstructed -- restore it from the original file.
# def _chunk_has_no_data_cells(chunk): return "