From 5defb4e7d65da0e94d68459294b306cf15c17815 Mon Sep 17 00:00:00 2001 From: Wang Qi Date: Thu, 25 Jun 2026 19:56:47 +0800 Subject: [PATCH] Revert "fix(deepdoc): keep zero and false Excel cells in __call__" (#16366) Reverts infiniflow/ragflow#16318 --- deepdoc/parser/excel_parser.py | 2 +- .../deepdoc/parser/test_excel_parser.py | 35 ------------------- 2 files changed, 1 insertion(+), 36 deletions(-) diff --git a/deepdoc/parser/excel_parser.py b/deepdoc/parser/excel_parser.py index bb28b792aa..21e9dc1276 100644 --- a/deepdoc/parser/excel_parser.py +++ b/deepdoc/parser/excel_parser.py @@ -283,7 +283,7 @@ class RAGFlowExcelParser: for r in list(rows[1:]): fields = [] for i, c in enumerate(r): - if c.value is None: + if not c.value: continue t = str(ti[i].value) if i < len(ti) else "" t += (":" if t else "") + str(c.value) diff --git a/test/unit_test/deepdoc/parser/test_excel_parser.py b/test/unit_test/deepdoc/parser/test_excel_parser.py index a54bdd4cbf..9036284643 100644 --- a/test/unit_test/deepdoc/parser/test_excel_parser.py +++ b/test/unit_test/deepdoc/parser/test_excel_parser.py @@ -68,41 +68,6 @@ def _chunk_has_no_data_cells(chunk): return "" not in chunk and "" not in chunk -def _make_xlsx_with_zero_and_false(): - from openpyxl import Workbook - - wb = Workbook() - ws = wb.active - ws.append(["Amount", "Active"]) - ws.append([0, False]) - buf = BytesIO() - wb.save(buf) - buf.seek(0) - return buf.read() - - -@pytest.mark.p2 -def test_call_keeps_zero_and_false_cells(): - lines = RAGFlowExcelParser()(_make_xlsx_with_zero_and_false()) - assert len(lines) == 1 - assert "0" in lines[0] - assert "False" in lines[0] - - -@pytest.mark.p2 -def test_call_keeps_empty_string_cells(monkeypatch): - from openpyxl import Workbook - - wb = Workbook() - ws = wb.active - ws.append(["Note"]) - ws.append([""]) - - monkeypatch.setattr(RAGFlowExcelParser, "_load_excel_to_workbook", lambda _file: wb) - lines = RAGFlowExcelParser()(b"unused") - assert lines == ["Note:"] - - @pytest.mark.p2 def test_exact_multiple_does_not_emit_header_only_chunk(): # 12 data rows with chunk_rows=12 (the value rag/app/naive.py uses).