From 0af5d43e8dde9a7c03313d29375fe64e9759cbbc Mon Sep 17 00:00:00 2001 From: Harsh Kashyap Date: Thu, 25 Jun 2026 16:42:57 +0530 Subject: [PATCH] fix(deepdoc): keep zero and false Excel cells in __call__ (#16318) --- deepdoc/parser/excel_parser.py | 2 +- .../deepdoc/parser/test_excel_parser.py | 35 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/deepdoc/parser/excel_parser.py b/deepdoc/parser/excel_parser.py index 21e9dc1276..bb28b792aa 100644 --- a/deepdoc/parser/excel_parser.py +++ b/deepdoc/parser/excel_parser.py @@ -283,7 +283,7 @@ class RAGFlowExcelParser: for r in list(rows[1:]): fields = [] for i, c in enumerate(r): - if not c.value: + if c.value is None: continue t = str(ti[i].value) if i < len(ti) else "" t += (":" if t else "") + str(c.value) diff --git a/test/unit_test/deepdoc/parser/test_excel_parser.py b/test/unit_test/deepdoc/parser/test_excel_parser.py index 9036284643..a54bdd4cbf 100644 --- a/test/unit_test/deepdoc/parser/test_excel_parser.py +++ b/test/unit_test/deepdoc/parser/test_excel_parser.py @@ -68,6 +68,41 @@ def _chunk_has_no_data_cells(chunk): return "" not in chunk and "" not in chunk +def _make_xlsx_with_zero_and_false(): + from openpyxl import Workbook + + wb = Workbook() + ws = wb.active + ws.append(["Amount", "Active"]) + ws.append([0, False]) + buf = BytesIO() + wb.save(buf) + buf.seek(0) + return buf.read() + + +@pytest.mark.p2 +def test_call_keeps_zero_and_false_cells(): + lines = RAGFlowExcelParser()(_make_xlsx_with_zero_and_false()) + assert len(lines) == 1 + assert "0" in lines[0] + assert "False" in lines[0] + + +@pytest.mark.p2 +def test_call_keeps_empty_string_cells(monkeypatch): + from openpyxl import Workbook + + wb = Workbook() + ws = wb.active + ws.append(["Note"]) + ws.append([""]) + + monkeypatch.setattr(RAGFlowExcelParser, "_load_excel_to_workbook", lambda _file: wb) + lines = RAGFlowExcelParser()(b"unused") + assert lines == ["Note:"] + + @pytest.mark.p2 def test_exact_multiple_does_not_emit_header_only_chunk(): # 12 data rows with chunk_rows=12 (the value rag/app/naive.py uses).