From dd2c88b7682773f9038c4124e0fd492fe50e2fce Mon Sep 17 00:00:00 2001 From: Yash Raj Pandey <55940078+devYRPauli@users.noreply.github.com> Date: Thu, 25 Jun 2026 21:30:09 -0400 Subject: [PATCH] fix(excel_parser): keep zero-valued cells when building Excel text chunks (#16287) --- deepdoc/parser/excel_parser.py | 2 +- .../deepdoc/parser/test_excel_parser.py | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/deepdoc/parser/excel_parser.py b/deepdoc/parser/excel_parser.py index 21e9dc1276..019559d0d7 100644 --- a/deepdoc/parser/excel_parser.py +++ b/deepdoc/parser/excel_parser.py @@ -283,7 +283,7 @@ class RAGFlowExcelParser: for r in list(rows[1:]): fields = [] for i, c in enumerate(r): - if not c.value: + if c.value is None or str(c.value).strip() == "": continue t = str(ti[i].value) if i < len(ti) else "" t += (":" if t else "") + str(c.value) diff --git a/test/unit_test/deepdoc/parser/test_excel_parser.py b/test/unit_test/deepdoc/parser/test_excel_parser.py index 9036284643..9e3a2aa281 100644 --- a/test/unit_test/deepdoc/parser/test_excel_parser.py +++ b/test/unit_test/deepdoc/parser/test_excel_parser.py @@ -90,3 +90,34 @@ def test_non_multiple_unchanged(): chunks = RAGFlowExcelParser().html(_make_xlsx(13), chunk_rows=12) assert len(chunks) == 2 assert all(not _chunk_has_no_data_cells(c) for c in chunks) + + +def _make_xlsx_with_values(header, row): + from openpyxl import Workbook + + wb = Workbook() + ws = wb.active + ws.append(header) + ws.append(row) + buf = BytesIO() + wb.save(buf) + buf.seek(0) + return buf.read() + + +@pytest.mark.p2 +def test_call_keeps_zero_valued_cells(): + # __call__ produces the text used for indexing. A numeric 0 (and 0.0 / False) + # is real data, not an empty cell, so it must survive. The header is only + # emitted alongside a kept value, so a dropped 0 also loses its "stock" label. + lines = RAGFlowExcelParser()(_make_xlsx_with_values(["name", "stock"], ["widget", 0])) + joined = " ".join(lines) + assert "stock" in joined and "0" in joined, lines + + +@pytest.mark.p2 +def test_call_skips_truly_empty_cells(): + # None / empty-string cells carry no value and should still be skipped. + lines = RAGFlowExcelParser()(_make_xlsx_with_values(["name", "note"], ["widget", None])) + joined = " ".join(lines) + assert "note" not in joined, lines