fix(excel_parser): keep zero-valued cells when building Excel text chunks (#16287)

This commit is contained in:
Yash Raj Pandey
2026-06-25 21:30:09 -04:00
committed by GitHub
parent 58da1d6bc3
commit dd2c88b768
2 changed files with 32 additions and 1 deletion

View File

@@ -283,7 +283,7 @@ class RAGFlowExcelParser:
for r in list(rows[1:]):
fields = []
for i, c in enumerate(r):
if not c.value:
if c.value is None or str(c.value).strip() == "":
continue
t = str(ti[i].value) if i < len(ti) else ""
t += ("" if t else "") + str(c.value)

View File

@@ -90,3 +90,34 @@ def test_non_multiple_unchanged():
chunks = RAGFlowExcelParser().html(_make_xlsx(13), chunk_rows=12)
assert len(chunks) == 2
assert all(not _chunk_has_no_data_cells(c) for c in chunks)
def _make_xlsx_with_values(header, row):
    """Return the bytes of an in-memory .xlsx workbook with two rows.

    Args:
        header: Sequence of cell values appended as the first sheet row.
        row: Sequence of cell values appended as the second sheet row.

    Returns:
        The serialized workbook as ``bytes``.
    """
    # Imported locally, as in the original helper, so openpyxl is only
    # required when this helper actually runs.
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    for values in (header, row):
        sheet.append(values)

    stream = BytesIO()
    workbook.save(stream)
    # getvalue() yields the whole buffer regardless of the stream position.
    return stream.getvalue()
@pytest.mark.p2
def test_call_keeps_zero_valued_cells():
    """A cell holding numeric 0 must show up, with its header, in __call__ output.

    A 0 is real data rather than an empty cell. The parser emits a column
    header only together with a value it keeps, so dropping the 0 would also
    drop the "stock" label. Only the integer 0 case is exercised here.
    """
    workbook_bytes = _make_xlsx_with_values(["name", "stock"], ["widget", 0])
    lines = RAGFlowExcelParser()(workbook_bytes)
    joined = " ".join(lines)

    # Both the header and the zero value have to survive.
    assert "stock" in joined, lines
    assert "0" in joined, lines
@pytest.mark.p2
def test_call_skips_truly_empty_cells():
    """A cell with no value must still be left out of the __call__ output.

    The data row stores ``None`` under the "note" column, so neither the value
    nor its header should appear. Only the ``None`` case is exercised here.
    """
    workbook_bytes = _make_xlsx_with_values(["name", "note"], ["widget", None])
    lines = RAGFlowExcelParser()(workbook_bytes)
    joined = " ".join(lines)

    assert "note" not in joined, lines