mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
fix(excel_parser): keep zero-valued cells when building Excel text chunks (#16287)
This commit is contained in:
@@ -283,7 +283,7 @@ class RAGFlowExcelParser:
|
||||
for r in list(rows[1:]):
|
||||
fields = []
|
||||
for i, c in enumerate(r):
|
||||
if not c.value:
|
||||
if c.value is None or str(c.value).strip() == "":
|
||||
continue
|
||||
t = str(ti[i].value) if i < len(ti) else ""
|
||||
t += (":" if t else "") + str(c.value)
|
||||
|
||||
@@ -90,3 +90,34 @@ def test_non_multiple_unchanged():
|
||||
chunks = RAGFlowExcelParser().html(_make_xlsx(13), chunk_rows=12)
|
||||
assert len(chunks) == 2
|
||||
assert all(not _chunk_has_no_data_cells(c) for c in chunks)
|
||||
|
||||
|
||||
def _make_xlsx_with_values(header, row):
    """Build an in-memory .xlsx workbook with one header row and one data row.

    Args:
        header: Sequence of cell values appended as the first row.
        row: Sequence of cell values appended as the second row.

    Returns:
        The saved workbook as raw bytes.
    """
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    # Header first, then the single data row, in that order.
    for cells in (header, row):
        sheet.append(cells)

    stream = BytesIO()
    workbook.save(stream)
    # getvalue() returns the whole buffer regardless of the stream position.
    return stream.getvalue()
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_call_keeps_zero_valued_cells():
    """A cell holding numeric 0 must show up, with its header, in the parser output."""
    # __call__ produces the text used for indexing. A numeric 0 (and 0.0 / False)
    # is real data rather than an empty cell, so it has to survive. Because the
    # header label is only emitted together with a kept value, dropping the 0
    # would also drop its "stock" label.
    workbook_bytes = _make_xlsx_with_values(["name", "stock"], ["widget", 0])
    lines = RAGFlowExcelParser()(workbook_bytes)

    joined = " ".join(lines)
    # Check label and value separately so a failure points at the missing piece.
    assert "stock" in joined, lines
    assert "0" in joined, lines
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_call_skips_truly_empty_cells():
    """A cell with no value must not contribute its header to the parser output."""
    # None / empty-string cells carry no value and should still be skipped.
    workbook_bytes = _make_xlsx_with_values(["name", "note"], ["widget", None])
    lines = RAGFlowExcelParser()(workbook_bytes)

    joined = " ".join(lines)
    assert "note" not in joined, lines
|
||||
|
||||
Reference in New Issue
Block a user