Revert "fix(deepdoc): keep zero and false Excel cells in __call__" (#16366)

Reverts infiniflow/ragflow#16318
This commit is contained in:
Wang Qi
2026-06-25 19:56:47 +08:00
committed by GitHub
parent 8d3c3f868c
commit 5defb4e7d6
2 changed files with 1 addition and 36 deletions

View File

@@ -283,7 +283,7 @@ class RAGFlowExcelParser:
for r in list(rows[1:]):
fields = []
for i, c in enumerate(r):
if c.value is None:
if not c.value:
continue
t = str(ti[i].value) if i < len(ti) else ""
t += ("" if t else "") + str(c.value)

View File

@@ -68,41 +68,6 @@ def _chunk_has_no_data_cells(chunk):
return "<td>" not in chunk and "<td></td>" not in chunk
def _make_xlsx_with_zero_and_false():
    """Return the bytes of an in-memory workbook with one falsy data row.

    The active sheet gets a header row (``Amount``, ``Active``) followed by
    a single data row holding ``0`` and ``False``.
    """
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    for row in (["Amount", "Active"], [0, False]):
        sheet.append(row)

    stream = BytesIO()
    workbook.save(stream)
    # getvalue() yields the full buffer regardless of the cursor position.
    return stream.getvalue()
@pytest.mark.p2
def test_call_keeps_zero_and_false_cells():
    """Parsing the zero/False workbook yields one line containing both values."""
    parser = RAGFlowExcelParser()
    parsed = parser(_make_xlsx_with_zero_and_false())

    assert len(parsed) == 1
    row_text = parsed[0]
    assert "0" in row_text
    assert "False" in row_text
@pytest.mark.p2
def test_call_keeps_empty_string_cells(monkeypatch):
    """Pin the parser output for a data row whose only cell is ``""``.

    The workbook loader is replaced with a stub so the parser operates on
    the in-memory workbook built here instead of decoding the input bytes.
    """
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    sheet.append(["Note"])
    sheet.append([""])

    def _fake_loader(_file):
        # Ignore the supplied file object and hand back the prepared workbook.
        return workbook

    monkeypatch.setattr(RAGFlowExcelParser, "_load_excel_to_workbook", _fake_loader)

    parsed = RAGFlowExcelParser()(b"unused")
    assert parsed == ["Note"]
@pytest.mark.p2
def test_exact_multiple_does_not_emit_header_only_chunk():
# 12 data rows with chunk_rows=12 (the value rag/app/naive.py uses).