mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Revert "fix(deepdoc): keep zero and false Excel cells in __call__" (#16366)
Reverts infiniflow/ragflow#16318
This commit is contained in:
@@ -283,7 +283,7 @@ class RAGFlowExcelParser:
|
||||
for r in list(rows[1:]):
|
||||
fields = []
|
||||
for i, c in enumerate(r):
|
||||
if c.value is None:
|
||||
if not c.value:
|
||||
continue
|
||||
t = str(ti[i].value) if i < len(ti) else ""
|
||||
t += (":" if t else "") + str(c.value)
|
||||
|
||||
@@ -68,41 +68,6 @@ def _chunk_has_no_data_cells(chunk):
|
||||
return "<td>" not in chunk and "<td></td>" not in chunk
|
||||
|
||||
|
||||
def _make_xlsx_with_zero_and_false():
    """Build an in-memory .xlsx workbook and return its serialized bytes.

    The active sheet gets a header row ("Amount", "Active") followed by a
    single data row holding the falsy values ``0`` and ``False``.
    """
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    for row in (["Amount", "Active"], [0, False]):
        sheet.append(row)

    stream = BytesIO()
    workbook.save(stream)
    # getvalue() returns the whole buffer regardless of the stream position.
    return stream.getvalue()
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_call_keeps_zero_and_false_cells():
    """Calling the parser on a sheet with a 0 / False row yields one line containing both values."""
    parser = RAGFlowExcelParser()
    lines = parser(_make_xlsx_with_zero_and_false())

    # Exactly one data row is expected in the output.
    assert len(lines) == 1
    only_line = lines[0]
    assert "0" in only_line
    assert "False" in only_line
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_call_keeps_empty_string_cells(monkeypatch):
    """A row whose only cell is an empty string is expected to produce the line "Note:"."""
    from openpyxl import Workbook

    wb = Workbook()
    sheet = wb.active
    for row in (["Note"], [""]):
        sheet.append(row)

    def _fake_loader(_file):
        # Hand back the in-memory workbook instead of loading from the input.
        return wb

    monkeypatch.setattr(RAGFlowExcelParser, "_load_excel_to_workbook", _fake_loader)

    parser = RAGFlowExcelParser()
    lines = parser(b"unused")
    assert lines == ["Note:"]
|
||||
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_exact_multiple_does_not_emit_header_only_chunk():
|
||||
# 12 data rows with chunk_rows=12 (the value rag/app/naive.py uses).
|
||||
|
||||
Reference in New Issue
Block a user