Refactor: improve Excel parser logic (#13372)

### What problem does this PR solve?

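Improve the Excel parser logic: the header/data cell-writing loops that were duplicated in `_dataframe_to_workbook` and `_dataframes_to_workbook` are extracted into a shared `_fill_worksheet_from_dataframe` helper.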

### Type of change
- [x] Refactoring
This commit is contained in:
Stephen Hu
2026-03-04 18:00:17 +08:00
committed by GitHub
parent 6bb00e2762
commit c7d17c84b2

View File

@@ -74,9 +74,16 @@ class RAGFlowExcelParser:
return df.apply(lambda col: col.map(clean_string))
@staticmethod
def _fill_worksheet_from_dataframe(ws, df: pd.DataFrame):
    """Copy *df* into *ws*: column labels on row 1, data rows from row 2 down.

    Args:
        ws: Worksheet-like object; only its ``cell(row=, column=, value=)``
            method is used, with 1-based row and column indices.
        df: DataFrame whose ``columns`` and ``values`` are written out.
    """
    # Header row: one cell per column label.
    for header_idx, label in enumerate(df.columns):
        ws.cell(row=1, column=header_idx + 1, value=label)

    # Data rows start directly below the header, hence the +2 offset.
    for record_idx, record in enumerate(df.values):
        target_row = record_idx + 2
        for field_idx, cell_value in enumerate(record):
            ws.cell(row=target_row, column=field_idx + 1, value=cell_value)
@staticmethod
def _dataframe_to_workbook(df):
# if contains multiple sheets use _dataframes_to_workbook
if isinstance(df, dict) and len(df) > 1:
return RAGFlowExcelParser._dataframes_to_workbook(df)
@@ -84,30 +91,19 @@ class RAGFlowExcelParser:
wb = Workbook()
ws = wb.active
ws.title = "Data"
for col_num, column_name in enumerate(df.columns, 1):
ws.cell(row=1, column=col_num, value=column_name)
for row_num, row in enumerate(df.values, 2):
for col_num, value in enumerate(row, 1):
ws.cell(row=row_num, column=col_num, value=value)
RAGFlowExcelParser._fill_worksheet_from_dataframe(ws, df)
return wb
@staticmethod
def _dataframes_to_workbook(dfs: dict):
    """Build a workbook containing one worksheet per entry of *dfs*.

    Args:
        dfs: Mapping of sheet title to the DataFrame written to that sheet.
            Each DataFrame is passed through ``_clean_dataframe`` first.

    Returns:
        The populated ``Workbook``.
    """
    wb = Workbook()
    # Remove the workbook's initially active sheet so that only the sheets
    # named in dfs remain.
    default_sheet = wb.active
    wb.remove(default_sheet)
    for sheet_name, df in dfs.items():
        df = RAGFlowExcelParser._clean_dataframe(df)
        ws = wb.create_sheet(title=sheet_name)
        # The shared helper writes the header and data cells; the inline
        # loops that wrote the same cells a second time are dropped.
        RAGFlowExcelParser._fill_worksheet_from_dataframe(ws, df)
    return wb
@staticmethod