mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Refa: improve Excel parser logic (#13372)
### What problem does this PR solve?

Improve Excel parser logic.

### Type of change

- [x] Refactoring
This commit is contained in:
@@ -74,9 +74,16 @@ class RAGFlowExcelParser:
|
||||
|
||||
return df.apply(lambda col: col.map(clean_string))
|
||||
|
||||
@staticmethod
|
||||
def _fill_worksheet_from_dataframe(ws, df: pd.DataFrame):
|
||||
for col_num, column_name in enumerate(df.columns, 1):
|
||||
ws.cell(row=1, column=col_num, value=column_name)
|
||||
for row_num, row in enumerate(df.values, 2):
|
||||
for col_num, value in enumerate(row, 1):
|
||||
ws.cell(row=row_num, column=col_num, value=value)
|
||||
|
||||
@staticmethod
|
||||
def _dataframe_to_workbook(df):
|
||||
# if contains multiple sheets use _dataframes_to_workbook
|
||||
if isinstance(df, dict) and len(df) > 1:
|
||||
return RAGFlowExcelParser._dataframes_to_workbook(df)
|
||||
|
||||
@@ -84,30 +91,19 @@ class RAGFlowExcelParser:
|
||||
wb = Workbook()
|
||||
ws = wb.active
|
||||
ws.title = "Data"
|
||||
|
||||
for col_num, column_name in enumerate(df.columns, 1):
|
||||
ws.cell(row=1, column=col_num, value=column_name)
|
||||
|
||||
for row_num, row in enumerate(df.values, 2):
|
||||
for col_num, value in enumerate(row, 1):
|
||||
ws.cell(row=row_num, column=col_num, value=value)
|
||||
|
||||
RAGFlowExcelParser._fill_worksheet_from_dataframe(ws, df)
|
||||
return wb
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _dataframes_to_workbook(dfs: dict):
|
||||
wb = Workbook()
|
||||
default_sheet = wb.active
|
||||
wb.remove(default_sheet)
|
||||
|
||||
|
||||
for sheet_name, df in dfs.items():
|
||||
df = RAGFlowExcelParser._clean_dataframe(df)
|
||||
ws = wb.create_sheet(title=sheet_name)
|
||||
for col_num, column_name in enumerate(df.columns, 1):
|
||||
ws.cell(row=1, column=col_num, value=column_name)
|
||||
for row_num, row in enumerate(df.values, 2):
|
||||
for col_num, value in enumerate(row, 1):
|
||||
ws.cell(row=row_num, column=col_num, value=value)
|
||||
RAGFlowExcelParser._fill_worksheet_from_dataframe(ws, df)
|
||||
return wb
|
||||
|
||||
@staticmethod
|
||||
|
||||
Reference in New Issue
Block a user