Files
ragflow/web/src/utils/table-column-extract.ts
Ahmad Intisar e6068a7f7e Fix: table parser metadata (#15127)
### What problem does this PR solve?

This PR improves the table upload flow for CSV/Excel files by allowing
table column role configuration at upload time.

Previously, users had to:
1. Upload and parse a table file.
2. Open parser settings and manually set table column roles.
3. Re-parse the file for the roles to take effect.

This was inefficient and required an unnecessary second parse.

With this change:
1. When the knowledge base uses table parsing, the upload dialog
extracts CSV/Excel headers client-side.
2. Users can choose Auto mode or Manual mode.
3. In Manual mode, users can assign per-column roles before upload.
4. The selected parser config is sent with the upload request and
applied server-side during document creation.

Result: configured table column roles are applied from the first parse.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Ahmad Intisar <ahmadintisar@Ahmads-MacBook-M4-Pro.local>
2026-05-25 16:05:38 +08:00

77 lines
2.1 KiB
TypeScript

import Papa from 'papaparse';
import * as XLSX from 'xlsx';
/**
 * Extracts column headers from a CSV or Excel file.
 *
 * The reader is chosen from the file name's extension, compared
 * case-insensitively: `csv` is handed to the CSV reader, `xlsx` and `xls`
 * to the Excel reader.
 *
 * @param file - The file whose header row should be read.
 * @returns The header names. The array is empty if the name has no
 *   extension, the extension is not supported, or headers cannot be read.
 */
export async function extractTableColumns(file: File): Promise<string[]> {
  // Locate the last dot explicitly. `split('.').pop()` returns the whole name
  // when it contains no dot, so an extensionless file literally named "csv"
  // or "xlsx" would otherwise be treated as a table file.
  const dotIndex = file.name.lastIndexOf('.');
  const ext =
    dotIndex === -1 ? '' : file.name.slice(dotIndex + 1).toLowerCase();

  if (ext === 'csv') {
    return extractCsvColumns(file);
  }
  if (ext === 'xlsx' || ext === 'xls') {
    return extractExcelColumns(file);
  }
  return [];
}
/**
 * Reads the header names of a CSV file with PapaParse.
 *
 * The returned promise never rejects: a parse error resolves to an empty
 * array. Headers that are empty or whitespace-only are dropped; the remaining
 * names are returned exactly as the parser reported them (not trimmed).
 */
function extractCsvColumns(file: File): Promise<string[]> {
  const hasVisibleText = (name: string): boolean => name.trim().length > 0;

  return new Promise((resolve) => {
    Papa.parse(file, {
      header: true,
      // Only the header names are needed, so limit parsing to a one-row preview.
      preview: 1,
      skipEmptyLines: true,
      complete: (parsed) => {
        const headerNames = parsed.meta?.fields ?? [];
        resolve(headerNames.filter(hasVisibleText));
      },
      error: () => {
        resolve([]);
      },
    });
  });
}
/**
 * Reads the header row of the first worksheet of an Excel file.
 *
 * The file is loaded through a FileReader and parsed with `sheetRows: 1`, so
 * only the first row is requested from the workbook parser. Each cell is
 * stringified and trimmed, and cells that end up empty are dropped.
 *
 * The returned promise never rejects: a read error, a parse failure, a
 * workbook without sheets, or a sheet without rows all resolve to an empty
 * array.
 */
function extractExcelColumns(file: File): Promise<string[]> {
  return new Promise((resolve) => {
    const fileReader = new FileReader();

    fileReader.onerror = () => resolve([]);

    fileReader.onload = (event) => {
      let headerCells: string[] = [];
      try {
        const bytes = new Uint8Array(event.target?.result as ArrayBuffer);
        const book = XLSX.read(bytes, { type: 'array', sheetRows: 1 });
        const sheetName = book.SheetNames[0];
        if (sheetName) {
          const firstSheet = book.Sheets[sheetName];
          // `header: 1` yields each row as an array of cell values.
          const grid = XLSX.utils.sheet_to_json<string[]>(firstSheet, {
            header: 1,
          });
          if (grid.length > 0) {
            headerCells = grid[0]
              .map((cell) => String(cell ?? '').trim())
              .filter((text) => text.length > 0);
          }
        }
      } catch {
        // Any failure while decoding the workbook means "no headers".
        headerCells = [];
      }
      resolve(headerCells);
    };

    fileReader.readAsArrayBuffer(file);
  });
}
/**
 * Check if a file is a table file (CSV or Excel).
 *
 * The decision is based only on the file name's extension, compared
 * case-insensitively against `csv`, `xlsx` and `xls`.
 *
 * @param file - The file to classify.
 * @returns `true` when the text after the last `.` in the name is a supported
 *   table extension; `false` otherwise, including when the name has no `.`.
 */
export function isTableFile(file: File): boolean {
  // Require a real dot. `split('.').pop()` returns the whole name when there
  // is none, which would classify a file literally named "csv" as a table.
  const dotIndex = file.name.lastIndexOf('.');
  if (dotIndex === -1) {
    return false;
  }
  const ext = file.name.slice(dotIndex + 1).toLowerCase();
  return ['csv', 'xlsx', 'xls'].includes(ext);
}