mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-04 01:29:35 +08:00
### What problem does this PR solve?

This PR improves the table upload flow for CSV/Excel files by allowing table column roles to be configured at upload time.

Previously, users had to:

1. Upload and parse a table file.
2. Open parser settings and manually set table column roles.
3. Re-parse the file for the roles to take effect.

This was inefficient and required an unnecessary second parse.

With this change:

1. When the knowledge base uses table parsing, the upload dialog extracts CSV/Excel headers client-side.
2. Users can choose Auto mode or Manual mode.
3. In Manual mode, users can assign per-column roles before upload.
4. The selected parser config is sent with the upload request and applied server-side during document creation.

Result: configured table column roles are applied from the first parse.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Ahmad Intisar <ahmadintisar@Ahmads-MacBook-M4-Pro.local>
77 lines
2.1 KiB
TypeScript
77 lines
2.1 KiB
TypeScript
import Papa from 'papaparse';
|
|
import * as XLSX from 'xlsx';
|
|
|
|
/**
 * Extracts column headers from a CSV or Excel file.
 *
 * The file type is decided solely from the extension of `file.name` (the text
 * after the last `.`, compared case-insensitively):
 * - `csv` is handed to `extractCsvColumns`;
 * - `xlsx` and `xls` are handed to `extractExcelColumns`.
 *
 * A name without any `.` has no extension and is treated as unsupported, so an
 * extension-less file that happens to be called `csv` or `xlsx` is not parsed.
 *
 * @param file - The file whose header row should be read.
 * @returns The header names produced by the matching extractor, or an empty
 *   array when the file type is not supported.
 */
export async function extractTableColumns(file: File): Promise<string[]> {
  const dotIndex = file.name.lastIndexOf('.');
  // `split('.').pop()` would return the whole name when there is no dot,
  // which misreads a bare name such as "csv" as an extension.
  const ext =
    dotIndex === -1 ? '' : file.name.slice(dotIndex + 1).toLowerCase();

  if (ext === 'csv') {
    return extractCsvColumns(file);
  }

  if (ext === 'xlsx' || ext === 'xls') {
    return extractExcelColumns(file);
  }

  return [];
}
|
|
|
|
/**
 * Reads the column names of a CSV file through PapaParse.
 *
 * The parser is run in header mode with a one-row preview, and the names are
 * taken from `results.meta.fields`. Names that are empty or whitespace-only
 * are dropped; the remaining names are returned as reported by the parser.
 *
 * The returned promise never rejects: a parse error resolves to an empty array.
 */
function extractCsvColumns(file: File): Promise<string[]> {
  const hasVisibleText = (name: string): boolean => name.trim().length > 0;

  return new Promise<string[]>((done) => {
    Papa.parse(file, {
      header: true,
      // Only read the first row (header)
      preview: 1,
      skipEmptyLines: true,
      complete: (parsed) => {
        const names = parsed.meta?.fields ?? [];
        done(names.filter((name) => hasVisibleText(name)));
      },
      error: () => {
        done([]);
      },
    });
  });
}
|
|
|
|
/**
 * Reads the column names from the first sheet of an Excel workbook.
 *
 * The file is loaded as an ArrayBuffer with a FileReader and parsed with
 * `XLSX.read` using `sheetRows: 1`. The first sheet is converted to an array
 * of rows (`header: 1`), and the cells of the first row are stringified,
 * trimmed, and filtered so that empty cells are dropped.
 *
 * The returned promise never rejects: a read error, a parse error, a workbook
 * without sheets, or a sheet without rows all resolve to an empty array.
 */
function extractExcelColumns(file: File): Promise<string[]> {
  // Turns the raw workbook bytes into the cleaned list of header cells.
  const readHeaderRow = (buffer: ArrayBuffer): string[] => {
    const bytes = new Uint8Array(buffer);
    const book = XLSX.read(bytes, { type: 'array', sheetRows: 1 });

    const [sheetName] = book.SheetNames;
    if (!sheetName) {
      return [];
    }

    const grid = XLSX.utils.sheet_to_json<string[]>(book.Sheets[sheetName], {
      header: 1,
    });
    if (grid.length === 0) {
      return [];
    }

    const cleaned: string[] = grid[0].map((cell) => String(cell ?? '').trim());
    return cleaned.filter((cell) => cell.length > 0);
  };

  return new Promise<string[]>((done) => {
    const fileReader = new FileReader();

    fileReader.onerror = () => done([]);

    fileReader.onload = (event) => {
      let headers: string[];
      try {
        headers = readHeaderRow(event.target?.result as ArrayBuffer);
      } catch {
        // Any failure while decoding the workbook yields "no headers".
        headers = [];
      }
      done(headers);
    };

    fileReader.readAsArrayBuffer(file);
  });
}
|
|
|
|
/**
 * Checks whether a file is a table file (CSV or Excel), judged solely by the
 * extension of `file.name`.
 *
 * The extension is the text after the last `.` in the name, compared
 * case-insensitively against `csv`, `xlsx` and `xls`. A name that contains no
 * `.` has no extension and is never a table file, so an extension-less file
 * that happens to be called `csv` is not misclassified.
 *
 * @param file - The file to inspect; only its `name` is read.
 * @returns `true` when the extension is `csv`, `xlsx` or `xls`.
 */
export function isTableFile(file: File): boolean {
  const dotIndex = file.name.lastIndexOf('.');
  // `split('.').pop()` would return the whole name when there is no dot,
  // which misreads a bare name such as "csv" as an extension.
  if (dotIndex === -1) {
    return false;
  }

  const ext = file.name.slice(dotIndex + 1).toLowerCase();
  return ['csv', 'xlsx', 'xls'].includes(ext);
}
|