diff --git a/web/src/components/ui/multi-select.tsx b/web/src/components/ui/multi-select.tsx index fb95b95415..d2079b234b 100644 --- a/web/src/components/ui/multi-select.tsx +++ b/web/src/components/ui/multi-select.tsx @@ -239,6 +239,31 @@ export const MultiSelect = React.forwardRef< 'options' in option ? option.options : [option], ); }, [options]); + + const disabledValueSet = React.useMemo(() => { + return new Set( + flatOptions.filter((option) => option.disabled).map((option) => option.value), + ); + }, [flatOptions]); + + const preserveDisabledValues = React.useCallback( + (values: string[]) => { + const disabledSelectedValues = selectedValues.filter((value) => + disabledValueSet.has(value), + ); + + return Array.from( + new Set([...disabledSelectedValues, ...values]), + ); + }, + [disabledValueSet, selectedValues], + ); + + const canRemoveValue = React.useCallback( + (value: string) => !disabledValueSet.has(value), + [disabledValueSet], + ); + const handleInputKeyDown = ( event: React.KeyboardEvent, ) => { @@ -246,13 +271,26 @@ export const MultiSelect = React.forwardRef< setIsPopoverOpen(true); } else if (event.key === 'Backspace' && !event.currentTarget.value) { const newSelectedValues = [...selectedValues]; - newSelectedValues.pop(); + const removableIndex = [...newSelectedValues] + .reverse() + .findIndex((value) => canRemoveValue(value)); + if (removableIndex < 0) { + return; + } + newSelectedValues.splice( + newSelectedValues.length - 1 - removableIndex, + 1, + ); setSelectedValues(newSelectedValues); onValueChange(newSelectedValues); } }; const toggleOption = (option: string) => { + if (disabledValueSet.has(option)) { + return; + } + const newSelectedValues = selectedValues.includes(option) ? selectedValues.filter((value) => value !== option) : [...selectedValues, option]; @@ -261,8 +299,9 @@ export const MultiSelect = React.forwardRef< }; const handleClear = () => { - setSelectedValues([]); - onValueChange([]); + const nextValues = preserveDisabledValues([]); + setSelectedValues(nextValues); + onValueChange(nextValues); }; const handleTogglePopover = () => { @@ -270,7 +309,9 @@ export const MultiSelect = React.forwardRef< }; const clearExtraOptions = () => { - const newSelectedValues = selectedValues.slice(0, maxCount); + const newSelectedValues = preserveDisabledValues( + selectedValues.slice(0, maxCount), + ); setSelectedValues(newSelectedValues); onValueChange(newSelectedValues); }; @@ -279,7 +320,9 @@ export const MultiSelect = React.forwardRef< if (selectedValues.length === flatOptions.length) { handleClear(); } else { - const allValues = flatOptions.map((option) => option.value); + const allValues = preserveDisabledValues( + flatOptions.map((option) => option.value), + ); setSelectedValues(allValues); onValueChange(allValues); } @@ -325,13 +368,15 @@ export const MultiSelect = React.forwardRef<
{option?.label}
- { - event.stopPropagation(); - toggleOption(value); - }} - /> + {canRemoveValue(value) && ( + { + event.stopPropagation(); + toggleOption(value); + }} + /> + )} ); diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index e6f793b87a..357334abcf 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1429,6 +1429,13 @@ Example: Virtual Hosted Style`, pleaseUploadAtLeastOneFile: 'Please upload at least one file', }, flow: { + preprocess: { + preprocess: 'Preprocess', + mainContent: 'Main content', + abstract: 'Abstract', + author: 'Author', + sectionTitle: 'Section title', + }, autoPlay: 'Auto play audio', downloadFileTypeTip: 'The file type to download', downloadFileType: 'Download file type', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index dc1822bfce..01e5b16716 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -1209,6 +1209,13 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 pleaseUploadAtLeastOneFile: '请上传至少一个文件', }, flow: { + preprocess: { + preprocess: '预处理', + mainContent: '主内容', + abstract: '摘要', + author: '作者', + sectionTitle: '章节标题', + }, autoPlay: '自动播放', downloadFileTypeTip: '文件下载的类型', downloadFileType: '文件类型', diff --git a/web/src/pages/agent/constant/pipeline.tsx b/web/src/pages/agent/constant/pipeline.tsx index d2ba075022..31cd0cbec2 100644 --- a/web/src/pages/agent/constant/pipeline.tsx +++ b/web/src/pages/agent/constant/pipeline.tsx @@ -154,6 +154,22 @@ export enum StringTransformDelimiter { Space = ' ', } +export enum PreprocessValue { + main_content = 'main_content', + section_title = 'title', + abstract = 'abstract', + author = 'author', +} + +export const MAIN_CONTENT_PREPROCESS_VALUE: PreprocessValue = + PreprocessValue.main_content; + +export const PreprocessLabelKeyMap: Record = { + main_content: 'mainContent', + title: 'sectionTitle', + abstract: 'abstract', + author: 'author', +}; export const initialParserValues = { outputs: { markdown: { type: 'string', value: '' }, @@ -166,35 +182,42 @@ export const initialParserValues = { fileFormat: FileType.PDF, output_format: PdfOutputFormat.Json, parse_method: ParseDocumentType.DeepDOC, + preprocess: PreprocessValue.main_content, }, { fileFormat: FileType.Spreadsheet, output_format: SpreadsheetOutputFormat.Html, parse_method: ParseDocumentType.DeepDOC, + preprocess: PreprocessValue.main_content, }, { fileFormat: FileType.Image, output_format: ImageOutputFormat.Text, parse_method: ImageParseMethod.OCR, + preprocess: PreprocessValue.main_content, system_prompt: '', }, { fileFormat: FileType.Email, fields: Object.values(ParserFields), output_format: EmailOutputFormat.Text, + preprocess: PreprocessValue.main_content, }, { fileFormat: FileType.TextMarkdown, output_format: TextMarkdownOutputFormat.Text, + preprocess: PreprocessValue.main_content, }, { fileFormat: FileType.Docx, output_format: DocxOutputFormat.Json, + preprocess: PreprocessValue.main_content, }, { fileFormat: FileType.PowerPoint, output_format: PptOutputFormat.Json, parse_method: ParseDocumentType.DeepDOC, + preprocess: PreprocessValue.main_content, }, ], }; diff --git a/web/src/pages/agent/form/parser-form/index.tsx b/web/src/pages/agent/form/parser-form/index.tsx index 1942b2d05a..a1808db4b6 100644 --- a/web/src/pages/agent/form/parser-form/index.tsx +++ b/web/src/pages/agent/form/parser-form/index.tsx @@ -5,16 +5,17 @@ import { import { RAGFlowFormItem } from '@/components/ragflow-form'; import { BlockButton, Button } from '@/components/ui/button'; import { Form } from '@/components/ui/form'; +import { MultiSelect } from '@/components/ui/multi-select'; import { Separator } from '@/components/ui/separator'; import { cn } from '@/lib/utils'; import { buildOptions } from '@/utils/form'; import { zodResolver } from '@hookform/resolvers/zod'; import { useHover } from 'ahooks'; import { Trash2 } from 'lucide-react'; -import { memo, useCallback, useMemo, useRef } from 'react'; +import { memo, useCallback, useEffect, useMemo, useRef } from 'react'; import { - UseFieldArrayRemove, useFieldArray, + UseFieldArrayRemove, useForm, useFormContext, } from 'react-hook-form'; @@ -24,6 +25,8 @@ import { FileType, InitialOutputFormatMap, initialParserValues, + MAIN_CONTENT_PREPROCESS_VALUE, + PreprocessValue, } from '../../constant/pipeline'; import { useFormValues } from '../../hooks/use-form-values'; import { useWatchFormChange } from '../../hooks/use-watch-form-change'; @@ -41,6 +44,74 @@ import { AudioFormFields, VideoFormFields } from './video-form-fields'; const outputList = buildOutputList(initialParserValues.outputs); +type PreprocessOptionConfig = { + value: PreprocessValue; + required?: boolean; +}; + +const DefaultPreprocessOptionConfigs: PreprocessOptionConfig[] = [ + { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true }, +]; + +const PreprocessOptionConfigsMap: Partial< + Record +> = { + [FileType.PDF]: [ + { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true }, + { value: PreprocessValue.abstract }, + { value: PreprocessValue.author }, + { value: PreprocessValue.section_title }, + ], + [FileType.PowerPoint]: [ + { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true }, + ], + [FileType.Spreadsheet]: [ + { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true }, + ], + [FileType.TextMarkdown]: [ + { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true }, + { value: PreprocessValue.section_title }, + ], + [FileType.Docx]: [ + { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true }, + { value: PreprocessValue.section_title }, + ], +}; + +function getPreprocessOptionConfigs(fileType?: FileType) { + if (!fileType) { + return DefaultPreprocessOptionConfigs; + } + + return PreprocessOptionConfigsMap[fileType] ?? DefaultPreprocessOptionConfigs; +} + +function normalizePreprocessValuesByFileType( + fileType: FileType | undefined, + values: string[] | undefined, +) { + const optionConfigs = getPreprocessOptionConfigs(fileType); + const allowedValueSet = new Set(optionConfigs.map((x) => x.value)); + const requiredValues = optionConfigs + .filter((x) => x.required) + .map((x) => x.value); + const normalizedOptionalValues = (Array.isArray(values) ? values : []).filter( + (value) => allowedValueSet.has(value as PreprocessValue), + ) as PreprocessValue[]; + + return Array.from( + new Set([...requiredValues, ...normalizedOptionalValues]), + ); +} + +function isSameStringArray(a: string[] | undefined, b: string[]) { + if (!a || a.length !== b.length) { + return false; + } + + return a.every((item, idx) => item === b[idx]); +} + const FileFormatWidgetMap = { [FileType.PDF]: PdfFormFields, [FileType.Spreadsheet]: SpreadsheetFormFields, @@ -63,6 +134,7 @@ export const FormSchema = z.object({ setups: z.array( z.object({ fileFormat: z.string().nullish(), + preprocess: z.array(z.string()).optional(), output_format: z.string().optional(), parse_method: z.string().optional(), lang: z.string().optional(), @@ -121,6 +193,57 @@ function ParserItem({ [form, index], ); + const handlePreprocessChange = useCallback( + (value: PreprocessValue[]) => { + form.setValue(`setups.${index}.preprocess`, value, { + shouldDirty: true, + shouldValidate: true, + shouldTouch: true, + }); + }, + [form, index], + ); + + const preprocessOptions = useMemo(() => { + const optionConfigs = getPreprocessOptionConfigs(fileFormat as FileType); + + return optionConfigs.map((optionConfig) => { + const labelMap: Record = { + [MAIN_CONTENT_PREPROCESS_VALUE]: t('flow.preprocess.mainContent'), + [PreprocessValue.section_title]: t('flow.preprocess.sectionTitle'), + [PreprocessValue.abstract]: t('flow.preprocess.abstract'), + [PreprocessValue.author]: t('flow.preprocess.author'), + }; + + const label = labelMap[optionConfig.value] || optionConfig.value; + + return { + value: optionConfig.value, + disabled: optionConfig.required, + label: label, + }; + }); + }, [fileFormat, t]); + + useEffect(() => { + const currentPreprocessValues = form.getValues( + `setups.${index}.preprocess`, + ) as string[] | undefined; + const normalizedPreprocessValues = normalizePreprocessValuesByFileType( + fileFormat as FileType, + currentPreprocessValues, + ); + + if ( + !isSameStringArray(currentPreprocessValues, normalizedPreprocessValues) + ) { + form.setValue(`setups.${index}.preprocess`, normalizedPreprocessValues, { + shouldDirty: false, + shouldValidate: true, + }); + } + }, [fileFormat, form, index]); + return (
+ + {(field) => ( + { + const nextValues = normalizePreprocessValuesByFileType( + fileFormat as FileType, + val, + ); + field.onChange(nextValues); + handlePreprocessChange(nextValues); + }} + showSelectAll={false} + options={preprocessOptions} + > + )} + {index < fieldLength - 1 && }
); @@ -192,6 +335,7 @@ const ParserForm = ({ node }: INextOperatorForm) => { llm_id: '', table_result_type: '', markdown_image_response_type: '', + preprocess: [], }); }, [append]); diff --git a/web/src/pages/agent/utils.ts b/web/src/pages/agent/utils.ts index c830588306..2fe592b578 100644 --- a/web/src/pages/agent/utils.ts +++ b/web/src/pages/agent/utils.ts @@ -214,6 +214,7 @@ function transformParserParams(params: ParserFormSchemaType) { ParserFormSchemaType['setups'][0] & { suffix: string[] } > = { output_format: cur.output_format, + preprocess: cur.preprocess, suffix: FileTypeSuffixMap[cur.fileFormat as FileType], };