Fix: Prevent the regular expression configuration for pipeline header-based chunking from being reset. (#15935)

### What problem does this PR solve?

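Fix: Prevent the regular expression configuration for pipeline header-based
chunking from being reset. The hierarchy and group methods now keep their own
form fields (`hierarchyRules`/`groupRules` and
`hierarchyHierarchy`/`hierarchyGroup`) instead of calling `form.reset` when the
method changes.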

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
balibabu
2026-06-11 14:12:24 +08:00
committed by GitHub
parent 906618fb30
commit 5d3f8bbf32
5 changed files with 198 additions and 271 deletions

View File

@@ -1249,7 +1249,7 @@ NER使用 spaCy NER 和基于规则的关键词提取来抽取实体和关系
confirmPasswordMessage: '请确认新密码',
confirmPasswordNonMatchMessage: '您输入的新密码不匹配!',
cancel: '取消',
addedModels: '添加的模型',
addedModels: '添加的模型',
modelsToBeAdded: '待添加的模型',
addTheModel: '添加',
apiKey: 'API-Key',

View File

@@ -3,6 +3,7 @@ import {
initialLlmBaseValues,
DataflowOperator as Operator,
} from '@/constants/agent';
import { cloneDeep } from 'lodash';
export enum FileType {
PDF = 'pdf',
@@ -278,7 +279,12 @@ export enum Hierarchy {
H4 = '4',
H5 = '5',
}
const rules = [
export enum TitleChunkerMethod {
Hierarchy = 'hierarchy',
Group = 'group',
}
export const originalRules = [
{
// levels: [
// { expression: '^#[^#]' },
@@ -331,23 +337,18 @@ const rules = [
],
},
];
export const initialTitleChunkerValues = {
outputs: {
chunks: { type: 'Array<Object>', value: [] },
},
method: 'hierarchy',
hierarchy: Hierarchy.H3,
method: TitleChunkerMethod.Hierarchy,
hierarchyHierarchy: Hierarchy.H3,
hierarchyGroup: '0',
include_heading_content: false,
root_chunk_as_heading: false,
rules: rules,
};
export const initialGroupValues = {
method: 'group',
hierarchy: '0',
include_heading_content: false,
root_chunk_as_heading: false,
rules: rules,
hierarchyRules: cloneDeep(originalRules),
groupRules: cloneDeep(originalRules),
};
export const initialExtractorValues = {

View File

@@ -1,15 +1,17 @@
import { isEmpty } from 'lodash';
import { useEffect, useMemo } from 'react';
import { cloneDeep } from 'lodash';
import { useMemo } from 'react';
import { UseFormReturn, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { TitleChunkerFormSchemaType } from '.';
import { Hierarchy, initialTitleChunkerValues } from '../../constant/pipeline';
// type initialValuesType = typeof initialHierarchicalMergerValues;
import {
Hierarchy,
originalRules,
TitleChunkerMethod,
} from '../../constant/pipeline';
function transformLevelsToRules(levels: any[]) {
if (!Array.isArray(levels)) {
return initialTitleChunkerValues.rules;
return originalRules;
}
return levels
@@ -53,83 +55,81 @@ function filterEmptyRules(rules: any[]) {
.filter((rule) => rule !== null);
}
// function isRulesFormatCorrect(rules: any): boolean {
// if (!rules || !Array.isArray(rules)) {
// return false;
// }
// if (rules.length === 0) {
// return false;
// }
// if (!rules[0] || typeof rules[0] !== 'object') {
// return false;
// }
// if (!Array.isArray(rules[0].levels)) {
// return false;
// }
// return true;
// }
function transformApiResponseToForm(
apiData: Record<string, any>,
): TitleChunkerFormSchemaType {
if (!apiData) {
return apiData;
}
if (isEmpty(apiData)) {
return apiData as TitleChunkerFormSchemaType;
}
const method = apiData.method as 'hierarchy' | 'group';
const method = apiData.method as TitleChunkerMethod;
let hierarchy = apiData.hierarchy;
if (typeof hierarchy === 'number') {
hierarchy = String(hierarchy);
}
if (method === 'group' && !hierarchy) {
hierarchy = '0';
// Split hierarchy into two fields by method, and support backward compatibility
// with the single `hierarchy` field.
let hierarchyHierarchy = apiData.hierarchyHierarchy;
let hierarchyGroup = apiData.hierarchyGroup;
if (!hierarchyHierarchy && !hierarchyGroup && hierarchy) {
if (method === TitleChunkerMethod.Hierarchy) {
hierarchyHierarchy = hierarchy;
} else if (method === TitleChunkerMethod.Group) {
hierarchyGroup = hierarchy;
}
}
if (method === TitleChunkerMethod.Group && !hierarchyGroup) {
hierarchyGroup = '0';
}
if (method === TitleChunkerMethod.Hierarchy && !hierarchyHierarchy) {
hierarchyHierarchy = hierarchy || Hierarchy.H3;
}
// Extract the new-format rules field, or fall back to legacy formats.
let rules = apiData.rules;
// Check whether the API returned the oldest `levels` format (array of string arrays).
const hasLevelsData = apiData.levels && Array.isArray(apiData.levels);
if (hasLevelsData) {
// Convert the legacy `levels` structure into the modern `rules` shape.
rules = transformLevelsToRules(apiData.levels);
} else if (rules && Array.isArray(rules)) {
// Clean up the current-format rules by stripping out empty expressions.
rules = filterEmptyRules(rules);
}
// const rulesFormatCorrect = isRulesFormatCorrect(rules);
// Backward compatibility: older versions only had a generic `rules` field,
// while newer versions split it into `hierarchyRules` and `groupRules`.
// When the backend returns legacy data, migrate the old `rules` to the
// corresponding new field based on the current `method` so that user
// configurations are not lost.
let hierarchyRules = apiData.hierarchyRules;
let groupRules = apiData.groupRules;
// if (method === 'group') {
// if (rulesFormatCorrect) {
// return {
// method,
// hierarchy,
// rules,
// };
// }
// return {
// method,
// hierarchy,
// rules,
// };
// }
if (!hierarchyRules) {
if (method === TitleChunkerMethod.Hierarchy) {
hierarchyRules = cloneDeep(rules) || cloneDeep(originalRules);
} else {
hierarchyRules = cloneDeep(originalRules);
}
}
// if (rulesFormatCorrect && method === 'hierarchy') {
// return {
// method,
// hierarchy,
// rules,
// };
// }
if (!groupRules) {
if (method === TitleChunkerMethod.Group) {
groupRules = cloneDeep(rules) || cloneDeep(originalRules);
} else {
groupRules = cloneDeep(originalRules);
}
}
return {
method,
hierarchy,
hierarchyHierarchy,
hierarchyGroup,
include_heading_content: Boolean(apiData.include_heading_content),
root_chunk_as_heading: Boolean(apiData.root_chunk_as_heading),
rules,
hierarchyRules,
groupRules,
};
}
@@ -164,13 +164,12 @@ export function useDynamicHierarchyOptions(
const { t } = useTranslation();
const rules = useWatch({ name, control: form?.control });
const method = useWatch({ name: 'method', control: form?.control });
const currentHierarchy = form.watch('hierarchy');
const hierarchyOptions = useMemo(() => {
const maxLevelCount = calculateMaxLevelCount(rules);
const options = getDynamicHierarchyOptions(maxLevelCount);
if (method === 'group') {
if (method === TitleChunkerMethod.Group) {
return [
{ label: t('common.automatic', 'Automatic'), value: '0' },
...options,
@@ -180,18 +179,6 @@ export function useDynamicHierarchyOptions(
return options;
}, [method, rules, t]);
useEffect(() => {
if (!currentHierarchy || !form) {
return;
}
const maxOptionValue = hierarchyOptions[hierarchyOptions.length - 1]?.value;
if (maxOptionValue && currentHierarchy > maxOptionValue) {
form.setValue('hierarchy', maxOptionValue);
}
}, [currentHierarchy, hierarchyOptions, form]);
return hierarchyOptions;
}

View File

@@ -9,14 +9,18 @@ import { Switch } from '@/components/ui/switch';
import { cn } from '@/lib/utils';
import { zodResolver } from '@hookform/resolvers/zod';
import { ChevronDown, ChevronUp, Trash2 } from 'lucide-react';
import { memo, useEffect, useRef, useState } from 'react';
import { useFieldArray, useForm, useFormContext } from 'react-hook-form';
import { memo, useState } from 'react';
import {
useFieldArray,
useForm,
useFormContext,
useWatch,
} from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import {
Hierarchy,
initialGroupValues,
initialTitleChunkerValues,
TitleChunkerMethod,
} from '../../constant/pipeline';
import { useFormValues } from '../../hooks/use-form-values';
import { useWatchFormChange } from '../../hooks/use-watch-form-change';
@@ -26,13 +30,6 @@ import { FormWrapper } from '../components/form-wrapper';
import { Output } from '../components/output';
import { transformApiResponseToForm, useDynamicHierarchyOptions } from './hook';
type FormModeValues = {
hierarchy?: string;
include_heading_content?: boolean;
root_chunk_as_heading?: boolean;
rules: Array<{ levels: Array<{ expression: string }> }>;
};
const outputList = buildOutputList(initialTitleChunkerValues.outputs);
const rulesSchema = z.array(
@@ -58,13 +55,20 @@ const rulesSchema = z.array(
);
export const FormSchema = z.object({
method: z.enum(['hierarchy', 'group']),
hierarchy: z.string().optional(),
method: z.nativeEnum(TitleChunkerMethod),
hierarchyHierarchy: z.string().optional(),
hierarchyGroup: z.string().optional(),
include_heading_content: z.boolean().optional(),
root_chunk_as_heading: z.boolean().optional(),
rules: rulesSchema,
hierarchyRules: rulesSchema,
groupRules: rulesSchema,
});
export enum TitleChunkerRulesField {
Hierarchy = 'hierarchyRules',
Group = 'groupRules',
}
export type TitleChunkerFormSchemaType = z.infer<typeof FormSchema>;
type LevelItemProps = {
@@ -154,135 +158,76 @@ function CardBody({ cardName }: CardBodyProps) {
);
}
// type GroupCardBodyProps = {
// cardName: string;
// };
type RulesFieldArrayProps = {
name: TitleChunkerRulesField;
};
// function GroupCardBody({ cardName }: GroupCardBodyProps) {
// const { t } = useTranslation();
// const form = useFormContext();
function RulesFieldArray({ name }: RulesFieldArrayProps) {
const { t } = useTranslation();
const form = useFormContext();
const { fields, append, remove } = useFieldArray({
name,
control: form.control,
});
// const levelsName = `${cardName}.levels`;
// const { fields: levelFields } = useFieldArray({
// name: levelsName,
// control: form.control,
// });
// return (
// <CardContent className="p-4">
// <div className="space-y-4">
// {levelFields.map((levelField, levelIndex) => (
// <RAGFlowFormItem
// key={levelField.id}
// name={`${levelsName}.${levelIndex}.expression`}
// label={`${t('flow.regularExpressions')}`}
// >
// <Input />
// </RAGFlowFormItem>
// ))}
// </div>
// </CardContent>
// );
// }
return (
<div className="space-y-4">
{fields.map((cardField, cardIndex) => (
<Card key={cardField.id}>
<CardHeader className="flex flex-row justify-between items-center py-3 px-4 border-b bg-muted/20">
<div className="flex items-center gap-2">
<span className="font-medium text-sm">
{t('flow.rule', 'Rule')} {cardIndex + 1}
</span>
</div>
{fields.length > 1 && (
<Button
type="button"
variant={'ghost'}
size="sm"
onClick={() => remove(cardIndex)}
className="h-7 w-7 p-0 text-muted-foreground hover:text-destructive"
>
<Trash2 className="h-4 w-4" />
</Button>
)}
</CardHeader>
<CardBody cardIndex={cardIndex} cardName={`${name}.${cardIndex}`} />
</Card>
))}
<BlockButton
onClick={() =>
append({
levels: [{ expression: '' }],
})
}
className="mt-4"
>
{t('flow.addRule', 'Add Rule')}
</BlockButton>
</div>
);
}
const TitleChunkerForm = ({ node }: INextOperatorForm) => {
const { t } = useTranslation();
const initialValues = useFormValues(initialTitleChunkerValues, node);
const hierarchyModeValues = useRef<FormModeValues | null>(null);
const groupValues = useRef<FormModeValues | null>(null);
const form = useForm<TitleChunkerFormSchemaType>({
defaultValues: transformApiResponseToForm(initialValues),
resolver: zodResolver(FormSchema),
mode: 'onChange',
});
const isInitialized = useRef(false);
const initialMode = useRef<string | undefined>(undefined);
const [showAllTip, setShowAllTip] = useState(true);
const method = form.watch('method');
const name = 'rules';
const hierarchyOptions = useDynamicHierarchyOptions(form, name);
const method = useWatch({ name: 'method', control: form.control });
useEffect(() => {
if (!isInitialized.current) {
initialMode.current = method;
isInitialized.current = true;
return;
}
const activeRulesName =
method === TitleChunkerMethod.Group
? TitleChunkerRulesField.Group
: TitleChunkerRulesField.Hierarchy;
if (method !== initialMode.current) {
setShowAllTip(true);
const currentMode = initialMode.current;
const hierarchyValue = form.getValues('hierarchy');
const rulesValue = form.getValues('rules');
if (currentMode === 'hierarchy') {
hierarchyModeValues.current = {
hierarchy: hierarchyValue,
include_heading_content: form.getValues('include_heading_content'),
root_chunk_as_heading: form.getValues('root_chunk_as_heading'),
rules: rulesValue,
};
} else if (currentMode === 'group') {
groupValues.current = {
hierarchy: hierarchyValue,
include_heading_content: form.getValues('include_heading_content'),
root_chunk_as_heading: form.getValues('root_chunk_as_heading'),
rules: rulesValue,
};
}
initialMode.current = method;
if (method === 'group') {
const modeValues = groupValues.current;
form.reset({
method: 'group',
hierarchy: modeValues?.hierarchy ?? '0',
include_heading_content: false,
root_chunk_as_heading: false,
rules: modeValues?.rules || initialGroupValues.rules,
});
} else {
const defaultHierarchy = Hierarchy.H3;
let modeValues: FormModeValues | null = null;
modeValues = hierarchyModeValues.current;
if (modeValues) {
form.reset({
method: method,
hierarchy: modeValues.hierarchy || defaultHierarchy,
include_heading_content:
modeValues.include_heading_content || false,
root_chunk_as_heading: modeValues.root_chunk_as_heading || false,
rules: modeValues.rules,
});
} else {
const newModeValues: FormModeValues = {
hierarchy: defaultHierarchy,
include_heading_content: false,
root_chunk_as_heading: false,
rules: JSON.parse(JSON.stringify(initialTitleChunkerValues.rules)),
};
form.reset({
method: method,
hierarchy: defaultHierarchy,
include_heading_content: newModeValues.include_heading_content,
root_chunk_as_heading: newModeValues.root_chunk_as_heading,
rules: newModeValues.rules,
});
}
}
}
}, [method, form]);
const { fields, append, remove } = useFieldArray({
name: name,
control: form.control,
});
const hierarchyOptions = useDynamicHierarchyOptions(form, activeRulesName);
useWatchFormChange(node?.id, form);
@@ -295,16 +240,18 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
type: FormFieldType.Segmented,
label: '',
options: [
{ label: t('flow.hierarchy'), value: 'hierarchy' },
{
label: t('flow.hierarchy'),
value: TitleChunkerMethod.Hierarchy,
},
// { label: t('flow.tree', 'Tree'), value: 'tree' },
{ label: t('flow.group', 'Group'), value: 'group' },
{
label: t('flow.group', 'Group'),
value: TitleChunkerMethod.Group,
},
],
}}
/>
{/* <div className={cn("text-xs text-text-secondary w-full border p-1", showAllTip ? "block" : "")}>
{method === 'hierarchy' && t('flow.hierarchyTip')}
{method === 'group' && t('flow.groupTip')}
</div> */}
<div
className={`text-xs text-text-secondary w-full cursor-pointer `}
onClick={() => setShowAllTip(!showAllTip)}
@@ -316,9 +263,9 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
showAllTip ? 'whitespace-pre-wrap' : 'truncate',
)}
>
{method === 'hierarchy'
{method === TitleChunkerMethod.Hierarchy
? t('flow.hierarchyTip')
: method === 'group'
: method === TitleChunkerMethod.Group
? t('flow.groupTip')
: ''}
</div>
@@ -327,10 +274,23 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
</div>
</div>
</div>
<RAGFlowFormItem name={'hierarchy'} label={''}>
<RAGFlowFormItem
name={'hierarchyHierarchy'}
label={''}
className={cn({ hidden: method !== TitleChunkerMethod.Hierarchy })}
>
<SelectWithSearch options={hierarchyOptions}></SelectWithSearch>
</RAGFlowFormItem>
{method === 'hierarchy' && (
<RAGFlowFormItem
name={'hierarchyGroup'}
label={''}
className={cn({ hidden: method !== TitleChunkerMethod.Group })}
>
<SelectWithSearch options={hierarchyOptions}></SelectWithSearch>
</RAGFlowFormItem>
{method === TitleChunkerMethod.Hierarchy && (
<>
<RAGFlowFormItem
name="include_heading_content"
@@ -372,56 +332,18 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
</RAGFlowFormItem>
</>
)}
{/* {method === 'group' ? (
<Card>
<CardHeader className="flex flex-row justify-between items-center py-3 px-4 border-b bg-muted/20">
<span className="font-medium text-sm">
{t('flow.rule', 'Rule')} 1
</span>
</CardHeader>
<GroupCardBody cardName={`${name}.0`} />
</Card>
) : ( */}
<div className="space-y-4">
{fields.map((cardField, cardIndex) => (
<Card key={cardField.id}>
<CardHeader className="flex flex-row justify-between items-center py-3 px-4 border-b bg-muted/20">
<div className="flex items-center gap-2">
<span className="font-medium text-sm">
{t('flow.rule', 'Rule')} {cardIndex + 1}
</span>
</div>
{fields.length > 1 && (
<Button
type="button"
variant={'ghost'}
size="sm"
onClick={() => remove(cardIndex)}
className="h-7 w-7 p-0 text-muted-foreground hover:text-destructive"
>
<Trash2 className="h-4 w-4" />
</Button>
)}
</CardHeader>
<CardBody
cardIndex={cardIndex}
cardName={`${name}.${cardIndex}`}
/>
</Card>
))}
</div>
{/* )} */}
{/* {method !== 'group' && ( */}
<BlockButton
onClick={() =>
append({
levels: [{ expression: '' }],
})
<div
className={
method === TitleChunkerMethod.Hierarchy ? 'block' : 'hidden'
}
className="mt-4"
>
{t('flow.addRule', 'Add Rule')}
</BlockButton>
<RulesFieldArray name={TitleChunkerRulesField.Hierarchy} />
</div>
<div
className={method === TitleChunkerMethod.Group ? 'block' : 'hidden'}
>
<RulesFieldArray name={TitleChunkerRulesField.Group} />
</div>
{/* )} */}
</FormWrapper>
<div className="p-5">

View File

@@ -33,6 +33,7 @@ import {
NoDebugOperatorsList,
NodeHandleId,
Operator,
TitleChunkerMethod,
TypesWithArray,
WebhookSecurityAuthType,
} from './constant';
@@ -353,13 +354,29 @@ function transformTokenChunkerParams(params: TokenChunkerFormSchemaType) {
}
function transformTitleChunkerParams(params: TitleChunkerFormSchemaType) {
const levels = params.rules.map((rule) =>
const activeRules =
params.method === TitleChunkerMethod.Group
? params.groupRules
: params.hierarchyRules;
const levels = (activeRules || []).map((rule) =>
transformObjectArrayToPureArray(rule.levels, 'expression'),
);
const hierarchyValue =
params.method === TitleChunkerMethod.Group
? params.hierarchyGroup
: params.hierarchyHierarchy;
return {
...omit(params, [
'hierarchyRules',
'groupRules',
'hierarchyHierarchy',
'hierarchyGroup',
]),
method: params.method,
hierarchy: Number(params.hierarchy || 0),
hierarchy: Number(hierarchyValue || 0),
include_heading_content: Boolean(params.include_heading_content),
root_chunk_as_heading: Boolean(params.root_chunk_as_heading),
levels,