feat(file): Add file ancestor directory lookup feature by go (#14037)

### What problem does this PR solve?

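feat(file): Add a Go implementation of the file ancestor directory lookup endpoint (`GET /api/v1/files/{id}/ancestors`), matching the existing Python `/files/<file_id>/ancestors` API.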

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
chanx
2026-04-14 15:22:03 +08:00
committed by GitHub
parent 6aec8058bb
commit 1031aebc8f
7 changed files with 224 additions and 133 deletions

View File

@@ -219,6 +219,41 @@ func (h *FileHandler) GetAllParentFolders(c *gin.Context) {
})
}
// GetFileAncestors returns the ancestor folders of the file identified by the
// ":id" path parameter. It is the Go counterpart of the Python
// /files/<file_id>/ancestors endpoint.
//
// The request is rejected with the code/message reported by GetUser when that
// check does not succeed, with CodeBadRequest when the ID is empty, and with
// CodeServerError when the file service lookup fails. On success the folders
// are returned under data.parent_folders.
//
// NOTE(review): the user value returned by GetUser is discarded, so no
// per-file access check is visible in this handler — confirm it is enforced
// in the service layer or middleware.
//
// @Summary Get File Ancestors
// @Description Get all ancestor folders in path from file to root
// @Tags file
// @Accept json
// @Produce json
// @Param id path string true "file ID"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/files/{id}/ancestors [get]
func (h *FileHandler) GetFileAncestors(c *gin.Context) {
	// Only the status of the user lookup matters here; the user itself is unused.
	if _, code, msg := GetUser(c); code != common.CodeSuccess {
		jsonError(c, code, msg)
		return
	}

	id := c.Param("id")
	if id == "" {
		jsonError(c, common.CodeBadRequest, "file id is required")
		return
	}

	// The ancestor lookup reuses the service method behind GetAllParentFolders.
	ancestors, lookupErr := h.fileService.GetAllParentFolders(id)
	if lookupErr != nil {
		jsonError(c, common.CodeServerError, lookupErr.Error())
		return
	}

	payload := gin.H{"parent_folders": ancestors}
	c.JSON(http.StatusOK, gin.H{
		"code":    common.CodeSuccess,
		"data":    payload,
		"message": common.CodeSuccess.Message(),
	})
}
type CreateFolderRequest struct {
Name string `json:"name" binding:"required"`
ParentID string `json:"parent_id"`
@@ -385,6 +420,7 @@ type MoveFileRequest struct {
// - dest_file_id only: move files to a new folder (names unchanged)
// - new_name only: rename a single file in place (no storage operation)
// - both: move and rename simultaneously
//
// @Tags file
// @Accept json
// @Produce json

View File

@@ -196,6 +196,7 @@ func (r *Router) Setup(engine *gin.Engine) {
file.GET("", r.fileHandler.ListFiles)
file.DELETE("", r.fileHandler.DeleteFiles)
file.POST("/move", r.fileHandler.MoveFiles)
file.GET("/:id/ancestors", r.fileHandler.GetFileAncestors)
file.GET("/:id", r.fileHandler.Download)
}

View File

@@ -1507,6 +1507,19 @@ Example: Virtual Hosted Style`,
author: 'Author',
sectionTitle: 'Section title',
},
includeHeadingContent: 'Include heading content',
includeHeadingContentTip:
'When enabled, content directly under a heading is kept as its own chunk. Child chunks keep only the heading path.',
hierarchyTip: `Build a heading tree and produce self-contained chunks, each carrying its full ancestor heading path (e.g. Part 1 Chapter 3 Section 2 + body text).<br>
Best for: Documents with independent, structurally significant sections — such as legal statutes, regulations, contracts, and technical specifications — where each chunk must be identifiable by its structural position even without surrounding context.`,
groupTip: `Split the document flat at a chosen heading level and automatically merge adjacent small sections to preserve content continuity. No parent-heading path is injected.<br>
Best for: Documents with flowing, contextually connected content — such as books, manuals, reports, and articles — where adjacent paragraphs should stay together to maintain narrative coherence.`,
enableMultiColumn: 'Enable multi column',
enableMultiColumnTip:
'Detect and parse multi-column page layouts to preserve the correct reading order. Turn this on for PDFs or documents with two-column or newspaper-style layouts.',
removeToc: 'Remove original table of contents',
removeTocTip:
'Remove the table of contents included in the original PDF, so it is not parsed as regular content or chunked for retrieval.',
autoPlay: 'Auto play audio',
downloadFileTypeTip: 'The file type to download',
downloadFileType: 'Download file type',
@@ -2244,7 +2257,7 @@ This process aggregates variables from multiple branches into a single variable
tokenChunkerDescription:
'Split text into chunks by token length with optional delimiters and overlap.',
titleChunkerDescription:
'Split documents into sections by title hierarchy with regex rules for finer control.',
'Split documents into sections by title hierarchy. Define heading levels with regex rules, then choose Hierarchy or Group mode to control how chunks are structured.',
titleChunker: 'Title Chunker',
extractor: 'Transformer',
extractorDescription:
@@ -2267,6 +2280,7 @@ This process aggregates variables from multiple branches into a single variable
},
fields: 'Field',
addParser: 'Add Parser',
group: 'Group',
hierarchy: 'Hierarchy',
regularExpressions: 'Regular Expressions',
overlappedPercent: 'Overlapped percent (%)',

View File

@@ -1253,6 +1253,19 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
author: '作者',
sectionTitle: '章节标题',
},
includeHeadingContent: '包含标题内容',
includeHeadingContentTip:
'启用后,标题下的直接内容将作为一个独立的块保留。子块仅保留标题路径。',
hierarchyTip: `构建标题树并生成独立的块,每个块携带其完整的祖先标题路径(例如 第1部分 第3章 第2节 + 正文)。<br>
适用场景:具有独立的、结构性重要章节的文档——如法律条款、法规、合同和技术规范——其中每个块即使没有上下文也能通过其结构位置来识别。`,
groupTip: `在选定的标题级别将文档扁平分割,并自动合并相邻的小节以保持内容连续性。不注入父标题路径。<br>
适用场景:具有流动性的、内容相关联的文档——如书籍、手册、报告和文章——其中相邻段落应保持在一起以维持叙述连贯性。`,
enableMultiColumn: '启用多栏',
enableMultiColumnTip:
'检测并解析多栏页面布局以保持正确的阅读顺序。对于具有双栏或报纸式布局的PDF或文档请开启此功能。',
removeToc: '移除原始目录',
removeTocTip:
'移除原始PDF中包含的目录这样它就不会被解析为常规内容或作为检索块。',
autoPlay: '自动播放',
downloadFileTypeTip: '文件下载的类型',
downloadFileType: '文件类型',
@@ -1943,7 +1956,7 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
tokenChunkerDescription:
'根据分词器长度将文本拆分成块,并带有可选的分隔符和重叠。',
titleChunkerDescription:
'使用正则表达式规则按标题层次结构将文档拆分成多个部分,以实现更精细的控制。',
'按标题层级拆分文档。通过正则表达式定义各级标题,再选择层级或分组模式控制切片方式。',
titleChunker: '按标题分块',
extractor: '提取器',
extractorDescription:
@@ -1966,6 +1979,7 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
},
fields: '字段',
addParser: '增加解析器',
group: '聚合',
hierarchy: '层次结构',
regularExpressions: '正则表达式',
overlappedPercent: '重叠百分比(%)',

View File

@@ -93,9 +93,10 @@ export function TwoColumnCheckFormField({ prefix }: CommonProps) {
return (
<RAGFlowFormItem
name={buildFieldNameWithPrefix(`enable_multi_column`, prefix)}
label={t('flow.enableMultiColumn', 'Enable multi column')}
label={t('flow.enableMultiColumn')}
horizontal={true}
labelClassName="w-[200px]"
labelClassName="w-full"
tooltip={t('flow.enableMultiColumnTip')}
>
{(field) => (
<Checkbox
@@ -114,9 +115,10 @@ export function RmdirFormField({ prefix }: CommonProps) {
return (
<RAGFlowFormItem
name={buildFieldNameWithPrefix(`remove_toc`, prefix)}
label={t('flow.remove_toc', 'Remove TOC')}
label={t('flow.removeToc')}
horizontal={true}
labelClassName="w-[200px]"
tooltip={t('flow.removeTocTip')}
labelClassName="w-full"
>
{(field) => (
<Checkbox

View File

@@ -5,14 +5,13 @@ import {
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { BlockButton, Button } from '@/components/ui/button';
import { Form } from '@/components/ui/form';
import { MultiSelect } from '@/components/ui/multi-select';
import { Separator } from '@/components/ui/separator';
import { cn } from '@/lib/utils';
import { buildOptions } from '@/utils/form';
import { zodResolver } from '@hookform/resolvers/zod';
import { useHover } from 'ahooks';
import { Trash2 } from 'lucide-react';
import { memo, useCallback, useEffect, useMemo, useRef } from 'react';
import { memo, useCallback, useMemo, useRef } from 'react';
import {
useFieldArray,
UseFieldArrayRemove,
@@ -25,8 +24,6 @@ import {
FileType,
InitialOutputFormatMap,
initialParserValues,
MAIN_CONTENT_PREPROCESS_VALUE,
PreprocessValue,
} from '../../constant/pipeline';
import { useFormValues } from '../../hooks/use-form-values';
import { useWatchFormChange } from '../../hooks/use-watch-form-change';
@@ -49,82 +46,82 @@ import { WordFormFields } from './word-form-fields';
const outputList = buildOutputList(initialParserValues.outputs);
type PreprocessOptionConfig = {
value: PreprocessValue;
required?: boolean;
};
// type PreprocessOptionConfig = {
// value: PreprocessValue;
// required?: boolean;
// };
const DefaultPreprocessOptionConfigs: PreprocessOptionConfig[] = [
{ value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
];
// const DefaultPreprocessOptionConfigs: PreprocessOptionConfig[] = [
// { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
// ];
const PreprocessOptionConfigsMap: Partial<
Record<FileType, PreprocessOptionConfig[]>
> = {
[FileType.PDF]: [
{ value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
{ value: PreprocessValue.abstract },
{ value: PreprocessValue.author },
{ value: PreprocessValue.section_title },
],
[FileType.PowerPoint]: [
{ value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
],
[FileType.Spreadsheet]: [
{ value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
],
[FileType.TextMarkdown]: [
{ value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
{ value: PreprocessValue.section_title },
],
[FileType.Code]: [{ value: MAIN_CONTENT_PREPROCESS_VALUE, required: true }],
[FileType.Html]: [
{ value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
{ value: PreprocessValue.section_title },
],
[FileType.Doc]: [
{ value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
{ value: PreprocessValue.section_title },
],
[FileType.Docx]: [
{ value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
{ value: PreprocessValue.section_title },
],
};
// const PreprocessOptionConfigsMap: Partial<
// Record<FileType, PreprocessOptionConfig[]>
// > = {
// [FileType.PDF]: [
// { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
// { value: PreprocessValue.abstract },
// { value: PreprocessValue.author },
// { value: PreprocessValue.section_title },
// ],
// [FileType.PowerPoint]: [
// { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
// ],
// [FileType.Spreadsheet]: [
// { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
// ],
// [FileType.TextMarkdown]: [
// { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
// { value: PreprocessValue.section_title },
// ],
// [FileType.Code]: [{ value: MAIN_CONTENT_PREPROCESS_VALUE, required: true }],
// [FileType.Html]: [
// { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
// { value: PreprocessValue.section_title },
// ],
// [FileType.Doc]: [
// { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
// { value: PreprocessValue.section_title },
// ],
// [FileType.Docx]: [
// { value: MAIN_CONTENT_PREPROCESS_VALUE, required: true },
// { value: PreprocessValue.section_title },
// ],
// };
function getPreprocessOptionConfigs(fileType?: FileType) {
if (!fileType) {
return DefaultPreprocessOptionConfigs;
}
// function getPreprocessOptionConfigs(fileType?: FileType) {
// if (!fileType) {
// return DefaultPreprocessOptionConfigs;
// }
return PreprocessOptionConfigsMap[fileType] ?? DefaultPreprocessOptionConfigs;
}
// return PreprocessOptionConfigsMap[fileType] ?? DefaultPreprocessOptionConfigs;
// }
function normalizePreprocessValuesByFileType(
fileType: FileType | undefined,
values: string[] | undefined,
) {
const optionConfigs = getPreprocessOptionConfigs(fileType);
const allowedValueSet = new Set(optionConfigs.map((x) => x.value));
const requiredValues = optionConfigs
.filter((x) => x.required)
.map((x) => x.value);
const normalizedOptionalValues = (Array.isArray(values) ? values : []).filter(
(value) => allowedValueSet.has(value as PreprocessValue),
) as PreprocessValue[];
// function normalizePreprocessValuesByFileType(
// fileType: FileType | undefined,
// values: string[] | undefined,
// ) {
// const optionConfigs = getPreprocessOptionConfigs(fileType);
// const allowedValueSet = new Set(optionConfigs.map((x) => x.value));
// const requiredValues = optionConfigs
// .filter((x) => x.required)
// .map((x) => x.value);
// const normalizedOptionalValues = (Array.isArray(values) ? values : []).filter(
// (value) => allowedValueSet.has(value as PreprocessValue),
// ) as PreprocessValue[];
return Array.from(
new Set<PreprocessValue>([...requiredValues, ...normalizedOptionalValues]),
);
}
// return Array.from(
// new Set<PreprocessValue>([...requiredValues, ...normalizedOptionalValues]),
// );
// }
function isSameStringArray(a: string[] | undefined, b: string[]) {
if (!a || a.length !== b.length) {
return false;
}
// function isSameStringArray(a: string[] | undefined, b: string[]) {
// if (!a || a.length !== b.length) {
// return false;
// }
return a.every((item, idx) => item === b[idx]);
}
// return a.every((item, idx) => item === b[idx]);
// }
const FileFormatWidgetMap = {
[FileType.PDF]: PdfFormFields,
@@ -151,7 +148,7 @@ export const FormSchema = z.object({
setups: z.array(
z.object({
fileFormat: z.string().nullish(),
preprocess: z.array(z.string()).optional(),
// preprocess: z.array(z.string()).optional(),
output_format: z.string().optional(),
parse_method: z.string().optional(),
lang: z.string().optional(),
@@ -212,56 +209,56 @@ function ParserItem({
[form, index],
);
const handlePreprocessChange = useCallback(
(value: PreprocessValue[]) => {
form.setValue(`setups.${index}.preprocess`, value, {
shouldDirty: true,
shouldValidate: true,
shouldTouch: true,
});
},
[form, index],
);
// const handlePreprocessChange = useCallback(
// (value: PreprocessValue[]) => {
// form.setValue(`setups.${index}.preprocess`, value, {
// shouldDirty: true,
// shouldValidate: true,
// shouldTouch: true,
// });
// },
// [form, index],
// );
const preprocessOptions = useMemo(() => {
const optionConfigs = getPreprocessOptionConfigs(fileFormat as FileType);
// const preprocessOptions = useMemo(() => {
// const optionConfigs = getPreprocessOptionConfigs(fileFormat as FileType);
return optionConfigs.map((optionConfig) => {
const labelMap: Record<string, string> = {
[MAIN_CONTENT_PREPROCESS_VALUE]: t('flow.preprocess.mainContent'),
[PreprocessValue.section_title]: t('flow.preprocess.sectionTitle'),
[PreprocessValue.abstract]: t('flow.preprocess.abstract'),
[PreprocessValue.author]: t('flow.preprocess.author'),
};
// return optionConfigs.map((optionConfig) => {
// const labelMap: Record<string, string> = {
// [MAIN_CONTENT_PREPROCESS_VALUE]: t('flow.preprocess.mainContent'),
// [PreprocessValue.section_title]: t('flow.preprocess.sectionTitle'),
// [PreprocessValue.abstract]: t('flow.preprocess.abstract'),
// [PreprocessValue.author]: t('flow.preprocess.author'),
// };
const label = labelMap[optionConfig.value] || optionConfig.value;
// const label = labelMap[optionConfig.value] || optionConfig.value;
return {
value: optionConfig.value,
disabled: optionConfig.required,
label: label,
};
});
}, [fileFormat, t]);
// return {
// value: optionConfig.value,
// disabled: optionConfig.required,
// label: label,
// };
// });
// }, [fileFormat, t]);
useEffect(() => {
const currentPreprocessValues = form.getValues(
`setups.${index}.preprocess`,
) as string[] | undefined;
const normalizedPreprocessValues = normalizePreprocessValuesByFileType(
fileFormat as FileType,
currentPreprocessValues,
);
// useEffect(() => {
// const currentPreprocessValues = form.getValues(
// `setups.${index}.preprocess`,
// ) as string[] | undefined;
// const normalizedPreprocessValues = normalizePreprocessValuesByFileType(
// fileFormat as FileType,
// currentPreprocessValues,
// );
if (
!isSameStringArray(currentPreprocessValues, normalizedPreprocessValues)
) {
form.setValue(`setups.${index}.preprocess`, normalizedPreprocessValues, {
shouldDirty: false,
shouldValidate: true,
});
}
}, [fileFormat, form, index]);
// if (
// !isSameStringArray(currentPreprocessValues, normalizedPreprocessValues)
// ) {
// form.setValue(`setups.${index}.preprocess`, normalizedPreprocessValues, {
// shouldDirty: false,
// shouldValidate: true,
// });
// }
// }, [fileFormat, form, index]);
return (
<section
@@ -301,7 +298,7 @@ function ParserItem({
fileType={fileFormat as FileType}
/>
</div>
<RAGFlowFormItem
{/* <RAGFlowFormItem
name={buildFieldNameWithPrefix(`preprocess`, prefix)}
label={t('flow.preprocess.preprocess')}
>
@@ -320,7 +317,7 @@ function ParserItem({
options={preprocessOptions}
></MultiSelect>
)}
</RAGFlowFormItem>
</RAGFlowFormItem> */}
{index < fieldLength - 1 && <Separator />}
</section>
);
@@ -351,10 +348,10 @@ const ParserForm = ({ node }: INextOperatorForm) => {
parse_method: '',
lang: '',
fields: [],
llm_id: '',
vlm: { llm_id: '' },
table_result_type: '',
markdown_image_response_type: '',
preprocess: [],
// preprocess: [],
});
}, [append]);

View File

@@ -8,7 +8,7 @@ import { Form } from '@/components/ui/form';
import { Input } from '@/components/ui/input';
import { zodResolver } from '@hookform/resolvers/zod';
import { Trash2 } from 'lucide-react';
import { memo, useEffect, useRef } from 'react';
import { memo, useEffect, useRef, useState } from 'react';
import { useFieldArray, useForm, useFormContext } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
@@ -197,6 +197,7 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
});
const isInitialized = useRef(false);
const initialMode = useRef<string | undefined>(undefined);
const [showAllTip, setShowAllTip] = useState(true);
const method = form.watch('method');
const name = 'rules';
@@ -210,6 +211,7 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
}
if (method !== initialMode.current) {
setShowAllTip(true);
const currentMode = initialMode.current;
const hierarchyValue = form.getValues('hierarchy');
const rulesValue = form.getValues('rules');
@@ -290,6 +292,34 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
],
}}
/>
{/* <div className={cn("text-xs text-text-secondary w-full border p-1", showAllTip ? "block" : "")}>
{method === 'hierarchy' && t('flow.hierarchyTip')}
{method === 'group' && t('flow.groupTip')}
</div> */}
<div
className={`text-xs text-text-secondary w-full border rounded-sm p-2 cursor-pointer ${showAllTip ? 'block' : 'truncate'}`}
onClick={() => setShowAllTip(!showAllTip)}
>
<div className="flex flex-col justify-start items-center">
<span
className="flex self-start"
dangerouslySetInnerHTML={{
__html:
method === 'hierarchy'
? t('flow.hierarchyTip')
: method === 'group'
? t('flow.groupTip')
: '',
}}
>
{/* {method === 'hierarchy' && t('flow.hierarchyTip')}
{method === 'group' && t('flow.groupTip')} */}
</span>
{/* <span className="flex ml-2 text-xs self-center">
{showAllTip ? '▲' : ''}
</span> */}
</div>
</div>
<RAGFlowFormItem name={'hierarchy'} label={''}>
<SelectWithSearch options={hierarchyOptions}></SelectWithSearch>
</RAGFlowFormItem>
@@ -297,12 +327,9 @@ const TitleChunkerForm = ({ node }: INextOperatorForm) => {
<RAGFlowFormItem
name="include_heading_content"
label={t('flow.includeHeadingContent', 'Include heading content')}
tooltip={t(
'flow.includeHeadingContentTip',
'When enabled, content directly under a heading is kept as its own chunk. Child chunks keep only the heading path.',
)}
tooltip={t('flow.includeHeadingContentTip')}
horizontal={true}
labelClassName="w-[200px]"
labelClassName="w-full"
>
{(field) => (
<Checkbox