mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-04 18:45:38 +08:00
Feat: add button to turn off vlm parsing (#14125)
### What problem does this PR solve?

Feat: add button to turn off vlm parsing

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: chanx <1243304602@qq.com>
This commit is contained in:
@@ -234,10 +234,6 @@ class BaseTitleChunker(ABC):
|
||||
return self.resolve_outline_levels(line_records) or self.resolve_frequency_levels(line_records)
|
||||
|
||||
|
||||
def resolve_manual_levels(self, line_records):
|
||||
return self.resolve_title_levels(line_records)["levels"]
|
||||
|
||||
|
||||
def build_chunks_from_record_groups(self, record_groups):
|
||||
# Strategy code decides record grouping. This method materializes each
|
||||
# group into the output chunk representation. For PDF-like inputs, the
|
||||
|
||||
@@ -110,6 +110,7 @@ class ParserParam(ProcessParamBase):
|
||||
"pdf": {
|
||||
"parse_method": "deepdoc", # deepdoc/plain_text/tcadp_parser/vlm
|
||||
"lang": "Chinese",
|
||||
"flatten_media_to_text": False,
|
||||
"remove_toc": False,
|
||||
"suffix": [
|
||||
"pdf",
|
||||
@@ -118,6 +119,7 @@ class ParserParam(ProcessParamBase):
|
||||
},
|
||||
"spreadsheet": {
|
||||
"parse_method": "deepdoc", # deepdoc/tcadp_parser
|
||||
"flatten_media_to_text": False,
|
||||
"output_format": "html",
|
||||
"suffix": [
|
||||
"xls",
|
||||
@@ -133,6 +135,7 @@ class ParserParam(ProcessParamBase):
|
||||
"output_format": "json",
|
||||
},
|
||||
"docx": {
|
||||
"flatten_media_to_text": False,
|
||||
"remove_toc": False,
|
||||
"suffix": [
|
||||
"docx",
|
||||
@@ -140,6 +143,7 @@ class ParserParam(ProcessParamBase):
|
||||
"output_format": "json",
|
||||
},
|
||||
"markdown": {
|
||||
"flatten_media_to_text": False,
|
||||
"suffix": ["md", "markdown", "mdx"],
|
||||
"remove_toc": False,
|
||||
"output_format": "json",
|
||||
@@ -312,6 +316,7 @@ class Parser(ProcessBase):
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a PDF.")
|
||||
conf = self._param.setups["pdf"]
|
||||
self.set_output("output_format", conf["output_format"])
|
||||
flatten_media_to_text = conf.get("flatten_media_to_text")
|
||||
pdf_parser = None
|
||||
|
||||
# Optional PDF post-processing flags applied after parsing.
|
||||
@@ -571,7 +576,9 @@ class Parser(ProcessBase):
|
||||
layout_counters[layout] = seq + 1
|
||||
b["layoutno"] = f"{layout}-{seq}"
|
||||
|
||||
if layout == "table":
|
||||
if flatten_media_to_text:
|
||||
b["doc_type_kwd"] = "text"
|
||||
elif layout == "table":
|
||||
b["doc_type_kwd"] = "table"
|
||||
elif layout == "figure":
|
||||
b["doc_type_kwd"] = "image"
|
||||
@@ -668,6 +675,7 @@ class Parser(ProcessBase):
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a Spreadsheet.")
|
||||
conf = self._param.setups["spreadsheet"]
|
||||
self.set_output("output_format", conf["output_format"])
|
||||
flatten_media_to_text = conf.get("flatten_media_to_text")
|
||||
|
||||
parse_method = conf.get("parse_method", "deepdoc")
|
||||
|
||||
@@ -723,7 +731,12 @@ class Parser(ProcessBase):
|
||||
# Add tables as text
|
||||
for table in tables:
|
||||
if table:
|
||||
result.append({"text": table, "doc_type_kwd": "table"})
|
||||
result.append(
|
||||
{
|
||||
"text": table,
|
||||
"doc_type_kwd": "text" if flatten_media_to_text else "table",
|
||||
}
|
||||
)
|
||||
|
||||
self.set_output("json", result)
|
||||
|
||||
@@ -771,6 +784,7 @@ class Parser(ProcessBase):
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a DOCX document")
|
||||
conf = self._param.setups["docx"]
|
||||
self.set_output("output_format", conf["output_format"])
|
||||
flatten_media_to_text = conf.get("flatten_media_to_text")
|
||||
|
||||
if re.search(r"\.doc$", name, re.IGNORECASE):
|
||||
self.set_output("file", {**kwargs.get("file", {}), "outlines": []})
|
||||
@@ -823,7 +837,7 @@ class Parser(ProcessBase):
|
||||
{
|
||||
"text": text,
|
||||
"image": image,
|
||||
"doc_type_kwd": "image" if image is not None else "text",
|
||||
"doc_type_kwd": "text" if flatten_media_to_text or image is None else "image",
|
||||
}
|
||||
)
|
||||
if html:
|
||||
@@ -831,7 +845,7 @@ class Parser(ProcessBase):
|
||||
{
|
||||
"text": html,
|
||||
"image": None,
|
||||
"doc_type_kwd": "table",
|
||||
"doc_type_kwd": "text" if flatten_media_to_text else "table",
|
||||
}
|
||||
)
|
||||
enhance_media_sections_with_vision(
|
||||
@@ -927,6 +941,7 @@ class Parser(ProcessBase):
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a markdown.")
|
||||
conf = self._param.setups["markdown"]
|
||||
self.set_output("output_format", conf["output_format"])
|
||||
flatten_media_to_text = conf.get("flatten_media_to_text")
|
||||
|
||||
markdown_parser = naive_markdown_parser()
|
||||
sections, tables, section_images = markdown_parser(
|
||||
@@ -952,7 +967,11 @@ class Parser(ProcessBase):
|
||||
# If multiple images found, combine them using concat_img
|
||||
combined_image = reduce(concat_img, images) if len(images) > 1 else images[0]
|
||||
json_result["image"] = combined_image
|
||||
json_result["doc_type_kwd"] = "image" if json_result.get("image") is not None else "text"
|
||||
json_result["doc_type_kwd"] = (
|
||||
"text"
|
||||
if flatten_media_to_text or json_result.get("image") is None
|
||||
else "image"
|
||||
)
|
||||
json_results.append(json_result)
|
||||
|
||||
for table in tables:
|
||||
@@ -961,7 +980,7 @@ class Parser(ProcessBase):
|
||||
json_results.append(
|
||||
{
|
||||
"text": table_text,
|
||||
"doc_type_kwd": "table",
|
||||
"doc_type_kwd": "text" if flatten_media_to_text else "table",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -1575,6 +1575,9 @@ Best for: Documents with flowing, contextually connected content — such as boo
|
||||
oneChunkTitle: 'Note',
|
||||
oneChunkDescription:
|
||||
'All parsed sections will be merged in order into a single chunk.',
|
||||
flattenMediaToText: 'Disable vision model',
|
||||
flattenMediaToTextTip:
|
||||
'Treat image and table sections as plain text and skip vision enhancement.',
|
||||
enableChildrenDelimiters: 'Child chunk are used for retrieval',
|
||||
merge: 'Merge',
|
||||
split: 'Split',
|
||||
|
||||
@@ -1324,6 +1324,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
oneChunkTitle: 'Note',
|
||||
oneChunkDescription:
|
||||
'所有解析后的 sections 会按原始顺序合并为 1 个 chunk。',
|
||||
flattenMediaToText: '禁用视觉模型',
|
||||
flattenMediaToTextTip: '将图片和表格区块按普通文本处理,并跳过视觉增强。',
|
||||
merge: '合并',
|
||||
split: '拆分',
|
||||
script: '脚本',
|
||||
|
||||
@@ -197,12 +197,14 @@ export const initialParserValues = {
|
||||
output_format: PdfOutputFormat.Json,
|
||||
parse_method: ParseDocumentType.DeepDOC,
|
||||
preprocess: PreprocessValue.main_content,
|
||||
flatten_media_to_text: false,
|
||||
},
|
||||
{
|
||||
fileFormat: FileType.Spreadsheet,
|
||||
output_format: SpreadsheetOutputFormat.Html,
|
||||
parse_method: ParseDocumentType.DeepDOC,
|
||||
preprocess: PreprocessValue.main_content,
|
||||
flatten_media_to_text: false,
|
||||
},
|
||||
{
|
||||
fileFormat: FileType.Image,
|
||||
@@ -221,6 +223,7 @@ export const initialParserValues = {
|
||||
fileFormat: FileType.TextMarkdown,
|
||||
output_format: TextMarkdownOutputFormat.Text,
|
||||
preprocess: PreprocessValue.main_content,
|
||||
flatten_media_to_text: false,
|
||||
},
|
||||
{
|
||||
fileFormat: FileType.Code,
|
||||
@@ -241,6 +244,7 @@ export const initialParserValues = {
|
||||
fileFormat: FileType.Docx,
|
||||
output_format: DocxOutputFormat.Json,
|
||||
preprocess: PreprocessValue.main_content,
|
||||
flatten_media_to_text: false,
|
||||
},
|
||||
{
|
||||
fileFormat: FileType.PowerPoint,
|
||||
|
||||
@@ -88,6 +88,29 @@ export function LargeModelFormField({
|
||||
);
|
||||
}
|
||||
|
||||
export function FlattenMediaToTextFormField({ prefix }: CommonProps) {
|
||||
const { t } = useTranslation();
|
||||
return (
|
||||
<RAGFlowFormItem
|
||||
name={buildFieldNameWithPrefix(`flatten_media_to_text`, prefix)}
|
||||
label={t('flow.flattenMediaToText')}
|
||||
tooltip={t('flow.flattenMediaToTextTip')}
|
||||
horizontal={true}
|
||||
labelClassName="w-full"
|
||||
valueClassName="w-8"
|
||||
>
|
||||
{(field) => (
|
||||
<Switch
|
||||
checked={field.value}
|
||||
onCheckedChange={(checked) => {
|
||||
field.onChange?.(checked);
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</RAGFlowFormItem>
|
||||
);
|
||||
}
|
||||
|
||||
export function TwoColumnCheckFormField({ prefix }: CommonProps) {
|
||||
const { t } = useTranslation();
|
||||
return (
|
||||
|
||||
@@ -154,6 +154,7 @@ export const FormSchema = z.object({
|
||||
lang: z.string().optional(),
|
||||
fields: z.array(z.string()).optional(),
|
||||
vlm: z.object({ llm_id: z.string().optional() }).optional(),
|
||||
flatten_media_to_text: z.boolean().optional(),
|
||||
system_prompt: z.string().optional(),
|
||||
table_result_type: z.string().optional(),
|
||||
markdown_image_response_type: z.string().optional(),
|
||||
|
||||
@@ -11,6 +11,7 @@ import { useEffect, useMemo } from 'react';
|
||||
import { useFormContext, useWatch } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import {
|
||||
FlattenMediaToTextFormField,
|
||||
LanguageFormField,
|
||||
LargeModelFormField,
|
||||
ParserMethodFormField,
|
||||
@@ -42,6 +43,9 @@ export function PdfFormFields({ prefix }: CommonProps) {
|
||||
const parseMethod = useWatch({
|
||||
name: parseMethodName,
|
||||
});
|
||||
const flattenMediaToText = useWatch({
|
||||
name: buildFieldNameWithPrefix('flatten_media_to_text', prefix),
|
||||
});
|
||||
|
||||
const languageShown = useMemo(() => {
|
||||
return (
|
||||
@@ -101,11 +105,13 @@ export function PdfFormFields({ prefix }: CommonProps) {
|
||||
<TwoColumnCheckFormField prefix={prefix} />
|
||||
<RmdirFormField prefix={prefix} />
|
||||
<ParserMethodFormField prefix={prefix}></ParserMethodFormField>
|
||||
|
||||
<LargeModelFormField
|
||||
prefix={prefix}
|
||||
options={modelOptions}
|
||||
></LargeModelFormField>
|
||||
<FlattenMediaToTextFormField prefix={prefix} />
|
||||
{!flattenMediaToText && (
|
||||
<LargeModelFormField
|
||||
prefix={prefix}
|
||||
options={modelOptions}
|
||||
></LargeModelFormField>
|
||||
)}
|
||||
{languageShown && <LanguageFormField prefix={prefix}></LanguageFormField>}
|
||||
{tcadpOptionsShown && (
|
||||
<>
|
||||
|
||||
@@ -11,6 +11,7 @@ import { useEffect, useMemo } from 'react';
|
||||
import { useFormContext, useWatch } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import {
|
||||
FlattenMediaToTextFormField,
|
||||
LargeModelFormField,
|
||||
ParserMethodFormField,
|
||||
} from './common-form-fields';
|
||||
@@ -39,6 +40,9 @@ export function SpreadsheetFormFields({ prefix }: CommonProps) {
|
||||
const parseMethod = useWatch({
|
||||
name: parseMethodName,
|
||||
});
|
||||
const flattenMediaToText = useWatch({
|
||||
name: buildFieldNameWithPrefix('flatten_media_to_text', prefix),
|
||||
});
|
||||
|
||||
// Spreadsheet only supports DeepDOC and TCADPParser
|
||||
const optionsWithoutLLM = [
|
||||
@@ -97,10 +101,13 @@ export function SpreadsheetFormFields({ prefix }: CommonProps) {
|
||||
prefix={prefix}
|
||||
optionsWithoutLLM={optionsWithoutLLM}
|
||||
></ParserMethodFormField>
|
||||
<LargeModelFormField
|
||||
prefix={prefix}
|
||||
options={modelOptions}
|
||||
></LargeModelFormField>
|
||||
<FlattenMediaToTextFormField prefix={prefix} />
|
||||
{!flattenMediaToText && (
|
||||
<LargeModelFormField
|
||||
prefix={prefix}
|
||||
options={modelOptions}
|
||||
></LargeModelFormField>
|
||||
)}
|
||||
{tcadpOptionsShown && (
|
||||
<>
|
||||
<RAGFlowFormItem
|
||||
|
||||
@@ -1,20 +1,32 @@
|
||||
import { LlmModelType } from '@/constants/knowledge';
|
||||
import { useComposeLlmOptionsByModelTypes } from '@/hooks/use-llm-request';
|
||||
import { LargeModelFormField, RmdirFormField } from './common-form-fields';
|
||||
import { useWatch } from 'react-hook-form';
|
||||
import {
|
||||
FlattenMediaToTextFormField,
|
||||
LargeModelFormField,
|
||||
RmdirFormField,
|
||||
} from './common-form-fields';
|
||||
import { CommonProps } from './interface';
|
||||
import { buildFieldNameWithPrefix } from './utils';
|
||||
|
||||
export function TextMarkdownFormFields({ prefix }: CommonProps) {
|
||||
const modelOptions = useComposeLlmOptionsByModelTypes([
|
||||
LlmModelType.Image2text,
|
||||
]);
|
||||
const flattenMediaToText = useWatch({
|
||||
name: buildFieldNameWithPrefix('flatten_media_to_text', prefix),
|
||||
});
|
||||
|
||||
return (
|
||||
<>
|
||||
<RmdirFormField prefix={prefix} />
|
||||
<LargeModelFormField
|
||||
prefix={prefix}
|
||||
options={modelOptions}
|
||||
></LargeModelFormField>
|
||||
<FlattenMediaToTextFormField prefix={prefix} />
|
||||
{!flattenMediaToText && (
|
||||
<LargeModelFormField
|
||||
prefix={prefix}
|
||||
options={modelOptions}
|
||||
></LargeModelFormField>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,24 +1,32 @@
|
||||
import { LlmModelType } from '@/constants/knowledge';
|
||||
import { useComposeLlmOptionsByModelTypes } from '@/hooks/use-llm-request';
|
||||
import { useWatch } from 'react-hook-form';
|
||||
import {
|
||||
FlattenMediaToTextFormField,
|
||||
LargeModelFormField,
|
||||
OutputFormatFormFieldProps,
|
||||
RmdirFormField,
|
||||
} from './common-form-fields';
|
||||
import { buildFieldNameWithPrefix } from './utils';
|
||||
|
||||
export function WordFormFields({ prefix }: OutputFormatFormFieldProps) {
|
||||
const modelOptions = useComposeLlmOptionsByModelTypes([
|
||||
LlmModelType.Image2text,
|
||||
]);
|
||||
const flattenMediaToText = useWatch({
|
||||
name: buildFieldNameWithPrefix('flatten_media_to_text', prefix),
|
||||
});
|
||||
|
||||
return (
|
||||
<>
|
||||
<RmdirFormField prefix={prefix} />
|
||||
{/* Multimodal Model */}
|
||||
<LargeModelFormField
|
||||
prefix={prefix}
|
||||
options={modelOptions}
|
||||
></LargeModelFormField>
|
||||
<FlattenMediaToTextFormField prefix={prefix} />
|
||||
{!flattenMediaToText && (
|
||||
<LargeModelFormField
|
||||
prefix={prefix}
|
||||
options={modelOptions}
|
||||
></LargeModelFormField>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -228,6 +228,7 @@ function transformParserParams(params: ParserFormSchemaType) {
|
||||
parse_method: cur.parse_method,
|
||||
lang: cur.lang,
|
||||
vlm: { llm_id: cur.vlm?.llm_id },
|
||||
flatten_media_to_text: cur.flatten_media_to_text,
|
||||
enable_multi_column: cur.enable_multi_column,
|
||||
remove_toc: cur.remove_toc,
|
||||
};
|
||||
@@ -243,6 +244,7 @@ function transformParserParams(params: ParserFormSchemaType) {
|
||||
...filteredSetup,
|
||||
parse_method: cur.parse_method,
|
||||
vlm: { llm_id: cur.vlm?.llm_id },
|
||||
flatten_media_to_text: cur.flatten_media_to_text,
|
||||
};
|
||||
// Only include TCADP parameters if TCADP Parser is selected
|
||||
if (cur.parse_method?.toLowerCase() === 'tcadp parser') {
|
||||
@@ -277,10 +279,16 @@ function transformParserParams(params: ParserFormSchemaType) {
|
||||
fields: cur.fields,
|
||||
};
|
||||
break;
|
||||
case FileType.Video:
|
||||
case FileType.Docx:
|
||||
case FileType.Audio:
|
||||
case FileType.TextMarkdown:
|
||||
filteredSetup = {
|
||||
...filteredSetup,
|
||||
vlm: { llm_id: cur.vlm?.llm_id },
|
||||
flatten_media_to_text: cur.flatten_media_to_text,
|
||||
};
|
||||
break;
|
||||
case FileType.Video:
|
||||
case FileType.Audio:
|
||||
filteredSetup = {
|
||||
...filteredSetup,
|
||||
vlm: { llm_id: cur.vlm?.llm_id },
|
||||
|
||||
Reference in New Issue
Block a user