mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Fix: RAPTOR "Generation scope" reset to "Single file" when selecting "Dataset" (#14477)
## Problem
On the Dataset Configuration page, changing the RAPTOR **Generation
scope** from "Single file" to "Dataset" and clicking **Save** did not
persist the change. After refreshing or re-entering the page, the scope
always reverted to "Single file".
## Root Cause
1. **Backend**: The `RaptorConfig` Pydantic model in
`api/utils/validation_utils.py` was configured with `extra="forbid"` but
did not declare a `scope` field. When the frontend sent `"scope":
"dataset"`, Pydantic rejected the request.
2. **Frontend**: The `extractRaptorConfigExt` utility in
`web/src/hooks/parser-config-utils.ts` treated `scope` as an unknown
field and moved it into the nested `ext` object. Consequently, the
backend's `raptor_config.get("scope", "file")` lookup found no top-level
`scope` key and always fell back to the default `"file"`.
## Changes
- Added `scope: Literal["file", "dataset"]` to the backend
`RaptorConfig` model with a default of `"file"`.
- Added `scope` to the known-field whitelist in the frontend
`extractRaptorConfigExt` helper so it is transmitted as a top-level
raptor field instead of being buried in `ext`.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
---------
Signed-off-by: noob <yixiao121314@outlook.com>
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import logging
|
||||
import math
|
||||
import pathlib
|
||||
import re
|
||||
@@ -22,16 +23,7 @@ from typing import Annotated, Any, Literal
|
||||
from uuid import UUID
|
||||
|
||||
from quart import Request
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
ConfigDict,
|
||||
Field,
|
||||
StringConstraints,
|
||||
ValidationError,
|
||||
field_validator,
|
||||
model_validator,
|
||||
ValidationInfo
|
||||
)
|
||||
from pydantic import BaseModel, ConfigDict, Field, StringConstraints, ValidationError, field_validator, model_validator, ValidationInfo
|
||||
from pydantic_core import PydanticCustomError
|
||||
from werkzeug.exceptions import BadRequest, UnsupportedMediaType
|
||||
|
||||
@@ -170,12 +162,13 @@ def validate_and_parse_request_args(request: Request, validator: type[BaseModel]
|
||||
args = request.args.to_dict(flat=True)
|
||||
|
||||
# Handle ext parameter: parse JSON string to dict if it's a string
|
||||
if 'ext' in args and isinstance(args['ext'], str):
|
||||
if "ext" in args and isinstance(args["ext"], str):
|
||||
import json
|
||||
|
||||
try:
|
||||
args['ext'] = json.loads(args['ext'])
|
||||
args["ext"] = json.loads(args["ext"])
|
||||
except json.JSONDecodeError:
|
||||
pass # Keep the string and let validation handle the error
|
||||
logging.debug("Failed to decode query arg 'ext' as JSON; passing raw value to validator")
|
||||
|
||||
try:
|
||||
if extras is not None:
|
||||
@@ -350,6 +343,7 @@ class RaptorConfig(Base):
|
||||
threshold: Annotated[float, Field(default=0.1, ge=0.0, le=1.0)]
|
||||
max_cluster: Annotated[int, Field(default=64, ge=1, le=1024)]
|
||||
random_seed: Annotated[int, Field(default=0, ge=0)]
|
||||
scope: Annotated[Literal["file", "dataset"], Field(default="file")]
|
||||
auto_disable_for_structured_data: Annotated[bool, Field(default=True)]
|
||||
ext: Annotated[dict, Field(default={})]
|
||||
|
||||
@@ -401,6 +395,7 @@ class ParserConfig(Base):
|
||||
pages: Annotated[list[list[int]] | None, Field(default=None)]
|
||||
ext: Annotated[dict, Field(default={})]
|
||||
|
||||
|
||||
class UpdateDocumentReq(Base):
|
||||
"""
|
||||
Request model for updating a document.
|
||||
@@ -408,7 +403,8 @@ class UpdateDocumentReq(Base):
|
||||
This model validates the request parameters for updating a document,
|
||||
including name, chunk method, enabled status, and other metadata.
|
||||
"""
|
||||
model_config = ConfigDict(extra='ignore')
|
||||
|
||||
model_config = ConfigDict(extra="ignore")
|
||||
name: Annotated[str | None, Field(default=None, max_length=65535)]
|
||||
chunk_method: Annotated[str | None, Field(default=None, max_length=65535)]
|
||||
pipeline_id: Annotated[str | None, Field(default=None, max_length=65535)]
|
||||
@@ -426,7 +422,7 @@ class UpdateDocumentReq(Base):
|
||||
# Validate chunk method if present
|
||||
valid_chunk_method = {"naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "knowledge_graph", "email", "tag"}
|
||||
if chunk_method not in valid_chunk_method:
|
||||
raise PydanticCustomError("format_invalid", "`chunk_method` {chunk_method} doesn't exist", {"chunk_method":chunk_method})
|
||||
raise PydanticCustomError("format_invalid", "`chunk_method` {chunk_method} doesn't exist", {"chunk_method": chunk_method})
|
||||
|
||||
return chunk_method
|
||||
|
||||
@@ -436,7 +432,7 @@ class UpdateDocumentReq(Base):
|
||||
if enabled:
|
||||
converted = int(enabled)
|
||||
if converted < 0 or converted > 1:
|
||||
raise PydanticCustomError("format_invalid", "`enabled` value invalid, only accept 0 or 1 but is {enabled}", {"enabled":enabled})
|
||||
raise PydanticCustomError("format_invalid", "`enabled` value invalid, only accept 0 or 1 but is {enabled}", {"enabled": enabled})
|
||||
|
||||
return enabled
|
||||
|
||||
@@ -451,11 +447,12 @@ class UpdateDocumentReq(Base):
|
||||
for k, v in meta_fields.items():
|
||||
if isinstance(v, list):
|
||||
if not all(isinstance(i, (str, int, float)) for i in v):
|
||||
raise PydanticCustomError("format_invalid", "The type is not supported in list: {v}", {"v":v})
|
||||
raise PydanticCustomError("format_invalid", "The type is not supported in list: {v}", {"v": v})
|
||||
elif not isinstance(v, (str, int, float)):
|
||||
raise PydanticCustomError("format_invalid", "The type is not supported: {v}", {"v":v})
|
||||
raise PydanticCustomError("format_invalid", "The type is not supported: {v}", {"v": v})
|
||||
return meta_fields
|
||||
|
||||
|
||||
class CreateDatasetReq(Base):
|
||||
name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(...)]
|
||||
avatar: Annotated[str | None, Field(default=None, max_length=65535)]
|
||||
@@ -708,8 +705,7 @@ class CreateDatasetReq(Base):
|
||||
@classmethod
|
||||
def validate_chunk_method(cls, v: Any, handler, info: ValidationInfo) -> Any:
|
||||
"""Wrap validation to unify error messages, including type errors (e.g. list)."""
|
||||
allowed = {"naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table",
|
||||
"tag", "resume"}
|
||||
allowed = {"naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag", "resume"}
|
||||
error_msg = "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table', 'tag' or 'resume'"
|
||||
try:
|
||||
# Run inner validation (type checking)
|
||||
@@ -864,6 +860,7 @@ class ListDatasetReq(BaseListReq):
|
||||
|
||||
# ---- File Management Request Models ----
|
||||
|
||||
|
||||
class CreateFolderReq(Base):
|
||||
name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=255), Field(...)]
|
||||
parent_id: Annotated[str | None, Field(default=None)]
|
||||
@@ -879,7 +876,7 @@ class MoveFileReq(Base):
|
||||
dest_file_id: Annotated[str | None, Field(default=None)]
|
||||
new_name: Annotated[str | None, StringConstraints(strip_whitespace=True, min_length=1, max_length=255), Field(default=None)]
|
||||
|
||||
@model_validator(mode='after')
|
||||
@model_validator(mode="after")
|
||||
def check_operation(self):
|
||||
if not self.dest_file_id and not self.new_name:
|
||||
raise ValueError("At least one of dest_file_id or new_name must be provided")
|
||||
@@ -899,7 +896,7 @@ class ListFileReq(BaseModel):
|
||||
desc: Annotated[bool, Field(default=True)]
|
||||
|
||||
|
||||
def validate_immutable_fields(update_doc_req:UpdateDocumentReq, doc):
|
||||
def validate_immutable_fields(update_doc_req: UpdateDocumentReq, doc):
|
||||
"""
|
||||
Validate that immutable fields have not been changed.
|
||||
|
||||
@@ -929,7 +926,7 @@ def validate_immutable_fields(update_doc_req:UpdateDocumentReq, doc):
|
||||
return None, None
|
||||
|
||||
|
||||
def validate_document_name(req_doc_name:str, doc, docs_from_name):
|
||||
def validate_document_name(req_doc_name: str, doc, docs_from_name):
|
||||
"""
|
||||
Validate document name update.
|
||||
|
||||
@@ -960,6 +957,7 @@ def validate_document_name(req_doc_name:str, doc, docs_from_name):
|
||||
return "Duplicated document name in the same dataset.", RetCode.DATA_ERROR
|
||||
return None, None
|
||||
|
||||
|
||||
def validate_chunk_method(doc, chunk_method=None):
|
||||
"""
|
||||
Validate chunk method update.
|
||||
@@ -975,9 +973,8 @@ def validate_chunk_method(doc, chunk_method=None):
|
||||
A tuple of (error_message, error_code) if validation fails,
|
||||
or (None, None) if validation passes.
|
||||
"""
|
||||
if chunk_method is not None and len(chunk_method) == 0: # will not be detected in UpdateDocumentReq
|
||||
if chunk_method is not None and len(chunk_method) == 0: # will not be detected in UpdateDocumentReq
|
||||
return "`chunk_method` (empty string) is not valid", RetCode.DATA_ERROR
|
||||
if doc.type == FileType.VISUAL or re.search(r"\.(ppt|pptx|pages)$", doc.name):
|
||||
return "Not supported yet!", RetCode.DATA_ERROR
|
||||
return None, None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user