mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Refactor: Merge document update API (#13962)
### What problem does this PR solve? Refactor: merge document.rename into document.update_document ### Type of change - [x] Refactoring <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Added a unified document update API (PUT) supporting name, metadata, parser/chunk settings, and status changes. * **Breaking Changes** * Legacy single-parameter rename endpoint removed; renames now require dataset + document identifiers. * `/list` now reads dataset id from a different query parameter. * **Validation / Bug Fixes** * Stricter meta_fields and parser-config validation; unauthenticated requests return 401. * **Frontend** * UI now sends dataset id when saving document names. * **Tests** * Numerous unit and HTTP tests adjusted or removed to match new API and validations. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: MkDev11 <94194147+MkDev11@users.noreply.github.com> Co-authored-by: mkdev11 <YOUR_GITHUB_ID+MkDev11@users.noreply.github.com> Co-authored-by: mkdev11 <MkDev11@users.noreply.github.com> Co-authored-by: Qi Wang <wangq8@outlook.com> Co-authored-by: dataCenter430 <161712630+dataCenter430@users.noreply.github.com> Co-authored-by: balibabu <cike8899@users.noreply.github.com>
This commit is contained in:
@@ -15,7 +15,6 @@
|
||||
#
|
||||
import json
|
||||
import os.path
|
||||
import pathlib
|
||||
import re
|
||||
from pathlib import Path, PurePosixPath, PureWindowsPath
|
||||
|
||||
@@ -49,7 +48,7 @@ from common.file_utils import get_project_base_directory
|
||||
from common.metadata_utils import convert_conditions, meta_filter, turn2jsonschema
|
||||
from common.misc_utils import get_uuid, thread_pool_exec
|
||||
from deepdoc.parser.html_parser import RAGFlowHtmlParser
|
||||
from rag.nlp import rag_tokenizer, search
|
||||
from rag.nlp import search
|
||||
|
||||
|
||||
def _is_safe_download_filename(name: str) -> bool:
|
||||
@@ -666,61 +665,6 @@ async def run():
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route("/rename", methods=["POST"]) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("doc_id", "name")
|
||||
async def rename():
|
||||
req = await get_request_json()
|
||||
uid = current_user.id
|
||||
try:
|
||||
|
||||
def _rename_sync():
|
||||
if not DocumentService.accessible(req["doc_id"], uid):
|
||||
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
|
||||
|
||||
e, doc = DocumentService.get_by_id(req["doc_id"])
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
||||
return get_json_result(data=False, message="The extension of file can't be changed", code=RetCode.ARGUMENT_ERROR)
|
||||
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=RetCode.ARGUMENT_ERROR)
|
||||
|
||||
for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
|
||||
if d.name == req["name"]:
|
||||
return get_data_error_result(message="Duplicated document name in the same dataset.")
|
||||
|
||||
if not DocumentService.update_by_id(req["doc_id"], {"name": req["name"]}):
|
||||
return get_data_error_result(message="Database error (Document rename)!")
|
||||
|
||||
informs = File2DocumentService.get_by_document_id(req["doc_id"])
|
||||
if informs:
|
||||
e, file = FileService.get_by_id(informs[0].file_id)
|
||||
FileService.update_by_id(file.id, {"name": req["name"]})
|
||||
|
||||
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
||||
title_tks = rag_tokenizer.tokenize(req["name"])
|
||||
es_body = {
|
||||
"docnm_kwd": req["name"],
|
||||
"title_tks": title_tks,
|
||||
"title_sm_tks": rag_tokenizer.fine_grained_tokenize(title_tks),
|
||||
}
|
||||
if settings.docStoreConn.index_exist(search.index_name(tenant_id), doc.kb_id):
|
||||
settings.docStoreConn.update(
|
||||
{"doc_id": req["doc_id"]},
|
||||
es_body,
|
||||
search.index_name(tenant_id),
|
||||
doc.kb_id,
|
||||
)
|
||||
return get_json_result(data=True)
|
||||
|
||||
return await thread_pool_exec(_rename_sync)
|
||||
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route("/get/<doc_id>", methods=["GET"]) # noqa: F821
|
||||
@login_required
|
||||
async def get(doc_id):
|
||||
|
||||
146
api/apps/restful_apis/document_api.py
Normal file
146
api/apps/restful_apis/document_api.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#
|
||||
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import logging
|
||||
|
||||
from peewee import OperationalError
|
||||
from pydantic import ValidationError
|
||||
|
||||
from api.apps.services.document_api_service import rename_doc_key, validate_document_update_fields, \
|
||||
update_document_name_only, update_chunk_method_only, update_document_status_only
|
||||
from api.db.services.doc_metadata_service import DocMetadataService
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from common.constants import RetCode
|
||||
from api.apps import login_required
|
||||
from api.utils.api_utils import get_error_data_result, get_result, add_tenant_id_to_kwargs, get_request_json
|
||||
from api.utils.validation_utils import (
|
||||
UpdateDocumentReq, format_validation_error_message,
|
||||
)
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PUT"]) # noqa: F821
|
||||
@login_required
|
||||
@add_tenant_id_to_kwargs
|
||||
async def update_document(tenant_id, dataset_id, document_id):
|
||||
"""
|
||||
Update a document within a dataset.
|
||||
---
|
||||
tags:
|
||||
- Documents
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: path
|
||||
name: dataset_id
|
||||
type: string
|
||||
required: true
|
||||
description: ID of the dataset.
|
||||
- in: path
|
||||
name: document_id
|
||||
type: string
|
||||
required: true
|
||||
description: ID of the document to update.
|
||||
- in: header
|
||||
name: Authorization
|
||||
type: string
|
||||
required: true
|
||||
description: Bearer token for authentication.
|
||||
- in: body
|
||||
name: body
|
||||
description: Document update parameters.
|
||||
required: true
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: New name of the document.
|
||||
parser_config:
|
||||
type: object
|
||||
description: Parser configuration.
|
||||
chunk_method:
|
||||
type: string
|
||||
description: Chunking method.
|
||||
enabled:
|
||||
type: boolean
|
||||
description: Document status.
|
||||
responses:
|
||||
200:
|
||||
description: Document updated successfully.
|
||||
schema:
|
||||
type: object
|
||||
"""
|
||||
req = await get_request_json()
|
||||
|
||||
# Verify ownership and existence of dataset and document
|
||||
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
||||
return get_error_data_result(message="You don't own the dataset.")
|
||||
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
if not e:
|
||||
return get_error_data_result(message="Can't find this dataset!")
|
||||
|
||||
# Prepare data for validation
|
||||
docs = DocumentService.query(kb_id=dataset_id, id=document_id)
|
||||
if not docs:
|
||||
return get_error_data_result(message="The dataset doesn't own the document.")
|
||||
|
||||
# Validate document update request parameters
|
||||
try:
|
||||
update_doc_req = UpdateDocumentReq(**req)
|
||||
except ValidationError as e:
|
||||
return get_error_data_result(message=format_validation_error_message(e), code=RetCode.DATA_ERROR)
|
||||
|
||||
doc = docs[0]
|
||||
|
||||
# further check with inner status (from DB)
|
||||
error_msg, error_code = validate_document_update_fields(update_doc_req, doc, req)
|
||||
if error_msg:
|
||||
return get_error_data_result(message=error_msg, code=error_code)
|
||||
|
||||
# All validations passed, now perform all updates
|
||||
# meta_fields provided, then update it
|
||||
if "meta_fields" in req:
|
||||
if not DocMetadataService.update_document_metadata(document_id, update_doc_req.meta_fields):
|
||||
return get_error_data_result(message="Failed to update metadata")
|
||||
# doc name provided from request and diff with existing value, update
|
||||
if "name" in req and req["name"] != doc.name:
|
||||
if error := update_document_name_only(document_id, req["name"]):
|
||||
return error
|
||||
|
||||
# parser config provided (already validated in UpdateDocumentReq), update it
|
||||
if update_doc_req.parser_config:
|
||||
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
||||
|
||||
# chunk method provided - the update method will check if it's different with existing one
|
||||
if update_doc_req.chunk_method:
|
||||
if error := update_chunk_method_only(req, doc, dataset_id, tenant_id):
|
||||
return error
|
||||
|
||||
if "enabled" in req: # already checked in UpdateDocumentReq - it's int if it's present
|
||||
# "enabled" flag provided, the update method will check if it's changed and then update if so
|
||||
if error := update_document_status_only(int(req["enabled"]), doc, kb):
|
||||
return error
|
||||
|
||||
try:
|
||||
original_doc_id = doc.id
|
||||
ok, doc = DocumentService.get_by_id(doc.id)
|
||||
if not ok:
|
||||
return get_error_data_result(message=f"Can not get document by id:{original_doc_id}")
|
||||
except OperationalError as e:
|
||||
logging.exception(e)
|
||||
return get_error_data_result(message="Database operation failed")
|
||||
renamed_doc = rename_doc_key(doc)
|
||||
return get_result(data=renamed_doc)
|
||||
|
||||
@@ -15,13 +15,11 @@
|
||||
#
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from io import BytesIO
|
||||
|
||||
import xxhash
|
||||
from peewee import OperationalError
|
||||
from pydantic import BaseModel, Field, validator, ValidationError
|
||||
from pydantic import BaseModel, Field, validator
|
||||
from quart import request, send_file
|
||||
|
||||
from api.constants import FILE_NAME_LEN_LIMIT
|
||||
@@ -35,10 +33,8 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks
|
||||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.utils import validation_utils
|
||||
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_parser_config, get_request_json, get_result, server_error_response, token_required
|
||||
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required
|
||||
from api.utils.image_utils import store_chunk_image
|
||||
from api.utils.validation_utils import format_validation_error_message, UpdateDocumentReq
|
||||
from common import settings
|
||||
from common.constants import FileSource, LLMType, ParserType, RetCode, TaskStatus
|
||||
from common.metadata_utils import convert_conditions, meta_filter
|
||||
@@ -184,215 +180,6 @@ async def upload(dataset_id, tenant_id):
|
||||
renamed_doc_list.append(renamed_doc)
|
||||
return get_result(data=renamed_doc_list)
|
||||
|
||||
|
||||
def _update_document_name_only(document_id, req_doc_name):
|
||||
"""Update document name only (without validation)."""
|
||||
if not DocumentService.update_by_id(document_id, {"name": req_doc_name}):
|
||||
return get_error_data_result(message="Database error (Document rename)!")
|
||||
|
||||
informs = File2DocumentService.get_by_document_id(document_id)
|
||||
if informs:
|
||||
e, file = FileService.get_by_id(informs[0].file_id)
|
||||
FileService.update_by_id(file.id, {"name": req_doc_name})
|
||||
return None
|
||||
|
||||
def _update_chunk_method_only(req, doc, dataset_id, tenant_id):
|
||||
"""Update chunk method only (without validation)."""
|
||||
if doc.parser_id.lower() != req["chunk_method"].lower():
|
||||
# if chunk method changed
|
||||
e = DocumentService.update_by_id(
|
||||
doc.id,
|
||||
{
|
||||
"parser_id": req["chunk_method"],
|
||||
"progress": 0,
|
||||
"progress_msg": "",
|
||||
"run": TaskStatus.UNSTART.value,
|
||||
},
|
||||
)
|
||||
if not e:
|
||||
return get_error_data_result(message="Document not found!")
|
||||
if not req.get("parser_config"):
|
||||
req["parser_config"] = get_parser_config(req["chunk_method"], req.get("parser_config"))
|
||||
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
||||
if doc.token_num > 0:
|
||||
e = DocumentService.increment_chunk_num(
|
||||
doc.id,
|
||||
doc.kb_id,
|
||||
doc.token_num * -1,
|
||||
doc.chunk_num * -1,
|
||||
doc.process_duration * -1,
|
||||
)
|
||||
if not e:
|
||||
return get_error_data_result(message="Document not found!")
|
||||
settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), dataset_id)
|
||||
return None
|
||||
|
||||
def _update_document_status_only(status:int, doc, kb):
|
||||
"""Update document status only (without validation)."""
|
||||
if doc.status is None or (int(doc.status) != status):
|
||||
try:
|
||||
if not DocumentService.update_by_id(doc.id, {"status": str(status)}):
|
||||
return get_error_data_result(message="Database error (Document update)!")
|
||||
settings.docStoreConn.update({"doc_id": doc.id}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
return None
|
||||
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PUT"]) # noqa: F821
|
||||
@token_required
|
||||
async def update_doc(tenant_id, dataset_id, document_id):
|
||||
"""
|
||||
Update a document within a dataset.
|
||||
---
|
||||
tags:
|
||||
- Documents
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
- in: path
|
||||
name: dataset_id
|
||||
type: string
|
||||
required: true
|
||||
description: ID of the dataset.
|
||||
- in: path
|
||||
name: document_id
|
||||
type: string
|
||||
required: true
|
||||
description: ID of the document to update.
|
||||
- in: header
|
||||
name: Authorization
|
||||
type: string
|
||||
required: true
|
||||
description: Bearer token for authentication.
|
||||
- in: body
|
||||
name: body
|
||||
description: Document update parameters.
|
||||
required: true
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: New name of the document.
|
||||
parser_config:
|
||||
type: object
|
||||
description: Parser configuration.
|
||||
chunk_method:
|
||||
type: string
|
||||
description: Chunking method.
|
||||
enabled:
|
||||
type: boolean
|
||||
description: Document status.
|
||||
responses:
|
||||
200:
|
||||
description: Document updated successfully.
|
||||
schema:
|
||||
type: object
|
||||
"""
|
||||
req = await get_request_json()
|
||||
|
||||
# Verify ownership and existence of dataset and document
|
||||
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
||||
return get_error_data_result(message="You don't own the dataset.")
|
||||
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
if not e:
|
||||
return get_error_data_result(message="Can't find this dataset!")
|
||||
|
||||
# Prepare data for validation
|
||||
docs = DocumentService.query(kb_id=dataset_id, id=document_id)
|
||||
if not docs:
|
||||
return get_error_data_result(message="The dataset doesn't own the document.")
|
||||
|
||||
# Validate document update request parameters
|
||||
try:
|
||||
update_doc_req = UpdateDocumentReq(**req)
|
||||
except ValidationError as e:
|
||||
return get_error_data_result(message=format_validation_error_message(e), code=RetCode.DATA_ERROR)
|
||||
|
||||
doc = docs[0]
|
||||
|
||||
# further check with inner status (from DB)
|
||||
error_msg, error_code = _validate_document_update_fields(update_doc_req, doc, req)
|
||||
if error_msg:
|
||||
return get_error_data_result(message=error_msg, code=error_code)
|
||||
|
||||
# All validations passed, now perform all updates
|
||||
# meta_fields provided, then update it
|
||||
if update_doc_req.meta_fields:
|
||||
if not DocMetadataService.update_document_metadata(document_id, update_doc_req.meta_fields):
|
||||
return get_error_data_result(message="Failed to update metadata")
|
||||
# doc name provided from request and diff with existing value, update
|
||||
if "name" in req and req["name"] != doc.name:
|
||||
if error := _update_document_name_only(document_id, req["name"]):
|
||||
return error
|
||||
|
||||
# parser config provided (already validated in UpdateDocumentReq), update it
|
||||
if update_doc_req.parser_config:
|
||||
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
||||
|
||||
# chunk method provided - the update method will check if it's different with existing one
|
||||
if update_doc_req.chunk_method:
|
||||
if error := _update_chunk_method_only(req, doc, dataset_id, tenant_id):
|
||||
return error
|
||||
|
||||
if "enabled" in req: # already checked in UpdateDocumentReq - it's int if it's present
|
||||
# "enabled" flag provided, the update method will check if it's changed and then update if so
|
||||
if error := _update_document_status_only(int(req["enabled"]), doc, kb):
|
||||
return error
|
||||
|
||||
try:
|
||||
ok, doc = DocumentService.get_by_id(doc.id)
|
||||
if not ok:
|
||||
return get_error_data_result(message="Dataset created failed")
|
||||
except OperationalError as e:
|
||||
logging.exception(e)
|
||||
return get_error_data_result(message="Database operation failed")
|
||||
|
||||
key_mapping = {
|
||||
"chunk_num": "chunk_count",
|
||||
"kb_id": "dataset_id",
|
||||
"token_num": "token_count",
|
||||
"parser_id": "chunk_method",
|
||||
}
|
||||
run_mapping = {
|
||||
"0": "UNSTART",
|
||||
"1": "RUNNING",
|
||||
"2": "CANCEL",
|
||||
"3": "DONE",
|
||||
"4": "FAIL",
|
||||
}
|
||||
renamed_doc = {}
|
||||
for key, value in doc.to_dict().items():
|
||||
new_key = key_mapping.get(key, key)
|
||||
renamed_doc[new_key] = value
|
||||
if key == "run":
|
||||
renamed_doc["run"] = run_mapping.get(str(value))
|
||||
|
||||
return get_result(data=renamed_doc)
|
||||
|
||||
def _validate_document_update_fields(update_doc_req:UpdateDocumentReq, doc, req):
|
||||
"""Validate document update fields in a single method."""
|
||||
# Validate immutable fields
|
||||
error_msg, error_code = validation_utils.validate_immutable_fields(update_doc_req, doc)
|
||||
if error_msg:
|
||||
return error_msg, error_code
|
||||
|
||||
# Validate document name if present
|
||||
if "name" in req and req["name"] != doc.name:
|
||||
docs_from_name = DocumentService.query(name=req["name"], kb_id=doc.kb_id)
|
||||
error_msg, error_code = validation_utils.validate_document_name(req["name"], doc, docs_from_name)
|
||||
if error_msg:
|
||||
return error_msg, error_code
|
||||
|
||||
# Validate chunk method if present
|
||||
if "chunk_method" in req:
|
||||
error_msg, error_code = validation_utils.validate_chunk_method(doc, req["chunk_method"])
|
||||
if error_msg:
|
||||
return error_msg, error_code
|
||||
|
||||
return None, None
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["GET"]) # noqa: F821
|
||||
@token_required
|
||||
async def download(tenant_id, dataset_id, document_id):
|
||||
|
||||
201
api/apps/services/document_api_service.py
Normal file
201
api/apps/services/document_api_service.py
Normal file
@@ -0,0 +1,201 @@
|
||||
#
|
||||
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.file2document_service import File2DocumentService
|
||||
from api.db.services.file_service import FileService
|
||||
from api.utils import validation_utils
|
||||
from common import settings
|
||||
from common.constants import TaskStatus
|
||||
from api.utils.api_utils import get_error_data_result, server_error_response, get_parser_config
|
||||
from api.utils.validation_utils import UpdateDocumentReq
|
||||
from rag.nlp import rag_tokenizer, search
|
||||
|
||||
|
||||
def update_document_name_only(document_id, req_doc_name):
|
||||
"""
|
||||
Update document name only (without validation).
|
||||
:param document_id: id (string) of the document
|
||||
:param req_doc_name: new name (string) from request for the document
|
||||
:return: None if all are good; otherwise returns the error message in the JSON format
|
||||
"""
|
||||
if not DocumentService.update_by_id(document_id, {"name": req_doc_name}):
|
||||
return get_error_data_result(message="Database error (Document rename)!")
|
||||
|
||||
informs = File2DocumentService.get_by_document_id(document_id)
|
||||
if informs:
|
||||
e, file = FileService.get_by_id(informs[0].file_id)
|
||||
FileService.update_by_id(file.id, {"name": req_doc_name})
|
||||
# Add logic to update index - refer to rename method in document_app.py
|
||||
tenant_id = DocumentService.get_tenant_id(document_id)
|
||||
title_tks = rag_tokenizer.tokenize(req_doc_name)
|
||||
es_body = {
|
||||
"docnm_kwd": req_doc_name,
|
||||
"title_tks": title_tks,
|
||||
"title_sm_tks": rag_tokenizer.fine_grained_tokenize(title_tks),
|
||||
}
|
||||
ok, doc = DocumentService.get_by_id(document_id)
|
||||
if not ok:
|
||||
return get_error_data_result(message=f"Not able to find document by id:{document_id}")
|
||||
if settings.docStoreConn.index_exist(search.index_name(tenant_id), doc.kb_id):
|
||||
settings.docStoreConn.update(
|
||||
{"doc_id": document_id},
|
||||
es_body,
|
||||
search.index_name(tenant_id),
|
||||
doc.kb_id,
|
||||
)
|
||||
return None
|
||||
|
||||
def update_chunk_method_only(req, doc, dataset_id, tenant_id):
|
||||
"""
|
||||
Update chunk method only (without validation).
|
||||
|
||||
Updates the chunk method and parser configuration for a document,
|
||||
and resets the document's progress if the chunk method changes.
|
||||
Also clears existing chunks from the document store if the method changes.
|
||||
|
||||
Args:
|
||||
req: The request dictionary containing chunk_method and parser_config.
|
||||
doc: The document model from the database.
|
||||
dataset_id: The ID of the dataset containing the document.
|
||||
tenant_id: The tenant ID for the document store.
|
||||
|
||||
Returns:
|
||||
None if successful, or an error result dictionary if failed.
|
||||
"""
|
||||
if doc.parser_id.lower() != req["chunk_method"].lower():
|
||||
# if chunk method changed
|
||||
e = DocumentService.update_by_id(
|
||||
doc.id,
|
||||
{
|
||||
"parser_id": req["chunk_method"],
|
||||
"progress": 0,
|
||||
"progress_msg": "",
|
||||
"run": TaskStatus.UNSTART.value,
|
||||
},
|
||||
)
|
||||
if not e:
|
||||
return get_error_data_result(message="Document not found!")
|
||||
if not req.get("parser_config"):
|
||||
req["parser_config"] = get_parser_config(req["chunk_method"], req.get("parser_config"))
|
||||
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
||||
if doc.token_num > 0:
|
||||
e = DocumentService.increment_chunk_num(
|
||||
doc.id,
|
||||
doc.kb_id,
|
||||
doc.token_num * -1,
|
||||
doc.chunk_num * -1,
|
||||
doc.process_duration * -1,
|
||||
)
|
||||
if not e:
|
||||
return get_error_data_result(message="Document not found!")
|
||||
settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), dataset_id)
|
||||
return None
|
||||
|
||||
def update_document_status_only(status:int, doc, kb):
|
||||
"""
|
||||
Update document status only (without validation).
|
||||
|
||||
Updates the enabled/disabled status of a document and updates
|
||||
the corresponding index in the document store.
|
||||
|
||||
Args:
|
||||
status: The new status value (0 for disabled, 1 for enabled).
|
||||
doc: The document model from the database.
|
||||
kb: The knowledge base model.
|
||||
|
||||
Returns:
|
||||
None if successful, or an error result dictionary if failed.
|
||||
"""
|
||||
if doc.status is None or (int(doc.status) != status):
|
||||
try:
|
||||
if not DocumentService.update_by_id(doc.id, {"status": str(status)}):
|
||||
return get_error_data_result(message="Database error (Document update)!")
|
||||
settings.docStoreConn.update({"doc_id": doc.id}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
return None
|
||||
|
||||
|
||||
def validate_document_update_fields(update_doc_req:UpdateDocumentReq, doc, req):
|
||||
"""
|
||||
Validate document update fields in a single method.
|
||||
|
||||
Performs comprehensive validation of all document update fields,
|
||||
including immutable fields, document name, and chunk method.
|
||||
|
||||
Args:
|
||||
update_doc_req: The validated update document request.
|
||||
doc: The document model from the database.
|
||||
req: The original request dictionary.
|
||||
|
||||
Returns:
|
||||
A tuple of (error_message, error_code) if validation fails,
|
||||
or (None, None) if validation passes.
|
||||
"""
|
||||
# Validate immutable fields
|
||||
error_msg, error_code = validation_utils.validate_immutable_fields(update_doc_req, doc)
|
||||
if error_msg:
|
||||
return error_msg, error_code
|
||||
|
||||
# Validate document name if present
|
||||
if "name" in req and req["name"] != doc.name:
|
||||
docs_from_name = DocumentService.query(name=req["name"], kb_id=doc.kb_id)
|
||||
error_msg, error_code = validation_utils.validate_document_name(req["name"], doc, docs_from_name)
|
||||
if error_msg:
|
||||
return error_msg, error_code
|
||||
|
||||
# Validate chunk method if present
|
||||
if "chunk_method" in req:
|
||||
error_msg, error_code = validation_utils.validate_chunk_method(doc, req["chunk_method"])
|
||||
if error_msg:
|
||||
return error_msg, error_code
|
||||
|
||||
return None, None
|
||||
|
||||
def rename_doc_key(doc):
|
||||
"""
|
||||
Rename document keys to match API response format.
|
||||
|
||||
Converts internal document model field names to the external API
|
||||
response field names (e.g., 'chunk_num' -> 'chunk_count').
|
||||
|
||||
Args:
|
||||
doc: The document model from the database.
|
||||
|
||||
Returns:
|
||||
A dictionary with renamed keys for API response.
|
||||
"""
|
||||
key_mapping = {
|
||||
"chunk_num": "chunk_count",
|
||||
"kb_id": "dataset_id",
|
||||
"token_num": "token_count",
|
||||
"parser_id": "chunk_method",
|
||||
}
|
||||
run_mapping = {
|
||||
"0": "UNSTART",
|
||||
"1": "RUNNING",
|
||||
"2": "CANCEL",
|
||||
"3": "DONE",
|
||||
"4": "FAIL",
|
||||
}
|
||||
renamed_doc = {}
|
||||
for key, value in doc.to_dict().items():
|
||||
new_key = key_mapping.get(key, key)
|
||||
renamed_doc[new_key] = value
|
||||
if key == "run":
|
||||
renamed_doc["run"] = run_mapping.get(str(value))
|
||||
return renamed_doc
|
||||
|
||||
@@ -402,7 +402,14 @@ class ParserConfig(Base):
|
||||
ext: Annotated[dict, Field(default={})]
|
||||
|
||||
class UpdateDocumentReq(Base):
|
||||
"""
|
||||
Request model for updating a document.
|
||||
|
||||
This model validates the request parameters for updating a document,
|
||||
including name, chunk method, enabled status, and other metadata.
|
||||
"""
|
||||
model_config = ConfigDict(extra='ignore')
|
||||
name: Annotated[str | None, Field(default=None, max_length=65535)]
|
||||
chunk_method: Annotated[str | None, Field(default=None, max_length=65535)]
|
||||
enabled: Annotated[int | None, Field(default=None, ge=0, le=1)]
|
||||
chunk_count: Annotated[int | None, Field(default=None, ge=0)]
|
||||
@@ -432,6 +439,22 @@ class UpdateDocumentReq(Base):
|
||||
|
||||
return enabled
|
||||
|
||||
@field_validator("meta_fields", mode="after")
|
||||
@classmethod
|
||||
def validate_document_meta_fields(cls, meta_fields: dict | None):
|
||||
if meta_fields is None:
|
||||
return None
|
||||
|
||||
if not isinstance(meta_fields, dict):
|
||||
raise PydanticCustomError("format_invalid", "Only dictionary type supported")
|
||||
for k, v in meta_fields.items():
|
||||
if isinstance(v, list):
|
||||
if not all(isinstance(i, (str, int, float)) for i in v):
|
||||
raise PydanticCustomError("format_invalid", "The type is not supported in list: {v}", {"v":v})
|
||||
elif not isinstance(v, (str, int, float)):
|
||||
raise PydanticCustomError("format_invalid", "The type is not supported: {v}", {"v":v})
|
||||
return meta_fields
|
||||
|
||||
class CreateDatasetReq(Base):
|
||||
name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=DATASET_NAME_LIMIT), Field(...)]
|
||||
avatar: Annotated[str | None, Field(default=None, max_length=65535)]
|
||||
@@ -854,7 +877,20 @@ class ListFileReq(BaseModel):
|
||||
|
||||
|
||||
def validate_immutable_fields(update_doc_req:UpdateDocumentReq, doc):
|
||||
"""Validate that immutable fields have not been changed."""
|
||||
"""
|
||||
Validate that immutable fields have not been changed.
|
||||
|
||||
Checks that fields like chunk_count, token_count, and progress
|
||||
cannot be modified directly by the user.
|
||||
|
||||
Args:
|
||||
update_doc_req: The validated update document request.
|
||||
doc: The document model from the database.
|
||||
|
||||
Returns:
|
||||
A tuple of (error_message, error_code) if validation fails,
|
||||
or (None, None) if validation passes.
|
||||
"""
|
||||
if update_doc_req.chunk_count and update_doc_req.chunk_count != int(getattr(doc, "chunk_num", -1)):
|
||||
return "Can't change `chunk_count`.", RetCode.DATA_ERROR
|
||||
|
||||
@@ -871,7 +907,24 @@ def validate_immutable_fields(update_doc_req:UpdateDocumentReq, doc):
|
||||
|
||||
|
||||
def validate_document_name(req_doc_name:str, doc, docs_from_name):
|
||||
"""Validate document name update."""
|
||||
"""
|
||||
Validate document name update.
|
||||
|
||||
Checks that the new document name is valid:
|
||||
- Must be a string
|
||||
- Must not exceed the file name length limit
|
||||
- File extension cannot be changed
|
||||
- Must not duplicate an existing document name in the same dataset.
|
||||
|
||||
Args:
|
||||
req_doc_name: The new document name to validate.
|
||||
doc: The document model from the database.
|
||||
docs_from_name: Query result for documents with the new name.
|
||||
|
||||
Returns:
|
||||
A tuple of (error_message, error_code) if validation fails,
|
||||
or (None, None) if validation passes.
|
||||
"""
|
||||
if not isinstance(req_doc_name, str):
|
||||
return f"AttributeError('{type(req_doc_name).__name__}' object has no attribute 'encode')", RetCode.EXCEPTION_ERROR
|
||||
if len(req_doc_name.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||
@@ -885,7 +938,20 @@ def validate_document_name(req_doc_name:str, doc, docs_from_name):
|
||||
return None, None
|
||||
|
||||
def validate_chunk_method(doc, chunk_method=None):
|
||||
"""Validate chunk method update."""
|
||||
"""
|
||||
Validate chunk method update.
|
||||
|
||||
Checks if the chunk method is valid for the given document,
|
||||
particularly for visual documents or specific file types.
|
||||
|
||||
Args:
|
||||
doc: The document model from the database.
|
||||
chunk_method: The chunk method to validate.
|
||||
|
||||
Returns:
|
||||
A tuple of (error_message, error_code) if validation fails,
|
||||
or (None, None) if validation passes.
|
||||
"""
|
||||
if chunk_method is not None and len(chunk_method) == 0: # will not be detected in UpdateDocumentReq
|
||||
return "`chunk_method` (empty string) is not valid", RetCode.DATA_ERROR
|
||||
if doc.type == FileType.VISUAL or re.search(r"\.(ppt|pptx|pages)$", doc.name):
|
||||
|
||||
@@ -366,123 +366,6 @@ class TestDocRoutesUnit:
|
||||
assert res["code"] == module.RetCode.SERVER_ERROR
|
||||
assert res["message"] == "upload failed"
|
||||
|
||||
def test_update_doc_guards_and_error_paths(self, monkeypatch):
|
||||
module = _load_doc_module(monkeypatch)
|
||||
doc = _DummyDoc()
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "query", lambda **_kwargs: [])
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["message"] == "You don't own the dataset."
|
||||
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "query", lambda **_kwargs: [1])
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _id: (False, None))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["message"] == "Can't find this dataset!"
|
||||
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _id: (True, SimpleNamespace(tenant_id="tenant-1")))
|
||||
monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [])
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert "doesn't own the document" in res["message"]
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [doc])
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"chunk_count": 100}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert "chunk_count" in res["message"]
|
||||
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"token_count": 100}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert "token_count" in res["message"]
|
||||
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"progress": 100}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert "progress" in res["message"]
|
||||
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"meta_fields": []}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["message"] == "Field: <meta_fields> - Message: <Input should be a valid dictionary> - Value: <[]>"
|
||||
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"meta_fields": {"k": "v"}}))
|
||||
monkeypatch.setattr(module.DocMetadataService, "update_document_metadata", lambda *_args, **_kwargs: False)
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["message"] == "Failed to update metadata"
|
||||
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"name": "a" * (module.FILE_NAME_LEN_LIMIT + 1)}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["code"] == module.RetCode.ARGUMENT_ERROR
|
||||
assert "bytes or less" in res["message"]
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "update_by_id", lambda *_args, **_kwargs: False)
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"name": "new.txt"}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert "Document rename" in res["message"]
|
||||
|
||||
def test_update_doc_chunk_method_enabled_and_db_error(self, monkeypatch):
|
||||
module = _load_doc_module(monkeypatch)
|
||||
from api.db import FileType
|
||||
visual_doc = _DummyDoc(parser_id="naive", doc_type=FileType.VISUAL)
|
||||
kb = SimpleNamespace(tenant_id="tenant-1")
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "query", lambda **_kwargs: [1])
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "get_by_id", lambda _id: (True, kb))
|
||||
monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [visual_doc])
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"chunk_method": "naive"}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["message"] == "Not supported yet!"
|
||||
|
||||
doc = _DummyDoc(token_num=2, chunk_num=1, parser_id="naive")
|
||||
monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [doc])
|
||||
monkeypatch.setattr(module.DocumentService, "update_by_id", lambda *_args, **_kwargs: False)
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"chunk_method": "manual"}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["message"] == "Document not found!"
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "update_by_id", lambda *_args, **_kwargs: True)
|
||||
monkeypatch.setattr(module.DocumentService, "update_parser_config", lambda *_args, **_kwargs: None)
|
||||
monkeypatch.setattr(module.DocumentService, "increment_chunk_num", lambda *_args, **_kwargs: False)
|
||||
_patch_docstore(monkeypatch, module, delete=lambda *_args, **_kwargs: None)
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"chunk_method": "manual"}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["message"] == "Document not found!"
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "increment_chunk_num", lambda *_args, **_kwargs: True)
|
||||
doc_for_enabled = _DummyDoc(status=False)
|
||||
monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [doc_for_enabled])
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _id: (True, doc_for_enabled))
|
||||
monkeypatch.setattr(module.DocumentService, "update_by_id", lambda *_args, **_kwargs: False)
|
||||
_patch_docstore(monkeypatch, module, update=lambda *_args, **_kwargs: None, delete=lambda *_args, **_kwargs: None)
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"enabled": 1}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert "Document update" in res["message"]
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "update_by_id", lambda *_args, **_kwargs: True)
|
||||
_patch_docstore(monkeypatch, module, update=lambda *_args, **_kwargs: (_ for _ in ()).throw(RuntimeError("boom")), delete=lambda *_args, **_kwargs: None)
|
||||
monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)})
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["code"] == 500
|
||||
assert "boom" in res["message"]
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _id: (False, None))
|
||||
_patch_docstore(monkeypatch, module, update=lambda *_args, **_kwargs: None, delete=lambda *_args, **_kwargs: None)
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["message"] == "Dataset created failed"
|
||||
|
||||
# cover token reset + docStore deletion branch
|
||||
doc_reset = _DummyDoc(token_num=3, chunk_num=2, parser_id="naive", run=0)
|
||||
monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [doc_reset])
|
||||
monkeypatch.setattr(module.DocumentService, "update_by_id", lambda *_args, **_kwargs: True)
|
||||
monkeypatch.setattr(module.DocumentService, "increment_chunk_num", lambda *_args, **_kwargs: True)
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _id: (True, doc_reset))
|
||||
_patch_docstore(monkeypatch, module, delete=lambda *_args, **_kwargs: None, update=lambda *_args, **_kwargs: None)
|
||||
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"chunk_method": "manual"}))
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["code"] == 0
|
||||
|
||||
def _raise_operational_error(_id):
|
||||
raise module.OperationalError("db down")
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", _raise_operational_error)
|
||||
res = _run(module.update_doc.__wrapped__("tenant-1", "ds-1", "doc-1"))
|
||||
assert res["message"] == "Database operation failed"
|
||||
|
||||
def test_download_and_download_doc_errors(self, monkeypatch):
|
||||
module = _load_doc_module(monkeypatch)
|
||||
_patch_send_file(monkeypatch, module)
|
||||
|
||||
@@ -21,16 +21,17 @@ from configs import DOCUMENT_NAME_LIMIT, INVALID_API_TOKEN, INVALID_ID_32
|
||||
from libs.auth import RAGFlowHttpApiAuth
|
||||
from configs import DEFAULT_PARSER_CONFIG
|
||||
|
||||
|
||||
@pytest.mark.p1
|
||||
class TestAuthorization:
|
||||
@pytest.mark.parametrize(
|
||||
"invalid_auth, expected_code, expected_message",
|
||||
[
|
||||
(None, 0, "`Authorization` can't be empty"),
|
||||
(None, 401, "<Unauthorized '401: Unauthorized'>"),
|
||||
(
|
||||
RAGFlowHttpApiAuth(INVALID_API_TOKEN),
|
||||
109,
|
||||
"Authentication error: API key is invalid!",
|
||||
401,
|
||||
"<Unauthorized '401: Unauthorized'>",
|
||||
),
|
||||
],
|
||||
)
|
||||
@@ -53,13 +54,13 @@ class TestDocumentsUpdated:
|
||||
),
|
||||
(
|
||||
0,
|
||||
100,
|
||||
"""AttributeError(\'int\' object has no attribute \'encode\')""",
|
||||
102,
|
||||
"Field: <name> - Message: <Input should be a valid string> - Value: <0>",
|
||||
),
|
||||
(
|
||||
None,
|
||||
100,
|
||||
"""AttributeError(\'NoneType\' object has no attribute \'encode\')""",
|
||||
"AttributeError('NoneType' object has no attribute 'encode')",
|
||||
),
|
||||
(
|
||||
"",
|
||||
@@ -93,7 +94,7 @@ class TestDocumentsUpdated:
|
||||
else:
|
||||
assert res["message"] == expected_message
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"document_id, expected_code, expected_message",
|
||||
[
|
||||
@@ -110,7 +111,7 @@ class TestDocumentsUpdated:
|
||||
assert res["code"] == expected_code
|
||||
assert res["message"] == expected_message
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"dataset_id, expected_code, expected_message",
|
||||
[
|
||||
@@ -127,10 +128,28 @@ class TestDocumentsUpdated:
|
||||
assert res["code"] == expected_code
|
||||
assert res["message"] == expected_message
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"meta_fields, expected_code, expected_message",
|
||||
[({"test": "test"}, 0, ""), ("test", 102, "meta_fields must be a dictionary")],
|
||||
[
|
||||
# Valid meta_fields
|
||||
({"test": "test"}, 0, ""),
|
||||
# Valid meta_fields with various types
|
||||
({"author": "alice", "year": 2024}, 0, ""),
|
||||
({"tags": ["tag1", "tag2"]}, 0, ""),
|
||||
({"count": 42, "price": 19.99}, 0, ""),
|
||||
# Invalid type - string instead of dict
|
||||
("test", 102, "Field: <meta_fields> - Message: <Input should be a valid dictionary> - Value: <test>"),
|
||||
# Invalid type - list instead of dict
|
||||
([], 102, "Field: <meta_fields> - Message: <Input should be a valid dictionary> - Value: <[]>"),
|
||||
# Invalid - list containing objects (unsupported type in list)
|
||||
({"tags": [{"x": {"a": "b"}}]}, 102, "Field: <meta_fields> - Message: <The type is not supported in list: [{'x': {'a': 'b'}}]> - Value: <{'tags': [{'x': {'a': 'b'}}]}>"),
|
||||
({"tags": [{"x": 1}]}, 102, "Field: <meta_fields> - Message: <The type is not supported in list: [{'x': 1}]> - Value: <{'tags': [{'x': 1}]}>"),
|
||||
# Invalid - nested object with unsupported type
|
||||
({"obj": {"x": 1}}, 102, "Field: <meta_fields> - Message: <The type is not supported: {'x': 1}> - Value: <{'obj': {'x': 1}}>"),
|
||||
# Valid types of list
|
||||
({"tags": [2, 1]}, 0, ""),
|
||||
],
|
||||
)
|
||||
def test_meta_fields(self, HttpApiAuth, add_documents, meta_fields, expected_code, expected_message):
|
||||
dataset_id, document_ids = add_documents
|
||||
@@ -139,7 +158,22 @@ class TestDocumentsUpdated:
|
||||
res = list_documents(HttpApiAuth, dataset_id, {"id": document_ids[0]})
|
||||
assert res["data"]["docs"][0]["meta_fields"] == meta_fields
|
||||
else:
|
||||
assert res["message"] == expected_message
|
||||
assert expected_message in res["message"] or res["message"] == expected_message
|
||||
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"meta_fields, expected_code, expected_message",
|
||||
[
|
||||
# Test with invalid document ID (not owned by dataset)
|
||||
({"author": "alice"}, 102, "The dataset doesn't own the document."),
|
||||
],
|
||||
)
|
||||
def test_meta_fields_invalid_document(self, HttpApiAuth, add_documents, meta_fields, expected_code, expected_message):
|
||||
"""Test meta_fields update with invalid document ID"""
|
||||
dataset_id, _ = add_documents
|
||||
res = update_document(HttpApiAuth, dataset_id, "invalid_doc_id_12345678901234567890", {"meta_fields": meta_fields})
|
||||
assert res["code"] == expected_code
|
||||
assert expected_message in res["message"]
|
||||
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
@@ -297,6 +331,30 @@ class TestDocumentsUpdated:
|
||||
assert res["code"] == expected_code
|
||||
assert res["message"] == expected_message
|
||||
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_code, expected_message",
|
||||
[
|
||||
({"chunk_count": 100}, 102, "Can't change `chunk_count`."),
|
||||
({"token_count": 100}, 102, "Can't change `token_count`."),
|
||||
({"progress": 2.0}, 102, "Field: <progress> - Message: <Input should be less than or equal to 1> - Value: <2.0>"),
|
||||
({"progress": 1.0}, 102, "Can't change `progress`."),
|
||||
({"meta_fields": []}, 102, "Field: <meta_fields> - Message: <Input should be a valid dictionary> - Value: <[]>"),
|
||||
],
|
||||
)
|
||||
def test_update_doc_guards_and_error_paths(self, HttpApiAuth, add_documents, payload, expected_code, expected_message):
|
||||
"""
|
||||
Test various guard conditions and error paths for document update functionality.
|
||||
This includes testing for invalid dataset ownership, document ownership,
|
||||
immutable fields, and validation errors.
|
||||
"""
|
||||
dataset_id, document_ids = add_documents
|
||||
document_id = document_ids[0]
|
||||
|
||||
res = update_document(HttpApiAuth, dataset_id, document_id, payload)
|
||||
assert res["code"] == expected_code
|
||||
if expected_message:
|
||||
assert expected_message in res["message"] or res["message"] == expected_message
|
||||
|
||||
|
||||
DEFAULT_PARSER_CONFIG_FOR_TEST = {
|
||||
@@ -328,6 +386,7 @@ DEFAULT_PARSER_CONFIG_FOR_TEST = {
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class TestUpdateDocumentParserConfig:
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
@@ -343,37 +402,32 @@ class TestUpdateDocumentParserConfig:
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"chunk_token_num": -1},
|
||||
100,
|
||||
"AssertionError('chunk_token_num should be in range from 1 to 100000000')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.chunk_token_num> - Message: <Input should be greater than or equal to 1> - Value: <-1>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"chunk_token_num": 0},
|
||||
100,
|
||||
"AssertionError('chunk_token_num should be in range from 1 to 100000000')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.chunk_token_num> - Message: <Input should be greater than or equal to 1> - Value: <0>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"chunk_token_num": 100000000},
|
||||
100,
|
||||
"AssertionError('chunk_token_num should be in range from 1 to 100000000')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.chunk_token_num> - Message: <Input should be less than or equal to 2048> - Value: <100000000>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"chunk_token_num": 3.14},
|
||||
102,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
"Field: <parser_config.chunk_token_num> - Message: <Input should be a valid integer> - Value: <3.14>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"chunk_token_num": "1024"},
|
||||
100,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.chunk_token_num> - Message: <Input should be a valid integer> - Value: <1024>",
|
||||
),
|
||||
(
|
||||
"naive",
|
||||
@@ -392,148 +446,135 @@ class TestUpdateDocumentParserConfig:
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"html4excel": 1},
|
||||
100,
|
||||
"AssertionError('html4excel should be True or False')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.html4excel> - Message: <Input should be a valid boolean> - Value: <1>",
|
||||
),
|
||||
("naive", {"delimiter": ""}, 102, "Field: <parser_config.delimiter> - Message: <String should have at least 1 character> - Value: <>"),
|
||||
("naive", {"delimiter": "`##`"}, 0, ""),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"delimiter": 1},
|
||||
100,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.delimiter> - Message: <Input should be a valid string> - Value: <1>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"task_page_size": -1},
|
||||
100,
|
||||
"AssertionError('task_page_size should be in range from 1 to 100000000')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.task_page_size> - Message: <Input should be greater than or equal to 1> - Value: <-1>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"task_page_size": 0},
|
||||
100,
|
||||
"AssertionError('task_page_size should be in range from 1 to 100000000')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.task_page_size> - Message: <Input should be greater than or equal to 1> - Value: <0>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"task_page_size": 100000000},
|
||||
100,
|
||||
"AssertionError('task_page_size should be in range from 1 to 100000000')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
0,
|
||||
"",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"task_page_size": 3.14},
|
||||
100,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.task_page_size> - Message: <Input should be a valid integer> - Value: <3.14>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"task_page_size": "1024"},
|
||||
100,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.task_page_size> - Message: <Input should be a valid integer> - Value: <1024>",
|
||||
),
|
||||
(
|
||||
"naive",
|
||||
{
|
||||
"raptor": {
|
||||
"use_raptor": {"a": "b"},
|
||||
}
|
||||
},
|
||||
102,
|
||||
"Field: <parser_config.raptor.use_raptor> - Message: <Input should be a valid boolean> - Value: <{'a': 'b'}>",
|
||||
),
|
||||
("naive", {"raptor": {"use_raptor": {
|
||||
"a": "b"
|
||||
},}}, 102, "Field: <parser_config.raptor.use_raptor> - Message: <Input should be a valid boolean> - Value: <{'a': 'b'}>"),
|
||||
("naive", {"raptor": {"use_raptor": False}}, 0, ""),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"invalid_key": "invalid_value"},
|
||||
100,
|
||||
"""AssertionError("Abnormal \'parser_config\'. Invalid key: invalid_key")""",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.invalid_key> - Message: <Extra inputs are not permitted> - Value: <invalid_value>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_keywords": -1},
|
||||
100,
|
||||
"AssertionError('auto_keywords should be in range from 0 to 32')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.auto_keywords> - Message: <Input should be greater than or equal to 0> - Value: <-1>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_keywords": 32},
|
||||
100,
|
||||
"AssertionError('auto_keywords should be in range from 0 to 32')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_questions": 3.14},
|
||||
100,
|
||||
0,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_keywords": "1024"},
|
||||
100,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.auto_keywords> - Message: <Input should be a valid integer> - Value: <1024>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_keywords": 3.14},
|
||||
102,
|
||||
"Field: <parser_config.auto_keywords> - Message: <Input should be a valid integer> - Value: <3.14>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_questions": -1},
|
||||
100,
|
||||
"AssertionError('auto_questions should be in range from 0 to 10')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.auto_questions> - Message: <Input should be greater than or equal to 0> - Value: <-1>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_questions": 10},
|
||||
100,
|
||||
"AssertionError('auto_questions should be in range from 0 to 10')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
0,
|
||||
"",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_questions": 3.14},
|
||||
100,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.auto_questions> - Message: <Input should be a valid integer> - Value: <3.14>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_questions": "1024"},
|
||||
100,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.auto_questions> - Message: <Input should be a valid integer> - Value: <1024>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"topn_tags": -1},
|
||||
100,
|
||||
"AssertionError('topn_tags should be in range from 0 to 10')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.topn_tags> - Message: <Input should be greater than or equal to 1> - Value: <-1>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"topn_tags": 10},
|
||||
100,
|
||||
"AssertionError('topn_tags should be in range from 0 to 10')",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
0,
|
||||
"",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"topn_tags": 3.14},
|
||||
100,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.topn_tags> - Message: <Input should be a valid integer> - Value: <3.14>",
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"topn_tags": "1024"},
|
||||
100,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
102,
|
||||
"Field: <parser_config.topn_tags> - Message: <Input should be a valid integer> - Value: <1024>",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
@@ -67,7 +67,7 @@ class TestDocumentsUpdated:
|
||||
updated_doc = dataset.list_documents(id=document.id)[0]
|
||||
assert updated_doc.name == name, str(updated_doc)
|
||||
|
||||
@pytest.mark.p3
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"meta_fields, expected_message",
|
||||
[
|
||||
@@ -238,27 +238,81 @@ class TestDocumentsUpdated:
|
||||
document.update(payload)
|
||||
assert expected_message in str(exception_info.value), str(exception_info.value)
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_immutable_fields_chunk_count(self, add_document):
|
||||
document, _ = add_document # Unpack the tuple to get the document object
|
||||
with pytest.raises(Exception) as exception_info:
|
||||
document.update({"chunk_count": 999}) # Attempt to change immutable field
|
||||
assert "Can't change `chunk_count`" in str(exception_info.value), str(exception_info.value)
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_message",
|
||||
[
|
||||
({"chunk_count": 1}, "Can't change `chunk_count`"),
|
||||
],
|
||||
)
|
||||
def test_immutable_fields_chunk_count(self, add_documents, payload, expected_message):
|
||||
_, documents = add_documents
|
||||
document = documents[0]
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_immutable_fields_token_count(self, add_document):
|
||||
document, _ = add_document # Unpack the tuple to get the document object
|
||||
with pytest.raises(Exception) as exception_info:
|
||||
document.update({"token_count": 9999}) # Attempt to change immutable field
|
||||
assert "Can't change `token_num`" in str(exception_info.value), str(exception_info.value)
|
||||
document.update(payload)
|
||||
assert expected_message in str(exception_info.value), str(exception_info.value)
|
||||
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_message",
|
||||
[
|
||||
({"token_count": 9999}, "Can't change `token_count`"), # Attempt to change immutable field
|
||||
],
|
||||
)
|
||||
def test_immutable_fields_token_count(self, add_documents, payload, expected_message):
|
||||
_, documents = add_documents
|
||||
document = documents[0]
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_immutable_fields_progress(self, add_document):
|
||||
document, _ = add_document # Unpack the tuple to get the document object
|
||||
with pytest.raises(Exception) as exception_info:
|
||||
document.update({"progress": 0.5}) # Attempt to change immutable field
|
||||
assert "Can't change `progress`" in str(exception_info.value), str(exception_info.value)
|
||||
document.update(payload)
|
||||
assert expected_message in str(exception_info.value), str(exception_info.value)
|
||||
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_message",
|
||||
[
|
||||
({"progress": 0.5}, "Can't change `progress`"), # Attempt to change immutable field
|
||||
({"progress": 1.5}, "Field: <progress> - Message: <Input should be less than or equal to 1> - Value: <1.5>"), # Attempt to change immutable field
|
||||
],
|
||||
)
|
||||
def test_immutable_fields_progress(self, add_documents, payload, expected_message):
|
||||
_, documents = add_documents
|
||||
document = documents[0]
|
||||
|
||||
with pytest.raises(Exception) as exception_info:
|
||||
document.update(payload)
|
||||
assert expected_message in str(exception_info.value), str(exception_info.value)
|
||||
|
||||
|
||||
DEFAULT_PARSER_CONFIG_FOR_TEST = {
|
||||
"layout_recognize": "DeepDOC",
|
||||
"chunk_token_num": 512,
|
||||
"delimiter": "\n",
|
||||
"auto_keywords": 0,
|
||||
"auto_questions": 0,
|
||||
"html4excel": False,
|
||||
"topn_tags": 3,
|
||||
"raptor": {
|
||||
"use_raptor": True,
|
||||
"prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
|
||||
"max_token": 256,
|
||||
"threshold": 0.1,
|
||||
"max_cluster": 64,
|
||||
"random_seed": 0,
|
||||
},
|
||||
"graphrag": {
|
||||
"use_graphrag": True,
|
||||
"entity_types": [
|
||||
"organization",
|
||||
"person",
|
||||
"geo",
|
||||
"event",
|
||||
"category",
|
||||
],
|
||||
"method": "light",
|
||||
},
|
||||
}
|
||||
|
||||
class TestUpdateDocumentParserConfig:
|
||||
@pytest.mark.p2
|
||||
@@ -268,15 +322,14 @@ class TestUpdateDocumentParserConfig:
|
||||
("naive", {}, ""),
|
||||
pytest.param(
|
||||
"naive",
|
||||
DEFAULT_PARSER_CONFIG,
|
||||
DEFAULT_PARSER_CONFIG_FOR_TEST,
|
||||
"",
|
||||
marks=pytest.mark.skip(reason="DEFAULT_PARSER_CONFIG contains fields not allowed in document update API"),
|
||||
),
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"chunk_token_num": -1},
|
||||
"chunk_token_num should be in range from 1 to 100000000",
|
||||
marks=pytest.mark.skip(reason="issues/6098"),
|
||||
"Field: <parser_config.chunk_token_num> - Message: <Input should be greater than or equal to 1> - Value: <-1>",
|
||||
),
|
||||
(
|
||||
"naive",
|
||||
@@ -327,8 +380,7 @@ class TestUpdateDocumentParserConfig:
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"task_page_size": 100000000},
|
||||
"task_page_size should be in range from 1 to 100000000",
|
||||
marks=pytest.mark.skip(reason="API validation differs from expected message"),
|
||||
"",
|
||||
),
|
||||
(
|
||||
"naive",
|
||||
@@ -360,8 +412,7 @@ class TestUpdateDocumentParserConfig:
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_keywords": 32},
|
||||
"auto_keywords should be in range from 0 to 32",
|
||||
marks=pytest.mark.skip(reason="API validation differs from expected message"),
|
||||
"",
|
||||
),
|
||||
(
|
||||
"naive",
|
||||
@@ -381,8 +432,7 @@ class TestUpdateDocumentParserConfig:
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"auto_questions": 10},
|
||||
"auto_questions should be in range from 0 to 10",
|
||||
marks=pytest.mark.skip(reason="API validation differs from expected message"),
|
||||
"",
|
||||
),
|
||||
(
|
||||
"naive",
|
||||
@@ -402,8 +452,7 @@ class TestUpdateDocumentParserConfig:
|
||||
pytest.param(
|
||||
"naive",
|
||||
{"topn_tags": 10},
|
||||
"topn_tags should be in range from 0 to 10",
|
||||
marks=pytest.mark.skip(reason="API validation differs from expected message"),
|
||||
"",
|
||||
),
|
||||
(
|
||||
"naive",
|
||||
|
||||
@@ -435,16 +435,6 @@ def document_change_status(auth, payload=None, *, headers=HEADERS, data=None):
|
||||
return res.json()
|
||||
|
||||
|
||||
def document_rename(auth, payload=None, *, headers=HEADERS, data=None):
|
||||
res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/rename", headers=headers, auth=auth, json=payload, data=data)
|
||||
return res.json()
|
||||
|
||||
|
||||
def document_set_meta(auth, payload=None, *, headers=HEADERS, data=None):
|
||||
res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/set_meta", headers=headers, auth=auth, json=payload, data=data)
|
||||
return res.json()
|
||||
|
||||
|
||||
def bulk_upload_documents(auth, kb_id, num, tmp_path):
|
||||
fps = []
|
||||
for i in range(num):
|
||||
|
||||
@@ -22,8 +22,6 @@ from common import (
|
||||
document_filter,
|
||||
document_infos,
|
||||
document_metadata_summary,
|
||||
document_rename,
|
||||
document_set_meta,
|
||||
document_update_metadata_setting,
|
||||
)
|
||||
from configs import INVALID_API_TOKEN
|
||||
@@ -82,21 +80,6 @@ class TestAuthorization:
|
||||
assert res["code"] == expected_code, res
|
||||
assert expected_fragment in res["message"], res
|
||||
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
|
||||
def test_rename_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
|
||||
res = document_rename(invalid_auth, {"doc_id": "doc_id", "name": "rename.txt"})
|
||||
assert res["code"] == expected_code, res
|
||||
assert expected_fragment in res["message"], res
|
||||
|
||||
@pytest.mark.p2
|
||||
@pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
|
||||
def test_set_meta_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
|
||||
res = document_set_meta(invalid_auth, {"doc_id": "doc_id", "meta": "{}"})
|
||||
assert res["code"] == expected_code, res
|
||||
assert expected_fragment in res["message"], res
|
||||
|
||||
|
||||
class TestDocumentMetadata:
|
||||
@pytest.mark.p2
|
||||
def test_filter(self, WebApiAuth, add_dataset_func):
|
||||
@@ -163,28 +146,6 @@ class TestDocumentMetadata:
|
||||
assert info_res["code"] == 0, info_res
|
||||
assert info_res["data"][0]["status"] == "1", info_res
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_rename(self, WebApiAuth, add_document_func):
|
||||
_, doc_id = add_document_func
|
||||
name = f"renamed_{doc_id}.txt"
|
||||
res = document_rename(WebApiAuth, {"doc_id": doc_id, "name": name})
|
||||
assert res["code"] == 0, res
|
||||
assert res["data"] is True, res
|
||||
info_res = document_infos(WebApiAuth, {"doc_ids": [doc_id]})
|
||||
assert info_res["code"] == 0, info_res
|
||||
assert info_res["data"][0]["name"] == name, info_res
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_set_meta(self, WebApiAuth, add_document_func):
|
||||
_, doc_id = add_document_func
|
||||
res = document_set_meta(WebApiAuth, {"doc_id": doc_id, "meta": "{\"author\": \"alice\"}"})
|
||||
assert res["code"] == 0, res
|
||||
assert res["data"] is True, res
|
||||
info_res = document_infos(WebApiAuth, {"doc_ids": [doc_id]})
|
||||
assert info_res["code"] == 0, info_res
|
||||
meta_fields = info_res["data"][0].get("meta_fields", {})
|
||||
assert meta_fields.get("author") == "alice", info_res
|
||||
|
||||
|
||||
class TestDocumentMetadataNegative:
|
||||
@pytest.mark.p3
|
||||
@@ -231,20 +192,6 @@ class TestDocumentMetadataNegative:
|
||||
assert res["code"] == 101, res
|
||||
assert "Status" in res["message"], res
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_rename_extension_mismatch(self, WebApiAuth, add_document_func):
|
||||
_, doc_id = add_document_func
|
||||
res = document_rename(WebApiAuth, {"doc_id": doc_id, "name": "renamed.pdf"})
|
||||
assert res["code"] == 101, res
|
||||
assert "extension" in res["message"], res
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_set_meta_invalid_type(self, WebApiAuth, add_document_func):
|
||||
_, doc_id = add_document_func
|
||||
res = document_set_meta(WebApiAuth, {"doc_id": doc_id, "meta": "[]"})
|
||||
assert res["code"] == 101, res
|
||||
assert "dictionary" in res["message"], res
|
||||
|
||||
|
||||
def _run(coro):
|
||||
return asyncio.run(coro)
|
||||
@@ -590,87 +537,6 @@ class TestDocumentMetadataUnit:
|
||||
assert res["code"] == 0
|
||||
assert res["data"]["doc1"]["status"] == "1"
|
||||
|
||||
def test_rename_branch_matrix_and_exception_unit(self, document_app_module, monkeypatch):
|
||||
module = document_app_module
|
||||
file_updates = []
|
||||
es_updates = []
|
||||
|
||||
async def fake_thread_pool_exec(func, *_args, **_kwargs):
|
||||
return func()
|
||||
|
||||
monkeypatch.setattr(module, "thread_pool_exec", fake_thread_pool_exec)
|
||||
monkeypatch.setattr(module.DocumentService, "get_tenant_id", lambda _doc_id: "tenant1")
|
||||
monkeypatch.setattr(module.rag_tokenizer, "tokenize", lambda _name: ["token"])
|
||||
monkeypatch.setattr(module.rag_tokenizer, "fine_grained_tokenize", lambda _tokens: ["fine"])
|
||||
monkeypatch.setattr(module, "server_error_response", lambda e: {"code": 500, "message": str(e)})
|
||||
|
||||
class _DocStore:
|
||||
def index_exist(self, _index_name, _kb_id):
|
||||
return True
|
||||
|
||||
def update(self, where, payload, _index_name, _kb_id):
|
||||
es_updates.append((where, payload))
|
||||
|
||||
monkeypatch.setattr(module.settings, "docStoreConn", _DocStore())
|
||||
monkeypatch.setattr(module.search, "index_name", lambda tenant_id: f"idx_{tenant_id}")
|
||||
|
||||
def set_req(name):
|
||||
async def fake_request_json():
|
||||
return {"doc_id": "doc1", "name": name}
|
||||
|
||||
monkeypatch.setattr(module, "get_request_json", fake_request_json)
|
||||
|
||||
set_req("renamed.txt")
|
||||
monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: False)
|
||||
res = _run(module.rename.__wrapped__())
|
||||
assert res["code"] == module.RetCode.AUTHENTICATION_ERROR
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: True)
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (False, None))
|
||||
res = _run(module.rename.__wrapped__())
|
||||
assert res["code"] == module.RetCode.DATA_ERROR
|
||||
assert "Document not found!" in res["message"]
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (True, SimpleNamespace(id="doc1", name="origin.txt", kb_id="kb1")))
|
||||
set_req("renamed.pdf")
|
||||
res = _run(module.rename.__wrapped__())
|
||||
assert res["code"] == module.RetCode.ARGUMENT_ERROR
|
||||
assert "extension" in res["message"]
|
||||
|
||||
too_long = "a" * (module.FILE_NAME_LEN_LIMIT + 1) + ".txt"
|
||||
set_req(too_long)
|
||||
res = _run(module.rename.__wrapped__())
|
||||
assert res["code"] == module.RetCode.ARGUMENT_ERROR
|
||||
assert "bytes or less" in res["message"]
|
||||
|
||||
set_req("dup.txt")
|
||||
monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [SimpleNamespace(name="dup.txt")])
|
||||
res = _run(module.rename.__wrapped__())
|
||||
assert res["code"] == module.RetCode.DATA_ERROR
|
||||
assert "Duplicated document name" in res["message"]
|
||||
|
||||
set_req("ok.txt")
|
||||
monkeypatch.setattr(module.DocumentService, "query", lambda **_kwargs: [])
|
||||
monkeypatch.setattr(module.DocumentService, "update_by_id", lambda *_args, **_kwargs: True)
|
||||
monkeypatch.setattr(module.File2DocumentService, "get_by_document_id", lambda _doc_id: [SimpleNamespace(file_id="file1")])
|
||||
monkeypatch.setattr(module.FileService, "get_by_id", lambda _file_id: (True, SimpleNamespace(id="file1")))
|
||||
monkeypatch.setattr(module.FileService, "update_by_id", lambda file_id, payload: file_updates.append((file_id, payload)))
|
||||
res = _run(module.rename.__wrapped__())
|
||||
assert res["code"] == 0
|
||||
assert file_updates == [("file1", {"name": "ok.txt"})]
|
||||
assert es_updates[0][0] == {"doc_id": "doc1"}
|
||||
assert es_updates[0][1]["docnm_kwd"] == "ok.txt"
|
||||
assert es_updates[0][1]["title_tks"] == ["token"]
|
||||
assert es_updates[0][1]["title_sm_tks"] == ["fine"]
|
||||
|
||||
def raise_db_error(*_args, **_kwargs):
|
||||
raise RuntimeError("rename boom")
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "update_by_id", raise_db_error)
|
||||
res = _run(module.rename.__wrapped__())
|
||||
assert res["code"] == 500
|
||||
assert "rename boom" in res["message"]
|
||||
|
||||
def test_get_route_not_found_success_and_exception_unit(self, document_app_module, monkeypatch):
|
||||
module = document_app_module
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (False, None))
|
||||
@@ -932,50 +798,3 @@ class TestDocumentMetadataUnit:
|
||||
res = _run(module.get_image("bucket-name"))
|
||||
assert res["code"] == 500
|
||||
assert "image boom" in res["message"]
|
||||
|
||||
def test_set_meta_validation_and_persistence_matrix_unit(self, document_app_module, monkeypatch):
|
||||
module = document_app_module
|
||||
|
||||
def set_req(payload):
|
||||
async def fake_request_json():
|
||||
return payload
|
||||
|
||||
monkeypatch.setattr(module, "get_request_json", fake_request_json)
|
||||
|
||||
set_req({"doc_id": "doc1", "meta": "{}"})
|
||||
monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: False)
|
||||
res = _run(module.set_meta.__wrapped__())
|
||||
assert res["code"] == module.RetCode.AUTHENTICATION_ERROR
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "accessible", lambda *_args, **_kwargs: True)
|
||||
set_req({"doc_id": "doc1", "meta": "[]"})
|
||||
res = _run(module.set_meta.__wrapped__())
|
||||
assert res["code"] == module.RetCode.ARGUMENT_ERROR
|
||||
assert "Only dictionary type supported." in res["message"]
|
||||
|
||||
set_req({"doc_id": "doc1", "meta": '{"tags":[{"x":1}]}'})
|
||||
res = _run(module.set_meta.__wrapped__())
|
||||
assert res["code"] == module.RetCode.ARGUMENT_ERROR
|
||||
assert "The type is not supported in list" in res["message"]
|
||||
|
||||
set_req({"doc_id": "doc1", "meta": '{"obj":{"x":1}}'})
|
||||
res = _run(module.set_meta.__wrapped__())
|
||||
assert res["code"] == module.RetCode.ARGUMENT_ERROR
|
||||
assert "The type is not supported" in res["message"]
|
||||
|
||||
set_req({"doc_id": "doc1", "meta": "{"})
|
||||
res = _run(module.set_meta.__wrapped__())
|
||||
assert res["code"] == module.RetCode.ARGUMENT_ERROR
|
||||
assert "Json syntax error:" in res["message"]
|
||||
|
||||
set_req({"doc_id": "doc1", "meta": '{"author":"alice"}'})
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (False, None))
|
||||
res = _run(module.set_meta.__wrapped__())
|
||||
assert res["code"] == module.RetCode.DATA_ERROR
|
||||
assert "Document not found!" in res["message"]
|
||||
|
||||
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _doc_id: (True, SimpleNamespace(id="doc1")))
|
||||
monkeypatch.setattr(module.DocMetadataService, "update_document_metadata", lambda *_args, **_kwargs: False)
|
||||
res = _run(module.set_meta.__wrapped__())
|
||||
assert res["code"] == module.RetCode.DATA_ERROR
|
||||
assert "Database error (meta updates)!" in res["message"]
|
||||
|
||||
@@ -15,7 +15,10 @@ import {
|
||||
} from '@/interfaces/request/document';
|
||||
import i18n from '@/locales/config';
|
||||
import { EMPTY_METADATA_FIELD } from '@/pages/dataset/dataset/use-select-filters';
|
||||
import kbService, { listDocument } from '@/services/knowledge-service';
|
||||
import kbService, {
|
||||
listDocument,
|
||||
renameDocument,
|
||||
} from '@/services/knowledge-service';
|
||||
import api, { restAPIv1, webAPI } from '@/utils/api';
|
||||
import { getSearchValue } from '@/utils/common-util';
|
||||
import { buildChunkHighlights } from '@/utils/document-util';
|
||||
@@ -336,12 +339,13 @@ export const useSaveDocumentName = () => {
|
||||
mutationFn: async ({
|
||||
name,
|
||||
documentId,
|
||||
kbId,
|
||||
}: {
|
||||
name: string;
|
||||
documentId: string;
|
||||
kbId: string;
|
||||
}) => {
|
||||
const { data } = await kbService.document_rename({
|
||||
doc_id: documentId,
|
||||
const { data } = await renameDocument(kbId, documentId, {
|
||||
name: name,
|
||||
});
|
||||
if (data.code === 0) {
|
||||
|
||||
@@ -15,14 +15,18 @@ export const useRenameDocument = () => {
|
||||
|
||||
const onRenameOk = useCallback(
|
||||
async (name: string) => {
|
||||
if (record?.id) {
|
||||
const ret = await saveName({ documentId: record.id, name });
|
||||
if (record?.id && record?.kb_id) {
|
||||
const ret = await saveName({
|
||||
documentId: record.id,
|
||||
name,
|
||||
kbId: record.kb_id,
|
||||
});
|
||||
if (ret === 0) {
|
||||
hideRenameModal();
|
||||
}
|
||||
}
|
||||
},
|
||||
[record?.id, saveName, hideRenameModal],
|
||||
[record?.id, record?.kb_id, saveName, hideRenameModal],
|
||||
);
|
||||
|
||||
const handleShow = useCallback(
|
||||
|
||||
@@ -77,7 +77,7 @@ const methods = {
|
||||
},
|
||||
document_rename: {
|
||||
url: document_rename,
|
||||
method: 'post',
|
||||
method: 'put',
|
||||
},
|
||||
document_create: {
|
||||
url: document_create,
|
||||
@@ -251,6 +251,12 @@ export const listDocument = (
|
||||
export const documentFilter = (kb_id: string) =>
|
||||
request.post(api.get_dataset_filter, { kb_id });
|
||||
|
||||
export const renameDocument = (
|
||||
datasetId: string,
|
||||
documentId: string,
|
||||
data: { name?: string },
|
||||
) => request.put(api.document_rename(datasetId, documentId), { data });
|
||||
|
||||
export const getMetaDataService = ({
|
||||
kb_id,
|
||||
doc_ids,
|
||||
|
||||
@@ -109,7 +109,8 @@ export default {
|
||||
document_change_status: `${webAPI}/document/change_status`,
|
||||
document_rm: `${webAPI}/document/rm`,
|
||||
document_delete: `${webAPI}/api/document`,
|
||||
document_rename: `${webAPI}/document/rename`,
|
||||
document_rename: (datasetId: string, documentId: string) =>
|
||||
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
|
||||
document_create: `${webAPI}/document/create`,
|
||||
document_run: `${webAPI}/document/run`,
|
||||
document_change_parser: `${webAPI}/document/change_parser`,
|
||||
|
||||
Reference in New Issue
Block a user