Files
ragflow/api/apps/restful_apis/document_api.py
Jack 577c96bf2a Refactor: Merge document update API (#13962)
### What problem does this PR solve?

Refactor: merge document.rename into document.update_document

### Type of change

- [x] Refactoring


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Added a unified document update API (PUT) supporting name, metadata,
parser/chunk settings, and status changes.

* **Breaking Changes**
* Legacy single-parameter rename endpoint removed; renames now require
dataset + document identifiers.
  * `/list` now reads dataset id from a different query parameter.

* **Validation / Bug Fixes**
* Stricter meta_fields and parser-config validation; unauthenticated
requests return 401.

* **Frontend**
  * UI now sends dataset id when saving document names.

* **Tests**
* Numerous unit and HTTP tests adjusted or removed to match new API and
validations.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: MkDev11 <94194147+MkDev11@users.noreply.github.com>
Co-authored-by: mkdev11 <YOUR_GITHUB_ID+MkDev11@users.noreply.github.com>
Co-authored-by: mkdev11 <MkDev11@users.noreply.github.com>
Co-authored-by: Qi Wang <wangq8@outlook.com>
Co-authored-by: dataCenter430 <161712630+dataCenter430@users.noreply.github.com>
Co-authored-by: balibabu <cike8899@users.noreply.github.com>
2026-04-09 11:17:38 +08:00

147 lines
5.5 KiB
Python

#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
from peewee import OperationalError
from pydantic import ValidationError
from api.apps.services.document_api_service import rename_doc_key, validate_document_update_fields, \
update_document_name_only, update_chunk_method_only, update_document_status_only
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from common.constants import RetCode
from api.apps import login_required
from api.utils.api_utils import get_error_data_result, get_result, add_tenant_id_to_kwargs, get_request_json
from api.utils.validation_utils import (
UpdateDocumentReq, format_validation_error_message,
)
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PUT"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def update_document(tenant_id, dataset_id, document_id):
    """
    Update a document within a dataset.

    Only the fields supplied in the request body are considered. On success
    the freshly re-read document is returned; otherwise an error result is
    returned for the first check or update step that fails.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document to update.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
      - in: body
        name: body
        description: Document update parameters.
        required: true
        schema:
          type: object
          properties:
            name:
              type: string
              description: New name of the document.
            meta_fields:
              description: Document metadata to store for the document.
            parser_config:
              type: object
              description: Parser configuration.
            chunk_method:
              type: string
              description: Chunking method.
            enabled:
              type: boolean
              description: Document status.
    responses:
      200:
        description: Document updated successfully.
        schema:
          type: object
    """
    req = await get_request_json()

    # Ownership check: the dataset must be visible to the calling tenant.
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(message="You don't own the dataset.")
    kb_found, kb = KnowledgebaseService.get_by_id(dataset_id)
    if not kb_found:
        return get_error_data_result(message="Can't find this dataset!")

    # The document must belong to that dataset.
    docs = DocumentService.query(kb_id=dataset_id, id=document_id)
    if not docs:
        return get_error_data_result(message="The dataset doesn't own the document.")

    # Schema-level validation of the request body.
    try:
        update_doc_req = UpdateDocumentReq(**req)
    except ValidationError as exc:
        return get_error_data_result(message=format_validation_error_message(exc), code=RetCode.DATA_ERROR)

    doc = docs[0]
    # Second validation pass that also looks at the stored document.
    error_msg, error_code = validate_document_update_fields(update_doc_req, doc, req)
    if error_msg:
        return get_error_data_result(message=error_msg, code=error_code)

    # All validations passed; apply the updates one after another. A failing
    # step returns immediately and nothing here undoes the steps before it.

    # meta_fields provided: store the validated value.
    if "meta_fields" in req:
        if not DocMetadataService.update_document_metadata(document_id, update_doc_req.meta_fields):
            return get_error_data_result(message="Failed to update metadata")

    # Name provided and different from the stored one: rename.
    if "name" in req and req["name"] != doc.name:
        if error := update_document_name_only(document_id, req["name"]):
            return error

    # Parser config provided: the raw payload is what gets persisted, so make
    # sure the key really is in the payload before indexing it (the validated
    # model alone being truthy does not guarantee that).
    if "parser_config" in req and update_doc_req.parser_config:
        DocumentService.update_parser_config(doc.id, req["parser_config"])

    # Chunk method provided: the helper decides whether it actually changed.
    if update_doc_req.chunk_method:
        if error := update_chunk_method_only(req, doc, dataset_id, tenant_id):
            return error

    # "enabled" provided: the helper decides whether the status changed.
    # An explicit null is treated as "not provided", like the other optional
    # fields above, instead of letting int(None) raise a TypeError.
    # NOTE(review): whether null can pass UpdateDocumentReq is not visible
    # here; the guard is harmless if it cannot.
    if "enabled" in req and req["enabled"] is not None:
        if error := update_document_status_only(int(req["enabled"]), doc, kb):
            return error

    # Re-read the document so the response reflects every update applied above.
    doc_id = doc.id
    try:
        ok, refreshed_doc = DocumentService.get_by_id(doc_id)
    except OperationalError as exc:
        logging.exception(exc)
        return get_error_data_result(message="Database operation failed")
    if not ok:
        return get_error_data_result(message=f"Can not get document by id:{doc_id}")

    return get_result(data=rename_doc_key(refreshed_doc))