From 51ce6aab01a5d9012d096acfc1bcdf25dad701da Mon Sep 17 00:00:00 2001 From: Jack Date: Mon, 13 Apr 2026 12:47:17 +0800 Subject: [PATCH] Consolidate set_meta into update_document (#14045) ### What problem does this PR solve? Consolidate the "set_meta" API into "update_document". Before consolidation Web API: POST /api/v1/document/set_meta HTTP API - PUT /v1/datasets/{dataset_id}/documents/{doc_id} After consolidation, RESTful API -- PUT /v1/datasets/{dataset_id}/documents/{doc_id} ### Type of change - [x] Refactoring --- admin/client/ragflow_client.py | 58 +++++++++++++++++-- api/apps/document_app.py | 36 ------------ .../test_chunk_feedback/__init__.py | 16 ----- 3 files changed, 54 insertions(+), 56 deletions(-) delete mode 100644 test/testcases/test_web_api/test_chunk_feedback/__init__.py diff --git a/admin/client/ragflow_client.py b/admin/client/ragflow_client.py index 3b0d8556e4..065dee484e 100644 --- a/admin/client/ragflow_client.py +++ b/admin/client/ragflow_client.py @@ -1564,6 +1564,28 @@ class RAGFlowClient: else: print(f"Fail to update chunk, HTTP {response.status_code}") + def _get_documents_by_ids(self, ids:list[str]): + response = self.http_client.request( + "POST", + "/document/infos", + json_body={"doc_ids": ids}, + use_api_base=False, + auth_kind="web" + ) + + if response.status_code != 200: + return f"Fail to get document info, HTTP {response.status_code}", None + + res_json = response.json() + if res_json.get("code") != 0: + return f"Fail to get document info: {res_json.get('message')}", None + + docs = res_json.get("data", []) + if not docs: + return f"Document not found: {ids}", None + + return None, docs + def set_metadata(self, command_dict): if self.server_type != "user": print("This command is only allowed in USER mode") @@ -1572,14 +1594,42 @@ class RAGFlowClient: doc_id = command_dict["doc_id"] meta_json_str = command_dict["meta"] + # Parse JSON string to dict + import json + try: + meta_fields = json.loads(meta_json_str) + except json.JSONDecodeError as e: + print(f"Invalid JSON 
format: {e}") + return + + # Step 1: Get document info to find kb_id (dataset_id) + doc_error_msg, docs = self._get_documents_by_ids([doc_id]) + if doc_error_msg: + print(doc_error_msg) + return + + if len(docs) == 0: + print(f"no document found for {doc_id}") + return + + dataset_id = docs[0].get("kb_id") + if not dataset_id: + print(f"Dataset ID not found for document: {doc_id}") + return + # Send meta as JSON string payload = { - "doc_id": doc_id, - "meta": meta_json_str, + "meta_fields": meta_fields, } - response = self.http_client.request("POST", "/document/set_meta", json_body=payload, - use_api_base=False, auth_kind="web") + response = self.http_client.request( + "PUT", + f"/datasets/{dataset_id}/documents/{doc_id}", + json_body=payload, + use_api_base=True, + auth_kind="web" + ) + res_json = response.json() if response.status_code == 200: if res_json.get("code") == 0: diff --git a/api/apps/document_app.py b/api/apps/document_app.py index faa29fb59d..ce5d2c2478 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License # -import json import os.path import re from pathlib import Path, PurePosixPath, PureWindowsPath @@ -867,41 +866,6 @@ async def parse(): return get_json_result(data=txt) -@manager.route("/set_meta", methods=["POST"]) # noqa: F821 -@login_required -@validate_request("doc_id", "meta") -async def set_meta(): - req = await get_request_json() - if not DocumentService.accessible(req["doc_id"], current_user.id): - return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR) - try: - meta = json.loads(req["meta"]) - if not isinstance(meta, dict): - return get_json_result(data=False, message="Only dictionary type supported.", code=RetCode.ARGUMENT_ERROR) - for k, v in meta.items(): - if isinstance(v, list): - if not all(isinstance(i, (str, int, float)) for i in v): - return 
get_json_result(data=False, message=f"The type is not supported in list: {v}", code=RetCode.ARGUMENT_ERROR) - elif not isinstance(v, (str, int, float)): - return get_json_result(data=False, message=f"The type is not supported: {v}", code=RetCode.ARGUMENT_ERROR) - except Exception as e: - return get_json_result(data=False, message=f"Json syntax error: {e}", code=RetCode.ARGUMENT_ERROR) - if not isinstance(meta, dict): - return get_json_result(data=False, message='Meta data should be in Json map format, like {"key": "value"}', code=RetCode.ARGUMENT_ERROR) - - try: - e, doc = DocumentService.get_by_id(req["doc_id"]) - if not e: - return get_data_error_result(message="Document not found!") - - if not DocMetadataService.update_document_metadata(req["doc_id"], meta): - return get_data_error_result(message="Database error (meta updates)!") - - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - @manager.route("/upload_info", methods=["POST"]) # noqa: F821 @login_required async def upload_info(): diff --git a/test/testcases/test_web_api/test_chunk_feedback/__init__.py b/test/testcases/test_web_api/test_chunk_feedback/__init__.py deleted file mode 100644 index a1d26f7eec..0000000000 --- a/test/testcases/test_web_api/test_chunk_feedback/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# -# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = []