Consolidate set_meta into update_document (#14045)

### What problem does this PR solve?

Consolidate the "set_meta" API into "update_document".

Before consolidation:
Web API: POST /api/v1/document/set_meta
HTTP API: PUT /v1/datasets/<dataset_id>/documents/<document_id>

After consolidation, a single RESTful API: PUT
/v1/datasets/<dataset_id>/documents/<document_id>

### Type of change

- [x] Refactoring
This commit is contained in:
Jack
2026-04-13 12:47:17 +08:00
committed by GitHub
parent 3911d90993
commit 51ce6aab01
3 changed files with 54 additions and 56 deletions

View File

@@ -1564,6 +1564,28 @@ class RAGFlowClient:
else:
print(f"Fail to update chunk, HTTP {response.status_code}")
def _get_documents_by_ids(self, ids:list[str]):
response = self.http_client.request(
"POST",
"/document/infos",
json_body={"doc_ids": ids},
use_api_base=False,
auth_kind="web"
)
if response.status_code != 200:
return f"Fail to get document info, HTTP {response.status_code}", None
res_json = response.json()
if res_json.get("code") != 0:
return f"Fail to get document info: {res_json.get('message')}", None
docs = res_json.get("data", [])
if not docs:
return f"Document not found: {ids}", None
return None, docs
def set_metadata(self, command_dict):
if self.server_type != "user":
print("This command is only allowed in USER mode")
@@ -1572,14 +1594,42 @@ class RAGFlowClient:
doc_id = command_dict["doc_id"]
meta_json_str = command_dict["meta"]
# Parse JSON string to dict
import json
try:
meta_fields = json.loads(meta_json_str)
except json.JSONDecodeError as e:
print(f"Invalid JSON format: {e}")
return
# Step 1: Get document info to find kb_id (dataset_id)
doc_error_msg, docs = self._get_documents_by_ids([doc_id])
if doc_error_msg:
print(doc_error_msg)
return
if len(docs) == 0:
print(f"no document found for {doc_id}")
return
dataset_id = docs[0].get("kb_id")
if not dataset_id:
print(f"Dataset ID not found for document: {doc_id}")
return
# Send meta as JSON string
payload = {
"doc_id": doc_id,
"meta": meta_json_str,
"meta_fields": meta_fields,
}
response = self.http_client.request("POST", "/document/set_meta", json_body=payload,
use_api_base=False, auth_kind="web")
response = self.http_client.request(
"PUT",
f"/datasets/{dataset_id}/documents/{doc_id}",
json_body=payload,
use_api_base=True,
auth_kind="web"
)
res_json = response.json()
if response.status_code == 200:
if res_json.get("code") == 0:

View File

@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License
#
import json
import os.path
import re
from pathlib import Path, PurePosixPath, PureWindowsPath
@@ -867,41 +866,6 @@ async def parse():
return get_json_result(data=txt)
@manager.route("/set_meta", methods=["POST"])  # noqa: F821
@login_required
@validate_request("doc_id", "meta")
async def set_meta():
    """Update a document's metadata from a JSON-encoded ``meta`` field.

    The request body must contain ``doc_id`` and ``meta``. ``meta`` is a JSON
    string that must decode to an object whose values are each a ``str``,
    ``int`` or ``float``, or a list of those.

    Returns:
        ``get_json_result(data=True)`` on success; an authentication error if
        the current user cannot access the document; an argument error if
        ``meta`` cannot be parsed or has an unsupported shape; a data error if
        the document is missing or the metadata update reports failure; a
        server error response if the lookup or update raises.
    """
    req = await get_request_json()
    if not DocumentService.accessible(req["doc_id"], current_user.id):
        return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)

    # Only the decode step can raise; keep the try body to that one call.
    # TypeError covers a non-string ``meta``, ValueError covers malformed JSON
    # (json.JSONDecodeError), RecursionError covers pathologically deep input.
    try:
        meta = json.loads(req["meta"])
    except (TypeError, ValueError, RecursionError) as err:
        return get_json_result(data=False, message=f"Json syntax error: {err}", code=RetCode.ARGUMENT_ERROR)

    if not isinstance(meta, dict):
        return get_json_result(data=False, message="Only dictionary type supported.", code=RetCode.ARGUMENT_ERROR)

    for v in meta.values():
        if isinstance(v, list):
            if not all(isinstance(i, (str, int, float)) for i in v):
                return get_json_result(data=False, message=f"The type is not supported in list: {v}", code=RetCode.ARGUMENT_ERROR)
        elif not isinstance(v, (str, int, float)):
            return get_json_result(data=False, message=f"The type is not supported: {v}", code=RetCode.ARGUMENT_ERROR)

    try:
        found, _ = DocumentService.get_by_id(req["doc_id"])
        if not found:
            return get_data_error_result(message="Document not found!")

        if not DocMetadataService.update_document_metadata(req["doc_id"], meta):
            return get_data_error_result(message="Database error (meta updates)!")

        return get_json_result(data=True)
    except Exception as err:
        return server_error_response(err)
@manager.route("/upload_info", methods=["POST"]) # noqa: F821
@login_required
async def upload_info():

View File

@@ -1,16 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = []