Consolidate set_meta into update_document (#14045)

### What problem does this PR solve?

Consolidate "set_meta" API into "update_document" .

Before consolidation
Web API: POST /api/v1/document/set_meta
Http API - PUT /v1/datasets/<dataset_id>/document/<document_id>

After consolidation, Restful API -- PUT
/v1/datasets/<dataset_id>/document/<document_id>

### Type of change

- [x] Refactoring
This commit is contained in:
Jack
2026-04-13 12:47:17 +08:00
committed by GitHub
parent 3911d90993
commit 51ce6aab01
3 changed files with 54 additions and 56 deletions

View File

@@ -1564,6 +1564,28 @@ class RAGFlowClient:
else:
print(f"Fail to update chunk, HTTP {response.status_code}")
def _get_documents_by_ids(self, ids:list[str]):
response = self.http_client.request(
"POST",
"/document/infos",
json_body={"doc_ids": ids},
use_api_base=False,
auth_kind="web"
)
if response.status_code != 200:
return f"Fail to get document info, HTTP {response.status_code}", None
res_json = response.json()
if res_json.get("code") != 0:
return f"Fail to get document info: {res_json.get('message')}", None
docs = res_json.get("data", [])
if not docs:
return f"Document not found: {ids}", None
return None, docs
def set_metadata(self, command_dict):
if self.server_type != "user":
print("This command is only allowed in USER mode")
@@ -1572,14 +1594,42 @@ class RAGFlowClient:
doc_id = command_dict["doc_id"]
meta_json_str = command_dict["meta"]
# Parse JSON string to dict
import json
try:
meta_fields = json.loads(meta_json_str)
except json.JSONDecodeError as e:
print(f"Invalid JSON format: {e}")
return
# Step 1: Get document info to find kb_id (dataset_id)
doc_error_msg, docs = self._get_documents_by_ids([doc_id])
if doc_error_msg:
print(doc_error_msg)
return
if len(docs) == 0:
print(f"no document found for {doc_id}")
return
dataset_id = docs[0].get("kb_id")
if not dataset_id:
print(f"Dataset ID not found for document: {doc_id}")
return
# Send meta as JSON string
payload = {
"doc_id": doc_id,
"meta": meta_json_str,
"meta_fields": meta_fields,
}
response = self.http_client.request("POST", "/document/set_meta", json_body=payload,
use_api_base=False, auth_kind="web")
response = self.http_client.request(
"PUT",
f"/datasets/{dataset_id}/documents/{doc_id}",
json_body=payload,
use_api_base=True,
auth_kind="web"
)
res_json = response.json()
if response.status_code == 200:
if res_json.get("code") == 0: