mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Refactor: Consolidate Web API & HTTP API for document infos (#14239)
### What problem does this PR solve?

Before consolidation:
- Web API: POST /v1/document/infos
- HTTP API: GET /api/v1/datasets/<dataset_id>/documents

After consolidation:
- RESTful API: GET /api/v1/datasets/<dataset_id>/documents?ids=id1&ids=id2

### Type of change

- [ ] Refactoring
This commit is contained in:
@@ -66,6 +66,7 @@ class DataSet(Base):
|
||||
def list_documents(
|
||||
self,
|
||||
id: str | None = None,
|
||||
ids: list[str] | None = None,
|
||||
name: str | None = None,
|
||||
keywords: str | None = None,
|
||||
page: int = 1,
|
||||
@@ -75,6 +76,10 @@ class DataSet(Base):
|
||||
create_time_from: int = 0,
|
||||
create_time_to: int = 0,
|
||||
):
|
||||
# Validate that id and ids are not used together
|
||||
if id and ids:
|
||||
raise ValueError("Cannot use both 'id' and 'ids' parameters at the same time.")
|
||||
|
||||
params = {
|
||||
"id": id,
|
||||
"name": name,
|
||||
@@ -86,6 +91,10 @@ class DataSet(Base):
|
||||
"create_time_from": create_time_from,
|
||||
"create_time_to": create_time_to,
|
||||
}
|
||||
# Handle ids parameter - convert to multiple query params
|
||||
if ids:
|
||||
for doc_id in ids:
|
||||
params.append(("ids", doc_id))
|
||||
res = self.get(f"/datasets/{self.id}/documents", params=params)
|
||||
res = res.json()
|
||||
documents = []
|
||||
|
||||
@@ -75,11 +75,36 @@ def list_document(auth, dataset_id):
|
||||
return res.json()
|
||||
|
||||
|
||||
def get_docs_info(auth, dataset_id, doc_ids=None, doc_id=None):
    """
    Get document information by IDs.

    Issues ``GET {HOST_ADDRESS}/api/v1/datasets/<dataset_id>/documents`` and
    returns the decoded JSON body of the response.

    Args:
        auth: Value sent as the ``Authorization`` header
        dataset_id: Dataset ID
        doc_ids: List of document IDs (use for multiple) - exclusive with
            doc_id. Each one is sent as a repeated ``ids`` query parameter.
        doc_id: Single document ID (use for one) - exclusive with doc_ids.
            Sent as the ``id`` query parameter.

    Returns:
        The parsed JSON response.

    Raises:
        ValueError: If both doc_id and doc_ids are provided
    """
    # Validate that id and ids are not used together
    if doc_id and doc_ids:
        raise ValueError("Cannot use both 'id' and 'ids' parameters at the same time.")

    authorization = {"Authorization": auth}

    # Build the query as a list of (key, value) tuples rather than a dict:
    # a dict has no .append() and cannot repeat a key, whereas a list of
    # tuples is encoded as ?ids=a&ids=b.
    params = []
    if doc_ids:
        # Multiple IDs
        params.extend(("ids", one_id) for one_id in doc_ids)
    elif doc_id:
        # Single ID
        params.append(("id", doc_id))

    # Use /api/v1 prefix for dataset API
    url = f"{HOST_ADDRESS}/api/v1/datasets/{dataset_id}/documents"
    res = requests.get(url=url, headers=authorization, params=params)
    return res.json()
|
||||
|
||||
|
||||
|
||||
@@ -48,14 +48,14 @@ def test_parse_txt_document(get_auth):
|
||||
for doc in res['data']['docs']:
|
||||
doc_id_list.append(doc['id'])
|
||||
|
||||
res = get_docs_info(get_auth, doc_id_list)
|
||||
res = get_docs_info(get_auth, dataset_id, doc_ids=doc_id_list)
|
||||
print(doc_id_list)
|
||||
doc_count = len(doc_id_list)
|
||||
res = parse_docs(get_auth, doc_id_list)
|
||||
|
||||
start_ts = timer()
|
||||
while True:
|
||||
res = get_docs_info(get_auth, doc_id_list)
|
||||
res = get_docs_info(get_auth, dataset_id, doc_ids=doc_id_list)
|
||||
finished_count = 0
|
||||
for doc_info in res['data']:
|
||||
if doc_info['progress'] == 1:
|
||||
|
||||
Reference in New Issue
Block a user