mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Refactor: Consolidate the Web API and HTTP API for the document delete API (#14254)
### What problem does this PR solve?

Before consolidation:
- Web API: POST /v1/document/rm
- HTTP API: DELETE /api/v1/datasets/<dataset_id>/documents

After consolidation:
- RESTful API: DELETE /api/v1/datasets/<dataset_id>/documents

### Type of change

- [x] Refactoring
This commit is contained in:
@@ -319,27 +319,6 @@ async def change_status():
|
||||
return get_json_result(data=result)
|
||||
|
||||
|
||||
@manager.route("/rm", methods=["POST"])  # noqa: F821
@login_required
@validate_request("doc_id")
async def rm():
    """Delete one or more documents on behalf of the logged-in user.

    The JSON body must carry ``doc_id``, either a single ID string or a list
    of IDs. Every ID is checked with ``DocumentService.accessible4deletion``
    before anything is deleted; the first ID that fails the check aborts the
    whole request with an authentication error.
    """
    payload = await get_request_json()
    requested = payload["doc_id"]
    # A bare string is treated as a one-element list.
    targets = [requested] if isinstance(requested, str) else requested

    # all() stops at the first inaccessible document, like an early-return loop.
    if not all(DocumentService.accessible4deletion(doc_id, current_user.id) for doc_id in targets):
        return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)

    failures = await thread_pool_exec(FileService.delete_docs, targets, current_user.id)
    if not failures:
        return get_json_result(data=True)

    return get_json_result(data=False, message=failures, code=RetCode.SERVER_ERROR)
|
||||
|
||||
|
||||
@manager.route("/run", methods=["POST"]) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("doc_ids", "run")
|
||||
|
||||
@@ -27,14 +27,17 @@ from api.constants import IMG_BASE64_PREFIX
|
||||
from api.db import VALID_FILE_TYPES
|
||||
from api.db.services.doc_metadata_service import DocMetadataService
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.file_service import FileService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.common.check_team_permission import check_kb_team_permission
|
||||
from api.utils.api_utils import get_data_error_result, get_error_data_result, get_result, get_json_result, \
|
||||
server_error_response, add_tenant_id_to_kwargs, get_request_json
|
||||
server_error_response, add_tenant_id_to_kwargs, get_request_json, get_error_argument_result, check_duplicate_ids
|
||||
from api.utils.validation_utils import (
|
||||
UpdateDocumentReq, format_validation_error_message,
|
||||
UpdateDocumentReq, format_validation_error_message, validate_and_parse_json_request, DeleteDocumentReq,
|
||||
)
|
||||
from common.constants import RetCode
|
||||
from common.metadata_utils import convert_conditions, meta_filter, turn2jsonschema
|
||||
from common.misc_utils import thread_pool_exec
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PATCH"]) # noqa: F821
|
||||
@login_required
|
||||
@@ -260,9 +263,7 @@ async def upload_document(dataset_id, tenant_id):
|
||||
description: Processing status.
|
||||
"""
|
||||
from api.constants import FILE_NAME_LEN_LIMIT
|
||||
from api.common.check_team_permission import check_kb_team_permission
|
||||
from api.db.services.file_service import FileService
|
||||
from common.misc_utils import thread_pool_exec
|
||||
|
||||
form = await request.form
|
||||
files = await request.files
|
||||
@@ -674,6 +675,89 @@ def _parse_doc_id_filter_with_metadata(req, kb_id):
|
||||
return RetCode.SUCCESS, "", list(doc_ids_filter) if doc_ids_filter is not None else [], return_empty_metadata
|
||||
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def delete_documents(tenant_id, dataset_id):
    """
    Delete documents from a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset containing the documents.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
      - in: body
        name: body
        description: Document deletion parameters.
        required: true
        schema:
          type: object
          properties:
            ids:
              type: array
              items:
                type: string
              description: |
                Specifies the documents to delete:
                - An array of IDs: only the specified documents will be deleted.
                - Omitted, `null`, or an empty array: `delete_all` must be `true`.
                Providing IDs together with `delete_all: true` is rejected.
            delete_all:
              type: boolean
              default: false
              description: Whether to delete all documents in the dataset.
    responses:
      200:
        description: Successful operation.
        schema:
          type: object
    """
    req, err = await validate_and_parse_json_request(request, DeleteDocumentReq)
    if err is not None or req is None:
        return get_error_argument_result(err)

    try:
        # Validate dataset exists and user has permission
        if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
            return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")

        # Exactly one of "ids" (non-empty) or "delete_all" must select the targets.
        doc_ids = req.get("ids") or []
        delete_all = req.get("delete_all", False)
        if not delete_all and not doc_ids:
            return get_error_data_result(message=f"should either provide doc ids or set delete_all(true), dataset: {dataset_id}. ")

        if doc_ids and delete_all:
            return get_error_data_result(message=f"should not provide both doc ids and delete_all(true), dataset: {dataset_id}. ")

        if delete_all:
            doc_ids = [doc.id for doc in DocumentService.query(kb_id=dataset_id)]

        # Always continue with the de-duplicated list. Previously it was only
        # adopted when no duplicates were found, so duplicated IDs were still
        # sent to the delete routine and inflated the "deleted" count.
        unique_doc_ids, duplicate_messages = check_duplicate_ids(doc_ids, "document")
        if duplicate_messages:
            logging.warning("duplicate_messages:%s", duplicate_messages)
        doc_ids = unique_doc_ids

        # NOTE(review): this handler does not itself check that each ID belongs
        # to `dataset_id`; presumably FileService.delete_docs enforces access —
        # confirm.
        errors = await thread_pool_exec(FileService.delete_docs, doc_ids, tenant_id)

        if errors:
            return get_error_data_result(message=str(errors))

        return get_result(data={"deleted": len(doc_ids)})
    except Exception as e:
        # Top-level boundary: log the full traceback, return a generic message.
        logging.exception(e)
        return get_error_data_result(message="Internal server error")
|
||||
def _aggregate_filters(docs):
|
||||
"""Aggregate filter options from a list of documents.
|
||||
|
||||
|
||||
@@ -21,12 +21,11 @@ import xxhash
|
||||
from pydantic import BaseModel, Field, validator
|
||||
from quart import request, send_file
|
||||
|
||||
from api.db.db_models import APIToken, Document, File, Task
|
||||
from api.db.db_models import APIToken, Document, Task
|
||||
from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_model_config_by_type_and_name, get_tenant_default_model_by_type
|
||||
from api.db.services.doc_metadata_service import DocMetadataService
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.file2document_service import File2DocumentService
|
||||
from api.db.services.file_service import FileService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks
|
||||
@@ -34,7 +33,7 @@ from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required
|
||||
from api.utils.image_utils import store_chunk_image
|
||||
from common import settings
|
||||
from common.constants import FileSource, LLMType, ParserType, RetCode, TaskStatus
|
||||
from common.constants import LLMType, ParserType, RetCode, TaskStatus
|
||||
from common.metadata_utils import convert_conditions, meta_filter
|
||||
from common.misc_utils import thread_pool_exec
|
||||
from common.string_utils import is_content_empty, remove_redundant_spaces
|
||||
@@ -209,120 +208,6 @@ async def metadata_batch_update(dataset_id, tenant_id):
|
||||
return get_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})
|
||||
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"])  # noqa: F821
@token_required
async def delete(tenant_id, dataset_id):
    """
    Delete documents from a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: body
        name: body
        description: Document deletion parameters.
        required: true
        schema:
          type: object
          properties:
            ids:
              type: array
              items:
                type: string
              description: |
                List of document IDs to delete.
                If omitted, `null`, or an empty array is provided, no documents will be deleted
                unless `delete_all` is `true`.
            delete_all:
              type: boolean
              description: When `true` and no IDs are given, delete every document in the dataset.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Documents deleted successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
    req = await get_request_json()
    if not req:
        return get_result()

    doc_ids = req.get("ids")
    if not doc_ids:
        # Without explicit IDs, only an explicit delete_all=True selects anything.
        if req.get("delete_all") is not True:
            return get_result()
        doc_ids = [doc.id for doc in DocumentService.query(kb_id=dataset_id)]
        if not doc_ids:
            return get_result()

    doc_list, duplicate_messages = check_duplicate_ids(doc_ids, "document")

    root_folder = FileService.get_root_folder(tenant_id)
    FileService.init_knowledgebase_docs(root_folder["id"], tenant_id)

    errors = []
    not_found = []
    success_count = 0
    for doc_id in doc_list:
        try:
            found, doc = DocumentService.get_by_id(doc_id)
            if not found:
                not_found.append(doc_id)
                continue

            # Keep the caller's tenant_id intact; use a separate name for the
            # tenant resolved from the document itself.
            doc_tenant_id = DocumentService.get_tenant_id(doc_id)
            if not doc_tenant_id:
                return get_error_data_result(message="Tenant not found!")

            # Resolve the storage location before the records that map to it are removed.
            bucket, name = File2DocumentService.get_storage_address(doc_id=doc_id)

            if not DocumentService.remove_document(doc, doc_tenant_id):
                return get_error_data_result(message="Database error (Document removal)!")

            f2d = File2DocumentService.get_by_document_id(doc_id)
            # A document without a file mapping must not abort the cleanup with
            # an IndexError after its database row is already gone.
            if f2d:
                FileService.filter_delete(
                    [
                        File.source_type == FileSource.KNOWLEDGEBASE,
                        File.id == f2d[0].file_id,
                    ]
                )
            File2DocumentService.delete_by_document_id(doc_id)

            settings.STORAGE_IMPL.rm(bucket, name)
            success_count += 1
        except Exception as exc:
            # Best effort: remember the failure and keep processing the rest.
            errors.append(str(exc))

    if not_found:
        return get_result(message=f"Documents not found: {not_found}", code=RetCode.DATA_ERROR)

    if errors:
        return get_result(message="; ".join(errors), code=RetCode.SERVER_ERROR)

    if duplicate_messages:
        if success_count > 0:
            return get_result(
                message=f"Partially deleted {success_count} documents with {len(duplicate_messages)} errors",
                data={"success_count": success_count, "errors": duplicate_messages},
            )
        return get_error_data_result(message=";".join(duplicate_messages))

    return get_result()
|
||||
|
||||
|
||||
DOC_STOP_PARSING_INVALID_STATE_MESSAGE = "Can't stop parsing document that has not started or already completed"
|
||||
DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE = "DOC_STOP_PARSING_INVALID_STATE"
|
||||
|
||||
|
||||
@@ -818,6 +818,9 @@ class DeleteReq(Base):
|
||||
class DeleteDatasetReq(DeleteReq): ...
|
||||
|
||||
|
||||
# Request schema for the document-deletion endpoint; it adds nothing of its
# own, so every field and validator comes from DeleteReq.
class DeleteDocumentReq(DeleteReq): ...
|
||||
|
||||
|
||||
class BaseListReq(BaseModel):
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
|
||||
@@ -26,11 +26,11 @@ class TestAuthorization:
|
||||
@pytest.mark.parametrize(
|
||||
"invalid_auth, expected_code, expected_message",
|
||||
[
|
||||
(None, 0, "`Authorization` can't be empty"),
|
||||
(None, 401, "<Unauthorized '401: Unauthorized'>"),
|
||||
(
|
||||
RAGFlowHttpApiAuth(INVALID_API_TOKEN),
|
||||
109,
|
||||
"Authentication error: API key is invalid!",
|
||||
401,
|
||||
"<Unauthorized '401: Unauthorized'>",
|
||||
),
|
||||
],
|
||||
)
|
||||
@@ -45,19 +45,19 @@ class TestDocumentsDeletion:
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_code, expected_message, remaining",
|
||||
[
|
||||
(None, 0, "", 3),
|
||||
({"ids": []}, 0, "", 3),
|
||||
({"ids": ["invalid_id"]}, 102, "Documents not found: ['invalid_id']", 3),
|
||||
({}, 102, "should either provide doc ids or set delete_all(true), dataset", 3),
|
||||
({"ids": []}, 102, "should either provide doc ids or set delete_all(true), dataset", 3),
|
||||
({"ids": ["invalid_id"]}, 101, "Field: <ids> - Message: <Invalid UUID1 format> - Value: <['invalid_id']>", 3),
|
||||
(
|
||||
{"ids": ["\n!?。;!?\"'"]},
|
||||
102,
|
||||
"""Documents not found: [\'\\n!?。;!?"\\\'\']""",
|
||||
101,
|
||||
"Field: <ids> - Message: <Invalid UUID1 format> - Value:",
|
||||
3,
|
||||
),
|
||||
(
|
||||
"not json",
|
||||
100,
|
||||
"AttributeError(\"'str' object has no attribute 'get'\")",
|
||||
101,
|
||||
"Invalid request payload: expected object, got str",
|
||||
3,
|
||||
),
|
||||
(lambda r: {"ids": r[:1]}, 0, "", 2),
|
||||
@@ -79,7 +79,7 @@ class TestDocumentsDeletion:
|
||||
res = delete_documents(HttpApiAuth, dataset_id, payload)
|
||||
assert res["code"] == expected_code
|
||||
if res["code"] != 0:
|
||||
assert res["message"] == expected_message
|
||||
assert expected_message in res["message"]
|
||||
|
||||
res = list_documents(HttpApiAuth, dataset_id)
|
||||
assert len(res["data"]["docs"]) == remaining
|
||||
@@ -117,12 +117,12 @@ class TestDocumentsDeletion:
|
||||
if callable(payload):
|
||||
payload = payload(document_ids)
|
||||
res = delete_documents(HttpApiAuth, dataset_id, payload)
|
||||
assert res["code"] == 102
|
||||
assert res["message"] == "Documents not found: ['invalid_id']"
|
||||
assert res["code"] == 101
|
||||
assert "Field: <ids> - Message: <Invalid UUID1 format> - Value" in res["message"]
|
||||
|
||||
res = list_documents(HttpApiAuth, dataset_id)
|
||||
assert len(res["data"]["docs"]) == 0
|
||||
assert res["data"]["total"] == 0
|
||||
assert len(res["data"]["docs"]) == 3
|
||||
assert res["data"]["total"] == 3
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_repeated_deletion(self, HttpApiAuth, add_documents_func):
|
||||
@@ -132,19 +132,18 @@ class TestDocumentsDeletion:
|
||||
|
||||
res = delete_documents(HttpApiAuth, dataset_id, {"ids": document_ids})
|
||||
assert res["code"] == 102
|
||||
assert "Documents not found" in res["message"]
|
||||
assert "Document not found" in res["message"]
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_duplicate_deletion(self, HttpApiAuth, add_documents_func):
|
||||
dataset_id, document_ids = add_documents_func
|
||||
res = delete_documents(HttpApiAuth, dataset_id, {"ids": document_ids + document_ids})
|
||||
assert res["code"] == 0
|
||||
assert "Duplicate document ids" in res["data"]["errors"][0]
|
||||
assert res["data"]["success_count"] == 3
|
||||
assert res["code"] == 101, res
|
||||
assert "Field: <ids> - Message: <Duplicate ids:" in res["message"]
|
||||
|
||||
res = list_documents(HttpApiAuth, dataset_id)
|
||||
assert len(res["data"]["docs"]) == 0
|
||||
assert res["data"]["total"] == 0
|
||||
assert len(res["data"]["docs"]) == 3
|
||||
assert res["data"]["total"] == 3
|
||||
|
||||
|
||||
@pytest.mark.p3
|
||||
|
||||
@@ -478,46 +478,6 @@ class TestDocRoutesUnit:
|
||||
assert res["data"]["matched_docs"] == 1
|
||||
|
||||
|
||||
def test_delete_branches(self, monkeypatch):
    """Walk the `delete` route through its early-exit and error branches.

    Each step re-patches one collaborator and calls the route again, so the
    statements are order-dependent: later steps rely on earlier patches.
    """
    module = _load_doc_module(monkeypatch)
    # `__wrapped__` is presumably the undecorated coroutine, called here
    # directly with (tenant_id, dataset_id).

    # Branch: caller has no access to the dataset.
    monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: False)
    res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
    assert "don't own the dataset" in res["message"]

    # Branch: accessible dataset but an empty JSON body -> success, nothing to do.
    monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: True)
    monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({}))
    res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
    assert res["code"] == module.RetCode.SUCCESS

    # Branch: document exists but no tenant can be resolved for it.
    monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"ids": ["doc-1"]}))
    monkeypatch.setattr(module, "check_duplicate_ids", lambda ids, _kind: (ids, []))
    monkeypatch.setattr(module.FileService, "get_root_folder", lambda _tenant: {"id": "pf-1"})
    monkeypatch.setattr(module.FileService, "init_knowledgebase_docs", lambda *_args, **_kwargs: None)
    monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _id: (True, _DummyDoc()))
    monkeypatch.setattr(module.DocumentService, "get_tenant_id", lambda _id: None)
    res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
    assert res["message"] == "Tenant not found!"

    # Branch: tenant resolved, but removing the document row fails.
    monkeypatch.setattr(module.DocumentService, "get_tenant_id", lambda _id: "tenant-1")
    monkeypatch.setattr(module.File2DocumentService, "get_storage_address", lambda **_kwargs: ("b", "n"))
    monkeypatch.setattr(module.DocumentService, "remove_document", lambda *_args, **_kwargs: False)
    res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
    assert "Document removal" in res["message"]

    def _raise_get_by_id(_id):
        raise RuntimeError("boom")

    # Branch: an exception inside the per-document loop is reported as a server error.
    monkeypatch.setattr(module.DocumentService, "get_by_id", _raise_get_by_id)
    res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
    assert res["code"] == module.RetCode.SERVER_ERROR
    assert "boom" in res["message"]

    # Branch: only duplicate messages and no unique IDs left -> data error
    # carrying the duplicate message.
    monkeypatch.setattr(module, "check_duplicate_ids", lambda _ids, _kind: ([], ["Duplicate document ids: doc-1"]))
    monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _id: (False, None))
    res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
    assert res["code"] == module.RetCode.DATA_ERROR
    assert "Duplicate document ids" in res["message"]
|
||||
|
||||
def test_parse_branches(self, monkeypatch):
|
||||
module = _load_doc_module(monkeypatch)
|
||||
monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: False)
|
||||
|
||||
@@ -24,10 +24,10 @@ class TestDocumentsDeletion:
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_message, remaining",
|
||||
[
|
||||
({"ids": None}, "", 3),
|
||||
({"ids": []}, "", 3),
|
||||
({"ids": ["invalid_id"]}, "Documents not found: ['invalid_id']", 3),
|
||||
({"ids": ["\n!?。;!?\"'"]}, "Documents not found: ['\\n!?。;!?\"\\'']", 3),
|
||||
({"ids": None}, "should either provide doc ids or set delete_all(true), dataset:", 3),
|
||||
({"ids": []}, "should either provide doc ids or set delete_all(true), dataset:", 3),
|
||||
({"ids": ["invalid_id"]}, "Field: <ids> - Message: <Invalid UUID1 format> - Value: <['invalid_id']>", 3),
|
||||
({"ids": ["\n!?。;!?\"'"]}, "Field: <ids> - Message: <Invalid UUID1 format> - Value:", 3),
|
||||
("not json", "must be a mapping", 3),
|
||||
(lambda r: {"ids": r[:1]}, "", 2),
|
||||
(lambda r: {"ids": r}, "", 0),
|
||||
@@ -69,10 +69,10 @@ class TestDocumentsDeletion:
|
||||
|
||||
with pytest.raises(Exception) as exception_info:
|
||||
dataset.delete_documents(**payload)
|
||||
assert "Documents not found: ['invalid_id']" in str(exception_info.value), str(exception_info.value)
|
||||
assert "Field: <ids> - Message: <Invalid UUID1 format> - Value: <" in str(exception_info.value), str(exception_info.value)
|
||||
|
||||
documents = dataset.list_documents()
|
||||
assert len(documents) == 0, str(documents)
|
||||
assert len(documents) == 3, str(documents)
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_repeated_deletion(self, add_documents_func):
|
||||
@@ -81,14 +81,16 @@ class TestDocumentsDeletion:
|
||||
dataset.delete_documents(ids=document_ids)
|
||||
with pytest.raises(Exception) as exception_info:
|
||||
dataset.delete_documents(ids=document_ids)
|
||||
assert "Documents not found" in str(exception_info.value), str(exception_info.value)
|
||||
assert "Document not found" in str(exception_info.value), str(exception_info.value)
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_duplicate_deletion(self, add_documents_func):
|
||||
dataset, documents = add_documents_func
|
||||
document_ids = [document.id for document in documents]
|
||||
dataset.delete_documents(ids=document_ids + document_ids)
|
||||
assert len(dataset.list_documents()) == 0, str(dataset.list_documents())
|
||||
with pytest.raises(Exception) as exception_info:
|
||||
dataset.delete_documents(ids=document_ids + document_ids)
|
||||
assert "Field: <ids> - Message: <Duplicate ids:" in str(exception_info.value), str(exception_info.value)
|
||||
assert len(dataset.list_documents()) == 3, str(dataset.list_documents())
|
||||
|
||||
|
||||
@pytest.mark.p3
|
||||
|
||||
@@ -218,8 +218,8 @@ class TestAddChunk:
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_add_chunk_to_deleted_document(self, WebApiAuth, add_document):
|
||||
_, doc_id = add_document
|
||||
delete_document(WebApiAuth, {"doc_id": doc_id})
|
||||
kb_id, doc_id = add_document
|
||||
delete_document(WebApiAuth, kb_id, {"ids": [doc_id]})
|
||||
res = add_chunk(WebApiAuth, {"doc_id": doc_id, "content_with_weight": "chunk test"})
|
||||
assert res["code"] == 102, res
|
||||
assert res["message"] == "Document not found!", res
|
||||
|
||||
@@ -251,8 +251,8 @@ class TestUpdateChunk:
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_update_chunk_to_deleted_document(self, WebApiAuth, add_chunks):
|
||||
_, doc_id, chunk_ids = add_chunks
|
||||
delete_document(WebApiAuth, {"doc_id": doc_id})
|
||||
kb_id, doc_id, chunk_ids = add_chunks
|
||||
delete_document(WebApiAuth, kb_id, {"ids": [doc_id]})
|
||||
payload = {"doc_id": doc_id, "chunk_id": chunk_ids[0], "content_with_weight": "test content"}
|
||||
res = update_chunk(WebApiAuth, payload)
|
||||
assert res["code"] == 102, res
|
||||
|
||||
@@ -382,8 +382,10 @@ def list_documents(auth, params=None, payload=None, *, headers=HEADERS, data=Non
|
||||
return res.json()
|
||||
|
||||
|
||||
def delete_document(auth, payload=None, *, headers=HEADERS, data=None):
|
||||
res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/rm", headers=headers, auth=auth, json=payload, data=data)
|
||||
def delete_document(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
|
||||
# New API: DELETE /api/v1/datasets/<dataset_id>/documents
|
||||
url = f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/documents"
|
||||
res = requests.delete(url=url, headers=headers, auth=auth, json=payload, data=data)
|
||||
return res.json()
|
||||
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ def add_document_func(request, WebApiAuth, add_dataset, ragflow_tmp_dir):
|
||||
def cleanup():
|
||||
res = list_documents(WebApiAuth, {"kb_id": dataset_id})
|
||||
for doc in res["data"]["docs"]:
|
||||
delete_document(WebApiAuth, {"doc_id": doc["id"]})
|
||||
delete_document(WebApiAuth, dataset_id, {"ids": [doc["id"]]})
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
|
||||
@@ -49,7 +49,7 @@ def add_documents(request, WebApiAuth, add_dataset, ragflow_tmp_dir):
|
||||
def cleanup():
|
||||
res = list_documents(WebApiAuth, {"kb_id": dataset_id})
|
||||
for doc in res["data"]["docs"]:
|
||||
delete_document(WebApiAuth, {"doc_id": doc["id"]})
|
||||
delete_document(WebApiAuth, dataset_id, {"ids": [doc["id"]]})
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
|
||||
@@ -62,7 +62,7 @@ def add_documents_func(request, WebApiAuth, add_dataset_func, ragflow_tmp_dir):
|
||||
def cleanup():
|
||||
res = list_documents(WebApiAuth, {"kb_id": dataset_id})
|
||||
for doc in res["data"]["docs"]:
|
||||
delete_document(WebApiAuth, {"doc_id": doc["id"]})
|
||||
delete_document(WebApiAuth, dataset_id, {"ids": [doc["id"]]})
|
||||
|
||||
request.addfinalizer(cleanup)
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ class TestAuthorization:
|
||||
],
|
||||
)
|
||||
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
|
||||
res = delete_document(invalid_auth)
|
||||
res = delete_document(invalid_auth, "kb_id")
|
||||
assert res["code"] == expected_code, res
|
||||
assert res["message"] == expected_message, res
|
||||
|
||||
@@ -46,22 +46,23 @@ class TestDocumentsDeletion:
|
||||
@pytest.mark.parametrize(
|
||||
"payload, expected_code, expected_message, remaining",
|
||||
[
|
||||
(None, 101, "required argument are missing: doc_id; ", 3),
|
||||
({"doc_id": ""}, 109, "No authorization.", 3),
|
||||
({"doc_id": "invalid_id"}, 109, "No authorization.", 3),
|
||||
({"doc_id": "\n!?。;!?\"'"}, 109, "No authorization.", 3),
|
||||
("not json", 101, "required argument are missing: doc_id; ", 3),
|
||||
(lambda r: {"doc_id": r[0]}, 0, "", 2),
|
||||
({}, 102, "should either provide doc ids or set delete_all(true), dataset:", 3),
|
||||
({"invalid_key":[]}, 101, "Field: <invalid_key> - Message: <Extra inputs are not permitted> - Value: <[]>", 3),
|
||||
({"ids": ""}, 101, "Field: <ids> - Message: <Input should be a valid list> - Value: <>", 3),
|
||||
({"ids": ["invalid_id"]}, 101, "Field: <ids> - Message: <Invalid UUID1 format> - Value:", 3),
|
||||
("not json", 101, "Invalid request payload: expected object, got str", 3),
|
||||
(lambda r: {"ids": r[0]}, 101, "Field: <ids> - Message: <Input should be a valid list> - Value", 3),
|
||||
(lambda r: {"ids": r}, 0, "", 0),
|
||||
],
|
||||
)
|
||||
def test_basic_scenarios(self, WebApiAuth, add_documents_func, payload, expected_code, expected_message, remaining):
|
||||
kb_id, document_ids = add_documents_func
|
||||
if callable(payload):
|
||||
payload = payload(document_ids)
|
||||
res = delete_document(WebApiAuth, payload)
|
||||
res = delete_document(WebApiAuth, kb_id, payload)
|
||||
assert res["code"] == expected_code, res
|
||||
if res["code"] != 0:
|
||||
assert res["message"] == expected_message, res
|
||||
assert expected_message in res["message"], res
|
||||
|
||||
res = list_documents(WebApiAuth, {"kb_id": kb_id})
|
||||
assert len(res["data"]["docs"]) == remaining, res
|
||||
@@ -69,57 +70,46 @@ class TestDocumentsDeletion:
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_repeated_deletion(self, WebApiAuth, add_documents_func):
|
||||
_, document_ids = add_documents_func
|
||||
kb_id, document_ids = add_documents_func
|
||||
for doc_id in document_ids:
|
||||
res = delete_document(WebApiAuth, {"doc_id": doc_id})
|
||||
res = delete_document(WebApiAuth, kb_id, {"ids": [doc_id]})
|
||||
assert res["code"] == 0, res
|
||||
|
||||
for doc_id in document_ids:
|
||||
res = delete_document(WebApiAuth, {"doc_id": doc_id})
|
||||
assert res["code"] == 109, res
|
||||
assert res["message"] == "No authorization.", res
|
||||
res = delete_document(WebApiAuth, kb_id, {"ids": [doc_id]})
|
||||
assert res["code"] == 102, res
|
||||
assert res["message"] == "Document not found!", res
|
||||
|
||||
@pytest.mark.p2
def test_delete_all(self, WebApiAuth, add_documents_func):
    """`delete_all: true` empties the dataset in a single request."""
    kb_id, document_ids = add_documents_func

    delete_res = delete_document(WebApiAuth, kb_id, {"delete_all": True})
    assert delete_res["code"] == 0, delete_res

    # The follow-up listing must report no documents at all.
    listing = list_documents(WebApiAuth, {"kb_id": kb_id})
    listed = listing["data"]
    assert len(listed["docs"]) == 0, listing
    assert listed["total"] == 0, listing
|
||||
|
||||
|
||||
@pytest.mark.p2
|
||||
class TestDocumentsDeletionUnit:
    """Unit-level checks of the `rm` route with its collaborators stubbed out."""

    def test_rm_string_doc_id_normalization_success_unit(self, document_app_module, monkeypatch):
        """A bare string `doc_id` reaches the delete call as a one-element list."""
        module = document_app_module
        seen = {}

        async def stub_request_json():
            return {"doc_id": "doc1"}

        async def stub_thread_pool_exec(func, doc_ids, user_id):
            # Record what the route hands to the worker pool; None means "no errors".
            seen["func"] = func
            seen["doc_ids"] = doc_ids
            seen["user_id"] = user_id
            return None

        monkeypatch.setattr(module, "get_request_json", stub_request_json)
        monkeypatch.setattr(module.DocumentService, "accessible4deletion", lambda *_args, **_kwargs: True)
        monkeypatch.setattr(module, "thread_pool_exec", stub_thread_pool_exec)

        response = _run(module.rm.__wrapped__())

        assert response["code"] == 0
        assert response["data"] is True
        assert seen["func"] == module.FileService.delete_docs
        assert seen["doc_ids"] == ["doc1"]
        assert seen["user_id"] == module.current_user.id
|
||||
|
||||
|
||||
@pytest.mark.p3
|
||||
def test_concurrent_deletion(WebApiAuth, add_dataset, tmp_path):
|
||||
count = 100
|
||||
kb_id = add_dataset
|
||||
document_ids = bulk_upload_documents(WebApiAuth, kb_id, count, tmp_path)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
futures = [executor.submit(delete_document, WebApiAuth, {"doc_id": document_ids[i]}) for i in range(count)]
|
||||
futures = [executor.submit(delete_document, WebApiAuth, kb_id, {"ids": [document_ids[i]]}) for i in range(count)]
|
||||
responses = list(as_completed(futures))
|
||||
assert len(responses) == count, responses
|
||||
assert all(future.result()["code"] == 0 for future in futures), responses
|
||||
|
||||
res = list_documents(WebApiAuth, {"kb_id": kb_id})
|
||||
assert len(res["data"]["docs"]) == 0, res
|
||||
assert res["data"]["total"] == 0, res
|
||||
|
||||
@pytest.mark.p3
|
||||
|
||||
@pytest.mark.p2
|
||||
def test_delete_100(WebApiAuth, add_dataset, tmp_path):
|
||||
documents_num = 100
|
||||
kb_id = add_dataset
|
||||
@@ -128,7 +118,7 @@ def test_delete_100(WebApiAuth, add_dataset, tmp_path):
|
||||
assert res["data"]["total"] == documents_num, res
|
||||
|
||||
for doc_id in document_ids:
|
||||
res = delete_document(WebApiAuth, {"doc_id": doc_id})
|
||||
res = delete_document(WebApiAuth, kb_id, {"ids": [doc_id]})
|
||||
assert res["code"] == 0, res
|
||||
|
||||
res = list_documents(WebApiAuth, {"kb_id": kb_id})
|
||||
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
import i18n from '@/locales/config';
|
||||
import { EMPTY_METADATA_FIELD } from '@/pages/dataset/dataset/use-select-filters';
|
||||
import kbService, {
|
||||
deleteDocument,
|
||||
documentFilter,
|
||||
listDocument,
|
||||
renameDocument,
|
||||
@@ -315,6 +316,7 @@ export const useRunDocument = () => {
|
||||
|
||||
export const useRemoveDocument = () => {
|
||||
const queryClient = useQueryClient();
|
||||
const { id: datasetId } = useParams();
|
||||
const {
|
||||
data,
|
||||
isPending: loading,
|
||||
@@ -322,7 +324,8 @@ export const useRemoveDocument = () => {
|
||||
} = useMutation({
|
||||
mutationKey: [DocumentApiAction.RemoveDocument],
|
||||
mutationFn: async (documentIds: string | string[]) => {
|
||||
const { data } = await kbService.documentRm({ doc_id: documentIds });
|
||||
const ids = Array.isArray(documentIds) ? documentIds : [documentIds];
|
||||
const { data } = await deleteDocument(datasetId!, ids);
|
||||
if (data.code === 0) {
|
||||
message.success(i18n.t('message.deleted'));
|
||||
queryClient.invalidateQueries({
|
||||
@@ -435,7 +438,7 @@ export const useSetDocumentMeta = () => {
|
||||
}
|
||||
return data?.code;
|
||||
} catch (error) {
|
||||
message.error('error');
|
||||
message.error('error:' + error);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
@@ -18,8 +18,6 @@ const {
|
||||
kbList,
|
||||
getDocumentList,
|
||||
documentChangeStatus,
|
||||
documentRm,
|
||||
documentDelete,
|
||||
documentCreate,
|
||||
documentChangeParser,
|
||||
documentThumbnails,
|
||||
@@ -72,10 +70,6 @@ const methods = {
|
||||
url: documentChangeStatus,
|
||||
method: 'post',
|
||||
},
|
||||
documentRm: {
|
||||
url: documentRm,
|
||||
method: 'post',
|
||||
},
|
||||
documentCreate: {
|
||||
url: documentCreate,
|
||||
method: 'post',
|
||||
@@ -137,10 +131,6 @@ const methods = {
|
||||
url: knowledgeGraph,
|
||||
method: 'get',
|
||||
},
|
||||
documentDelete: {
|
||||
url: documentDelete,
|
||||
method: 'delete',
|
||||
},
|
||||
listTagByKnowledgeIds: {
|
||||
url: listTagByKnowledgeIds,
|
||||
method: 'get',
|
||||
@@ -276,6 +266,9 @@ export const renameDocument = (
|
||||
data: { name?: string },
|
||||
) => request.patch(api.documentRename(datasetId, documentId), { data });
|
||||
|
||||
/**
 * Delete the given documents from a dataset.
 *
 * Sends a DELETE request to the dataset's documents URL with the IDs in the
 * request body as `{ ids: [...] }`.
 */
export const deleteDocument = (datasetId: string, documentIds: string[]) => {
  const url = api.documentDelete(datasetId);
  const body = { ids: documentIds };
  return request.delete(url, { data: body });
};
|
||||
|
||||
export const getMetaDataService = ({
|
||||
kb_id,
|
||||
doc_ids,
|
||||
|
||||
@@ -109,8 +109,8 @@ export default {
|
||||
getDocumentList: (datasetId: string) =>
|
||||
`${restAPIv1}/datasets/${datasetId}/documents`,
|
||||
documentChangeStatus: `${webAPI}/document/change_status`,
|
||||
documentRm: `${webAPI}/document/rm`,
|
||||
documentDelete: `${webAPI}/api/document`,
|
||||
documentDelete: (datasetId: string) =>
|
||||
`${restAPIv1}/datasets/${datasetId}/documents`,
|
||||
documentRename: (datasetId: string, documentId: string) =>
|
||||
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
|
||||
documentCreate: `${webAPI}/document/create`,
|
||||
|
||||
Reference in New Issue
Block a user