Refactor: Consolidate the Web API and HTTP API for document deletion (#14254)

### What problem does this PR solve?

Before consolidation:
- Web API: POST /v1/document/rm
- HTTP API: DELETE /api/v1/datasets/<dataset_id>/documents

After consolidation:
- RESTful API: DELETE /api/v1/datasets/<dataset_id>/documents

### Type of change

- [x] Refactoring
This commit is contained in:
Jack
2026-04-22 10:49:52 +08:00
committed by GitHub
parent 6baf74afc1
commit 3d8a82c0aa
15 changed files with 178 additions and 278 deletions

View File

@@ -319,27 +319,6 @@ async def change_status():
return get_json_result(data=result)
@manager.route("/rm", methods=["POST"])  # noqa: F821
@login_required
@validate_request("doc_id")
async def rm():
    """Remove one or more documents on behalf of the logged-in user.

    The JSON body must carry ``doc_id``, either a single ID string or a list
    of IDs. Every ID is checked with ``DocumentService.accessible4deletion``
    before anything is deleted; the first ID that fails the check aborts the
    request with an authentication error. The deletion itself is delegated to
    ``FileService.delete_docs`` through ``thread_pool_exec``.
    """
    payload = await get_request_json()
    requested = payload["doc_id"]
    # A bare string is treated as a one-element list.
    targets = [requested] if isinstance(requested, str) else requested

    for target in targets:
        if DocumentService.accessible4deletion(target, current_user.id):
            continue
        return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)

    failure = await thread_pool_exec(FileService.delete_docs, targets, current_user.id)
    if not failure:
        return get_json_result(data=True)
    return get_json_result(data=False, message=failure, code=RetCode.SERVER_ERROR)
@manager.route("/run", methods=["POST"]) # noqa: F821
@login_required
@validate_request("doc_ids", "run")

View File

@@ -27,14 +27,17 @@ from api.constants import IMG_BASE64_PREFIX
from api.db import VALID_FILE_TYPES
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.document_service import DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.common.check_team_permission import check_kb_team_permission
from api.utils.api_utils import get_data_error_result, get_error_data_result, get_result, get_json_result, \
server_error_response, add_tenant_id_to_kwargs, get_request_json
server_error_response, add_tenant_id_to_kwargs, get_request_json, get_error_argument_result, check_duplicate_ids
from api.utils.validation_utils import (
UpdateDocumentReq, format_validation_error_message,
UpdateDocumentReq, format_validation_error_message, validate_and_parse_json_request, DeleteDocumentReq,
)
from common.constants import RetCode
from common.metadata_utils import convert_conditions, meta_filter, turn2jsonschema
from common.misc_utils import thread_pool_exec
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["PATCH"]) # noqa: F821
@login_required
@@ -260,9 +263,7 @@ async def upload_document(dataset_id, tenant_id):
description: Processing status.
"""
from api.constants import FILE_NAME_LEN_LIMIT
from api.common.check_team_permission import check_kb_team_permission
from api.db.services.file_service import FileService
from common.misc_utils import thread_pool_exec
form = await request.form
files = await request.files
@@ -674,6 +675,89 @@ def _parse_doc_id_filter_with_metadata(req, kb_id):
return RetCode.SUCCESS, "", list(doc_ids_filter) if doc_ids_filter is not None else [], return_empty_metadata
@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"])  # noqa: F821
@login_required
@add_tenant_id_to_kwargs
async def delete_documents(tenant_id, dataset_id):
    """
    Delete documents from a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset containing the documents.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
      - in: body
        name: body
        description: Document deletion parameters.
        required: true
        schema:
          type: object
          properties:
            ids:
              type: array
              items:
                type: string
              description: |
                Specifies the documents to delete:
                - An array of IDs, only the specified documents will be deleted.
                - May be omitted or null when `delete_all` is true.
                Must not be combined with `delete_all: true`.
            delete_all:
              type: boolean
              default: false
              description: |
                Whether to delete all documents in the dataset.
                Must not be combined with a non-empty `ids` array.
    responses:
      200:
        description: Successful operation.
        schema:
          type: object
    """
    req, err = await validate_and_parse_json_request(request, DeleteDocumentReq)
    if err is not None or req is None:
        return get_error_argument_result(err)

    try:
        # Validate dataset exists and user has permission
        if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
            return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")

        # Get documents to delete; exactly one of `ids` / `delete_all` must be supplied.
        doc_ids = req.get("ids") or []
        delete_all = req.get("delete_all", False)
        if not delete_all and len(doc_ids) == 0:
            return get_error_data_result(message=f"should either provide doc ids or set delete_all(true), dataset: {dataset_id}. ")
        if len(doc_ids) > 0 and delete_all:
            return get_error_data_result(message=f"should not provide both doc ids and delete_all(true), dataset: {dataset_id}. ")
        if delete_all:
            doc_ids = [doc.id for doc in DocumentService.query(kb_id=dataset_id)]

        # Make sure each id is unique. The de-duplicated list is used even when
        # duplicates were reported, so no document is submitted for deletion twice.
        unique_doc_ids, duplicate_messages = check_duplicate_ids(doc_ids, "document")
        if duplicate_messages:
            logging.warning("duplicate_messages:%s", duplicate_messages)
        doc_ids = unique_doc_ids

        # NOTE(review): explicitly supplied ids are not checked here against
        # `dataset_id`; confirm FileService.delete_docs enforces ownership.
        # Delete documents using existing FileService.delete_docs
        errors = await thread_pool_exec(FileService.delete_docs, doc_ids, tenant_id)
        if errors:
            return get_error_data_result(message=str(errors))

        return get_result(data={"deleted": len(doc_ids)})
    except Exception as e:
        # Boundary handler: log the full traceback, return a generic message.
        logging.exception(e)
        return get_error_data_result(message="Internal server error")
def _aggregate_filters(docs):
"""Aggregate filter options from a list of documents.

View File

@@ -21,12 +21,11 @@ import xxhash
from pydantic import BaseModel, Field, validator
from quart import request, send_file
from api.db.db_models import APIToken, Document, File, Task
from api.db.db_models import APIToken, Document, Task
from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_model_config_by_type_and_name, get_tenant_default_model_by_type
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks
@@ -34,7 +33,7 @@ from api.db.services.tenant_llm_service import TenantLLMService
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required
from api.utils.image_utils import store_chunk_image
from common import settings
from common.constants import FileSource, LLMType, ParserType, RetCode, TaskStatus
from common.constants import LLMType, ParserType, RetCode, TaskStatus
from common.metadata_utils import convert_conditions, meta_filter
from common.misc_utils import thread_pool_exec
from common.string_utils import is_content_empty, remove_redundant_spaces
@@ -209,120 +208,6 @@ async def metadata_batch_update(dataset_id, tenant_id):
return get_result(data={"updated": updated, "matched_docs": len(target_doc_ids)})
@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"])  # noqa: F821
@token_required
async def delete(tenant_id, dataset_id):
    """
    Delete documents from a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: body
        name: body
        description: Document deletion parameters.
        required: true
        schema:
          type: object
          properties:
            ids:
              type: array
              items:
                type: string
              description: |
                List of document IDs to delete.
                If omitted, `null`, or an empty array is provided, no documents will be deleted
                unless `delete_all` is true.
            delete_all:
              type: boolean
              description: |
                Only consulted when `ids` is empty; when true, every document
                in the dataset is deleted.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Documents deleted successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")

    req = await get_request_json()
    if not req:
        return get_result()

    doc_ids = req.get("ids")
    if not doc_ids:
        # Without explicit ids, only an explicit `delete_all: true` selects documents.
        if req.get("delete_all") is not True:
            return get_result()
        doc_ids = [doc.id for doc in DocumentService.query(kb_id=dataset_id)]
        if not doc_ids:
            return get_result()

    doc_list, duplicate_messages = check_duplicate_ids(doc_ids, "document")

    root_folder = FileService.get_root_folder(tenant_id)
    pf_id = root_folder["id"]
    FileService.init_knowledgebase_docs(pf_id, tenant_id)

    errors = ""
    not_found = []
    success_count = 0
    for doc_id in doc_list:
        try:
            found, doc = DocumentService.get_by_id(doc_id)
            if not found:
                not_found.append(doc_id)
                continue

            # Kept in its own name so the `tenant_id` argument is not rebound mid-loop.
            doc_tenant_id = DocumentService.get_tenant_id(doc_id)
            if not doc_tenant_id:
                return get_error_data_result(message="Tenant not found!")

            # Resolve the storage address before the document row disappears.
            b, n = File2DocumentService.get_storage_address(doc_id=doc_id)

            if not DocumentService.remove_document(doc, doc_tenant_id):
                return get_error_data_result(message="Database error (Document removal)!")

            f2d = File2DocumentService.get_by_document_id(doc_id)
            # A document without a file mapping must not abort the remaining
            # cleanup (indexing an empty result would raise IndexError).
            if f2d:
                FileService.filter_delete(
                    [
                        File.source_type == FileSource.KNOWLEDGEBASE,
                        File.id == f2d[0].file_id,
                    ]
                )
            File2DocumentService.delete_by_document_id(doc_id)

            settings.STORAGE_IMPL.rm(b, n)
            success_count += 1
        except Exception as e:
            # Best effort: remember the failure and keep processing the other ids.
            errors += str(e)

    if not_found:
        return get_result(message=f"Documents not found: {not_found}", code=RetCode.DATA_ERROR)

    if errors:
        return get_result(message=errors, code=RetCode.SERVER_ERROR)

    if duplicate_messages:
        if success_count > 0:
            return get_result(
                message=f"Partially deleted {success_count} documents with {len(duplicate_messages)} errors",
                data={"success_count": success_count, "errors": duplicate_messages},
            )
        else:
            return get_error_data_result(message=";".join(duplicate_messages))

    return get_result()
DOC_STOP_PARSING_INVALID_STATE_MESSAGE = "Can't stop parsing document that has not started or already completed"
DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE = "DOC_STOP_PARSING_INVALID_STATE"

View File

@@ -818,6 +818,9 @@ class DeleteReq(Base):
class DeleteDatasetReq(DeleteReq): ...
# Request model for document deletion; inherits everything from DeleteReq and adds no fields of its own.
class DeleteDocumentReq(DeleteReq): ...
class BaseListReq(BaseModel):
model_config = ConfigDict(extra="forbid")

View File

@@ -26,11 +26,11 @@ class TestAuthorization:
@pytest.mark.parametrize(
"invalid_auth, expected_code, expected_message",
[
(None, 0, "`Authorization` can't be empty"),
(None, 401, "<Unauthorized '401: Unauthorized'>"),
(
RAGFlowHttpApiAuth(INVALID_API_TOKEN),
109,
"Authentication error: API key is invalid!",
401,
"<Unauthorized '401: Unauthorized'>",
),
],
)
@@ -45,19 +45,19 @@ class TestDocumentsDeletion:
@pytest.mark.parametrize(
"payload, expected_code, expected_message, remaining",
[
(None, 0, "", 3),
({"ids": []}, 0, "", 3),
({"ids": ["invalid_id"]}, 102, "Documents not found: ['invalid_id']", 3),
({}, 102, "should either provide doc ids or set delete_all(true), dataset", 3),
({"ids": []}, 102, "should either provide doc ids or set delete_all(true), dataset", 3),
({"ids": ["invalid_id"]}, 101, "Field: <ids> - Message: <Invalid UUID1 format> - Value: <['invalid_id']>", 3),
(
{"ids": ["\n!?。;!?\"'"]},
102,
"""Documents not found: [\'\\n!?。;!?"\\\'\']""",
101,
"Field: <ids> - Message: <Invalid UUID1 format> - Value:",
3,
),
(
"not json",
100,
"AttributeError(\"'str' object has no attribute 'get'\")",
101,
"Invalid request payload: expected object, got str",
3,
),
(lambda r: {"ids": r[:1]}, 0, "", 2),
@@ -79,7 +79,7 @@ class TestDocumentsDeletion:
res = delete_documents(HttpApiAuth, dataset_id, payload)
assert res["code"] == expected_code
if res["code"] != 0:
assert res["message"] == expected_message
assert expected_message in res["message"]
res = list_documents(HttpApiAuth, dataset_id)
assert len(res["data"]["docs"]) == remaining
@@ -117,12 +117,12 @@ class TestDocumentsDeletion:
if callable(payload):
payload = payload(document_ids)
res = delete_documents(HttpApiAuth, dataset_id, payload)
assert res["code"] == 102
assert res["message"] == "Documents not found: ['invalid_id']"
assert res["code"] == 101
assert "Field: <ids> - Message: <Invalid UUID1 format> - Value" in res["message"]
res = list_documents(HttpApiAuth, dataset_id)
assert len(res["data"]["docs"]) == 0
assert res["data"]["total"] == 0
assert len(res["data"]["docs"]) == 3
assert res["data"]["total"] == 3
@pytest.mark.p2
def test_repeated_deletion(self, HttpApiAuth, add_documents_func):
@@ -132,19 +132,18 @@ class TestDocumentsDeletion:
res = delete_documents(HttpApiAuth, dataset_id, {"ids": document_ids})
assert res["code"] == 102
assert "Documents not found" in res["message"]
assert "Document not found" in res["message"]
@pytest.mark.p2
def test_duplicate_deletion(self, HttpApiAuth, add_documents_func):
dataset_id, document_ids = add_documents_func
res = delete_documents(HttpApiAuth, dataset_id, {"ids": document_ids + document_ids})
assert res["code"] == 0
assert "Duplicate document ids" in res["data"]["errors"][0]
assert res["data"]["success_count"] == 3
assert res["code"] == 101, res
assert "Field: <ids> - Message: <Duplicate ids:" in res["message"]
res = list_documents(HttpApiAuth, dataset_id)
assert len(res["data"]["docs"]) == 0
assert res["data"]["total"] == 0
assert len(res["data"]["docs"]) == 3
assert res["data"]["total"] == 3
@pytest.mark.p3

View File

@@ -478,46 +478,6 @@ class TestDocRoutesUnit:
assert res["data"]["matched_docs"] == 1
# Walks the `delete` route through its branches in sequence; each stage
# re-patches the module on top of the previous stage's patches.
def test_delete_branches(self, monkeypatch):
module = _load_doc_module(monkeypatch)
# Stage 1: dataset is not accessible -> ownership error message.
monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: False)
res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
assert "don't own the dataset" in res["message"]
# Stage 2: accessible dataset with an empty JSON body -> success code.
monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: True)
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({}))
res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
assert res["code"] == module.RetCode.SUCCESS
# Stage 3: one id supplied, but no tenant is resolved for the document.
monkeypatch.setattr(module, "get_request_json", lambda: _AwaitableValue({"ids": ["doc-1"]}))
monkeypatch.setattr(module, "check_duplicate_ids", lambda ids, _kind: (ids, []))
monkeypatch.setattr(module.FileService, "get_root_folder", lambda _tenant: {"id": "pf-1"})
monkeypatch.setattr(module.FileService, "init_knowledgebase_docs", lambda *_args, **_kwargs: None)
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _id: (True, _DummyDoc()))
monkeypatch.setattr(module.DocumentService, "get_tenant_id", lambda _id: None)
res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
assert res["message"] == "Tenant not found!"
# Stage 4: tenant resolves, but remove_document reports failure.
monkeypatch.setattr(module.DocumentService, "get_tenant_id", lambda _id: "tenant-1")
monkeypatch.setattr(module.File2DocumentService, "get_storage_address", lambda **_kwargs: ("b", "n"))
monkeypatch.setattr(module.DocumentService, "remove_document", lambda *_args, **_kwargs: False)
res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
assert "Document removal" in res["message"]
# Stage 5: the document lookup raises -> server error carrying the exception text.
def _raise_get_by_id(_id):
raise RuntimeError("boom")
monkeypatch.setattr(module.DocumentService, "get_by_id", _raise_get_by_id)
res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
assert res["code"] == module.RetCode.SERVER_ERROR
assert "boom" in res["message"]
# Stage 6: duplicate check returns no unique ids plus one duplicate message -> data error.
monkeypatch.setattr(module, "check_duplicate_ids", lambda _ids, _kind: ([], ["Duplicate document ids: doc-1"]))
monkeypatch.setattr(module.DocumentService, "get_by_id", lambda _id: (False, None))
res = _run(module.delete.__wrapped__("tenant-1", "ds-1"))
assert res["code"] == module.RetCode.DATA_ERROR
assert "Duplicate document ids" in res["message"]
def test_parse_branches(self, monkeypatch):
module = _load_doc_module(monkeypatch)
monkeypatch.setattr(module.KnowledgebaseService, "accessible", lambda **_kwargs: False)

View File

@@ -24,10 +24,10 @@ class TestDocumentsDeletion:
@pytest.mark.parametrize(
"payload, expected_message, remaining",
[
({"ids": None}, "", 3),
({"ids": []}, "", 3),
({"ids": ["invalid_id"]}, "Documents not found: ['invalid_id']", 3),
({"ids": ["\n!?。;!?\"'"]}, "Documents not found: ['\\n!?。;!?\"\\'']", 3),
({"ids": None}, "should either provide doc ids or set delete_all(true), dataset:", 3),
({"ids": []}, "should either provide doc ids or set delete_all(true), dataset:", 3),
({"ids": ["invalid_id"]}, "Field: <ids> - Message: <Invalid UUID1 format> - Value: <['invalid_id']>", 3),
({"ids": ["\n!?。;!?\"'"]}, "Field: <ids> - Message: <Invalid UUID1 format> - Value:", 3),
("not json", "must be a mapping", 3),
(lambda r: {"ids": r[:1]}, "", 2),
(lambda r: {"ids": r}, "", 0),
@@ -69,10 +69,10 @@ class TestDocumentsDeletion:
with pytest.raises(Exception) as exception_info:
dataset.delete_documents(**payload)
assert "Documents not found: ['invalid_id']" in str(exception_info.value), str(exception_info.value)
assert "Field: <ids> - Message: <Invalid UUID1 format> - Value: <" in str(exception_info.value), str(exception_info.value)
documents = dataset.list_documents()
assert len(documents) == 0, str(documents)
assert len(documents) == 3, str(documents)
@pytest.mark.p2
def test_repeated_deletion(self, add_documents_func):
@@ -81,14 +81,16 @@ class TestDocumentsDeletion:
dataset.delete_documents(ids=document_ids)
with pytest.raises(Exception) as exception_info:
dataset.delete_documents(ids=document_ids)
assert "Documents not found" in str(exception_info.value), str(exception_info.value)
assert "Document not found" in str(exception_info.value), str(exception_info.value)
@pytest.mark.p2
def test_duplicate_deletion(self, add_documents_func):
dataset, documents = add_documents_func
document_ids = [document.id for document in documents]
dataset.delete_documents(ids=document_ids + document_ids)
assert len(dataset.list_documents()) == 0, str(dataset.list_documents())
with pytest.raises(Exception) as exception_info:
dataset.delete_documents(ids=document_ids + document_ids)
assert "Field: <ids> - Message: <Duplicate ids:" in str(exception_info.value), str(exception_info.value)
assert len(dataset.list_documents()) == 3, str(dataset.list_documents())
@pytest.mark.p3

View File

@@ -218,8 +218,8 @@ class TestAddChunk:
@pytest.mark.p2
def test_add_chunk_to_deleted_document(self, WebApiAuth, add_document):
_, doc_id = add_document
delete_document(WebApiAuth, {"doc_id": doc_id})
kb_id, doc_id = add_document
delete_document(WebApiAuth, kb_id, {"ids": [doc_id]})
res = add_chunk(WebApiAuth, {"doc_id": doc_id, "content_with_weight": "chunk test"})
assert res["code"] == 102, res
assert res["message"] == "Document not found!", res

View File

@@ -251,8 +251,8 @@ class TestUpdateChunk:
@pytest.mark.p3
def test_update_chunk_to_deleted_document(self, WebApiAuth, add_chunks):
_, doc_id, chunk_ids = add_chunks
delete_document(WebApiAuth, {"doc_id": doc_id})
kb_id, doc_id, chunk_ids = add_chunks
delete_document(WebApiAuth, kb_id, {"ids": [doc_id]})
payload = {"doc_id": doc_id, "chunk_id": chunk_ids[0], "content_with_weight": "test content"}
res = update_chunk(WebApiAuth, payload)
assert res["code"] == 102, res

View File

@@ -382,8 +382,10 @@ def list_documents(auth, params=None, payload=None, *, headers=HEADERS, data=Non
return res.json()
def delete_document(auth, payload=None, *, headers=HEADERS, data=None):
res = requests.post(url=f"{HOST_ADDRESS}{DOCUMENT_APP_URL}/rm", headers=headers, auth=auth, json=payload, data=data)
def delete_document(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
    """Send a DELETE request to the dataset's ``documents`` endpoint.

    The URL is built from ``HOST_ADDRESS``, ``DATASETS_URL`` and
    ``dataset_id``; ``payload`` is sent as the JSON body. Returns the decoded
    JSON response.
    """
    endpoint = f"{HOST_ADDRESS}{DATASETS_URL}/{dataset_id}/documents"
    response = requests.delete(url=endpoint, headers=headers, auth=auth, json=payload, data=data)
    return response.json()

View File

@@ -36,7 +36,7 @@ def add_document_func(request, WebApiAuth, add_dataset, ragflow_tmp_dir):
def cleanup():
res = list_documents(WebApiAuth, {"kb_id": dataset_id})
for doc in res["data"]["docs"]:
delete_document(WebApiAuth, {"doc_id": doc["id"]})
delete_document(WebApiAuth, dataset_id, {"ids": [doc["id"]]})
request.addfinalizer(cleanup)
@@ -49,7 +49,7 @@ def add_documents(request, WebApiAuth, add_dataset, ragflow_tmp_dir):
def cleanup():
res = list_documents(WebApiAuth, {"kb_id": dataset_id})
for doc in res["data"]["docs"]:
delete_document(WebApiAuth, {"doc_id": doc["id"]})
delete_document(WebApiAuth, dataset_id, {"ids": [doc["id"]]})
request.addfinalizer(cleanup)
@@ -62,7 +62,7 @@ def add_documents_func(request, WebApiAuth, add_dataset_func, ragflow_tmp_dir):
def cleanup():
res = list_documents(WebApiAuth, {"kb_id": dataset_id})
for doc in res["data"]["docs"]:
delete_document(WebApiAuth, {"doc_id": doc["id"]})
delete_document(WebApiAuth, dataset_id, {"ids": [doc["id"]]})
request.addfinalizer(cleanup)

View File

@@ -36,7 +36,7 @@ class TestAuthorization:
],
)
def test_invalid_auth(self, invalid_auth, expected_code, expected_message):
res = delete_document(invalid_auth)
res = delete_document(invalid_auth, "kb_id")
assert res["code"] == expected_code, res
assert res["message"] == expected_message, res
@@ -46,22 +46,23 @@ class TestDocumentsDeletion:
@pytest.mark.parametrize(
"payload, expected_code, expected_message, remaining",
[
(None, 101, "required argument are missing: doc_id; ", 3),
({"doc_id": ""}, 109, "No authorization.", 3),
({"doc_id": "invalid_id"}, 109, "No authorization.", 3),
({"doc_id": "\n!?。;!?\"'"}, 109, "No authorization.", 3),
("not json", 101, "required argument are missing: doc_id; ", 3),
(lambda r: {"doc_id": r[0]}, 0, "", 2),
({}, 102, "should either provide doc ids or set delete_all(true), dataset:", 3),
({"invalid_key":[]}, 101, "Field: <invalid_key> - Message: <Extra inputs are not permitted> - Value: <[]>", 3),
({"ids": ""}, 101, "Field: <ids> - Message: <Input should be a valid list> - Value: <>", 3),
({"ids": ["invalid_id"]}, 101, "Field: <ids> - Message: <Invalid UUID1 format> - Value:", 3),
("not json", 101, "Invalid request payload: expected object, got str", 3),
(lambda r: {"ids": r[0]}, 101, "Field: <ids> - Message: <Input should be a valid list> - Value", 3),
(lambda r: {"ids": r}, 0, "", 0),
],
)
def test_basic_scenarios(self, WebApiAuth, add_documents_func, payload, expected_code, expected_message, remaining):
kb_id, document_ids = add_documents_func
if callable(payload):
payload = payload(document_ids)
res = delete_document(WebApiAuth, payload)
res = delete_document(WebApiAuth, kb_id, payload)
assert res["code"] == expected_code, res
if res["code"] != 0:
assert res["message"] == expected_message, res
assert expected_message in res["message"], res
res = list_documents(WebApiAuth, {"kb_id": kb_id})
assert len(res["data"]["docs"]) == remaining, res
@@ -69,57 +70,46 @@ class TestDocumentsDeletion:
@pytest.mark.p2
def test_repeated_deletion(self, WebApiAuth, add_documents_func):
_, document_ids = add_documents_func
kb_id, document_ids = add_documents_func
for doc_id in document_ids:
res = delete_document(WebApiAuth, {"doc_id": doc_id})
res = delete_document(WebApiAuth, kb_id, {"ids": [doc_id]})
assert res["code"] == 0, res
for doc_id in document_ids:
res = delete_document(WebApiAuth, {"doc_id": doc_id})
assert res["code"] == 109, res
assert res["message"] == "No authorization.", res
res = delete_document(WebApiAuth, kb_id, {"ids": [doc_id]})
assert res["code"] == 102, res
assert res["message"] == "Document not found!", res
@pytest.mark.p2
def test_delete_all(self, WebApiAuth, add_documents_func):
    """Deleting with ``delete_all: True`` must leave the dataset with no documents."""
    kb_id, _ = add_documents_func

    delete_res = delete_document(WebApiAuth, kb_id, {"delete_all": True})
    assert delete_res["code"] == 0, delete_res

    listing = list_documents(WebApiAuth, {"kb_id": kb_id})
    assert len(listing["data"]["docs"]) == 0, listing
    assert listing["data"]["total"] == 0, listing
@pytest.mark.p2
class TestDocumentsDeletionUnit:
def test_rm_string_doc_id_normalization_success_unit(self, document_app_module, monkeypatch):
module = document_app_module
captured = {}
async def fake_request_json():
return {"doc_id": "doc1"}
async def fake_thread_pool_exec(func, doc_ids, user_id):
captured["func"] = func
captured["doc_ids"] = doc_ids
captured["user_id"] = user_id
return None
monkeypatch.setattr(module, "get_request_json", fake_request_json)
monkeypatch.setattr(module.DocumentService, "accessible4deletion", lambda *_args, **_kwargs: True)
monkeypatch.setattr(module, "thread_pool_exec", fake_thread_pool_exec)
res = _run(module.rm.__wrapped__())
assert res["code"] == 0
assert res["data"] is True
assert captured["func"] == module.FileService.delete_docs
assert captured["doc_ids"] == ["doc1"]
assert captured["user_id"] == module.current_user.id
@pytest.mark.p3
def test_concurrent_deletion(WebApiAuth, add_dataset, tmp_path):
count = 100
kb_id = add_dataset
document_ids = bulk_upload_documents(WebApiAuth, kb_id, count, tmp_path)
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [executor.submit(delete_document, WebApiAuth, {"doc_id": document_ids[i]}) for i in range(count)]
futures = [executor.submit(delete_document, WebApiAuth, kb_id, {"ids": [document_ids[i]]}) for i in range(count)]
responses = list(as_completed(futures))
assert len(responses) == count, responses
assert all(future.result()["code"] == 0 for future in futures), responses
res = list_documents(WebApiAuth, {"kb_id": kb_id})
assert len(res["data"]["docs"]) == 0, res
assert res["data"]["total"] == 0, res
@pytest.mark.p3
@pytest.mark.p2
def test_delete_100(WebApiAuth, add_dataset, tmp_path):
documents_num = 100
kb_id = add_dataset
@@ -128,7 +118,7 @@ def test_delete_100(WebApiAuth, add_dataset, tmp_path):
assert res["data"]["total"] == documents_num, res
for doc_id in document_ids:
res = delete_document(WebApiAuth, {"doc_id": doc_id})
res = delete_document(WebApiAuth, kb_id, {"ids": [doc_id]})
assert res["code"] == 0, res
res = list_documents(WebApiAuth, {"kb_id": kb_id})

View File

@@ -16,6 +16,7 @@ import {
import i18n from '@/locales/config';
import { EMPTY_METADATA_FIELD } from '@/pages/dataset/dataset/use-select-filters';
import kbService, {
deleteDocument,
documentFilter,
listDocument,
renameDocument,
@@ -315,6 +316,7 @@ export const useRunDocument = () => {
export const useRemoveDocument = () => {
const queryClient = useQueryClient();
const { id: datasetId } = useParams();
const {
data,
isPending: loading,
@@ -322,7 +324,8 @@ export const useRemoveDocument = () => {
} = useMutation({
mutationKey: [DocumentApiAction.RemoveDocument],
mutationFn: async (documentIds: string | string[]) => {
const { data } = await kbService.documentRm({ doc_id: documentIds });
const ids = Array.isArray(documentIds) ? documentIds : [documentIds];
const { data } = await deleteDocument(datasetId!, ids);
if (data.code === 0) {
message.success(i18n.t('message.deleted'));
queryClient.invalidateQueries({
@@ -435,7 +438,7 @@ export const useSetDocumentMeta = () => {
}
return data?.code;
} catch (error) {
message.error('error');
message.error('error:' + error);
}
},
});

View File

@@ -18,8 +18,6 @@ const {
kbList,
getDocumentList,
documentChangeStatus,
documentRm,
documentDelete,
documentCreate,
documentChangeParser,
documentThumbnails,
@@ -72,10 +70,6 @@ const methods = {
url: documentChangeStatus,
method: 'post',
},
documentRm: {
url: documentRm,
method: 'post',
},
documentCreate: {
url: documentCreate,
method: 'post',
@@ -137,10 +131,6 @@ const methods = {
url: knowledgeGraph,
method: 'get',
},
documentDelete: {
url: documentDelete,
method: 'delete',
},
listTagByKnowledgeIds: {
url: listTagByKnowledgeIds,
method: 'get',
@@ -276,6 +266,9 @@ export const renameDocument = (
data: { name?: string },
) => request.patch(api.documentRename(datasetId, documentId), { data });
/**
 * Delete documents from a dataset.
 *
 * Issues a DELETE request to `api.documentDelete(datasetId)` and passes
 * `{ ids: documentIds }` as the request data.
 *
 * @param datasetId - ID of the dataset used to build the request URL.
 * @param documentIds - IDs of the documents to delete.
 * @returns Whatever `request.delete` returns for this call.
 */
export const deleteDocument = (datasetId: string, documentIds: string[]) =>
request.delete(api.documentDelete(datasetId), { data: { ids: documentIds } });
export const getMetaDataService = ({
kb_id,
doc_ids,

View File

@@ -109,8 +109,8 @@ export default {
getDocumentList: (datasetId: string) =>
`${restAPIv1}/datasets/${datasetId}/documents`,
documentChangeStatus: `${webAPI}/document/change_status`,
documentRm: `${webAPI}/document/rm`,
documentDelete: `${webAPI}/api/document`,
documentDelete: (datasetId: string) =>
`${restAPIv1}/datasets/${datasetId}/documents`,
documentRename: (datasetId: string, documentId: string) =>
`${restAPIv1}/datasets/${datasetId}/documents/${documentId}`,
documentCreate: `${webAPI}/document/create`,