From 05ee7f8bb68836b099bd99eac079ad5e1e5bc544 Mon Sep 17 00:00:00 2001 From: buua436 Date: Thu, 30 Apr 2026 18:56:33 +0800 Subject: [PATCH] Fix: remove delete_documents uuid validation (#14533) ### What problem does this PR solve? Remove the UUIDv1 validation on delete_documents IDs: connector-backed documents can use non-UUID identifiers, so the request model now only rejects duplicate IDs and leaves existence checks to the delete API. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/utils/validation_utils.py | 28 +++++++++++++++++-- .../test_delete_documents.py | 10 +++---- .../test_delete_documents.py | 6 ++-- .../test_document_app/test_rm_documents.py | 2 +- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/api/utils/validation_utils.py b/api/utils/validation_utils.py index f570bacc3d..20017f6376 100644 --- a/api/utils/validation_utils.py +++ b/api/utils/validation_utils.py @@ -814,6 +814,31 @@ class DeleteReq(Base): class DeleteDatasetReq(DeleteReq): ... +class DeleteDocumentReq(DeleteReq): + @field_validator("ids", mode="after") + @classmethod + def validate_ids(cls, v_list: list[str] | None) -> list[str] | None: + """ + Validate document IDs without enforcing UUIDv1. + + Connector-backed documents can use non-UUID identifiers, so we only + enforce uniqueness here and leave existence checks to the delete API. + """ + if v_list is None: + return None + + duplicates = [item for item, count in Counter(v_list).items() if count > 1] + if duplicates: + duplicates_str = ", ".join(duplicates) + raise PydanticCustomError( + "duplicate_uuids", + "Duplicate ids: '{duplicate_ids}'", + {"duplicate_ids": duplicates_str}, + ) + + return v_list + + class SearchDatasetReq(BaseModel): model_config = ConfigDict(extra="ignore") @@ -833,9 +858,6 @@ class SearchDatasetReq(BaseModel): meta_data_filter: Annotated[dict | None, Field(default=None)] -class DeleteDocumentReq(DeleteReq): ... 
- - class BaseListReq(BaseModel): model_config = ConfigDict(extra="forbid") diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_delete_documents.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_delete_documents.py index 82fdb413ee..3716f38c59 100644 --- a/test/testcases/test_http_api/test_file_management_within_dataset/test_delete_documents.py +++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_delete_documents.py @@ -47,11 +47,11 @@ class TestDocumentsDeletion: [ ({}, 102, "should either provide doc ids or set delete_all(true), dataset", 3), ({"ids": []}, 102, "should either provide doc ids or set delete_all(true), dataset", 3), - ({"ids": ["invalid_id"]}, 101, "Field: - Message: - Value: <['invalid_id']>", 3), + ({"ids": ["invalid_id"]}, 102, "These documents do not belong to dataset", 3), ( {"ids": ["\n!?。;!?\"'"]}, - 101, - "Field: - Message: - Value:", + 102, + "These documents do not belong to dataset", 3, ), ( @@ -117,8 +117,8 @@ class TestDocumentsDeletion: if callable(payload): payload = payload(document_ids) res = delete_documents(HttpApiAuth, dataset_id, payload) - assert res["code"] == 101 - assert "Field: - Message: - Value" in res["message"] + assert res["code"] == 102 + assert "These documents do not belong to dataset" in res["message"] res = list_documents(HttpApiAuth, dataset_id) assert len(res["data"]["docs"]) == 3 diff --git a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_delete_documents.py b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_delete_documents.py index fb70f457ad..31627d6e88 100644 --- a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_delete_documents.py +++ b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_delete_documents.py @@ -26,8 +26,8 @@ class TestDocumentsDeletion: [ ({"ids": None}, "should either provide doc ids or set delete_all(true), dataset:", 3), ({"ids": 
[]}, "should either provide doc ids or set delete_all(true), dataset:", 3), - ({"ids": ["invalid_id"]}, "Field: - Message: - Value: <['invalid_id']>", 3), - ({"ids": ["\n!?。;!?\"'"]}, "Field: - Message: - Value:", 3), + ({"ids": ["invalid_id"]}, "These documents do not belong to dataset", 3), + ({"ids": ["\n!?。;!?\"'"]}, "These documents do not belong to dataset", 3), ("not json", "must be a mapping", 3), (lambda r: {"ids": r[:1]}, "", 2), (lambda r: {"ids": r}, "", 0), @@ -69,7 +69,7 @@ class TestDocumentsDeletion: with pytest.raises(Exception) as exception_info: dataset.delete_documents(**payload) - assert "Field: - Message: - Value: <" in str(exception_info.value), str(exception_info.value) + assert "These documents do not belong to dataset" in str(exception_info.value), str(exception_info.value) documents = dataset.list_documents() assert len(documents) == 3, str(documents) diff --git a/test/testcases/test_web_api/test_document_app/test_rm_documents.py b/test/testcases/test_web_api/test_document_app/test_rm_documents.py index 2e8cefdbb0..f0ba072c9d 100644 --- a/test/testcases/test_web_api/test_document_app/test_rm_documents.py +++ b/test/testcases/test_web_api/test_document_app/test_rm_documents.py @@ -49,7 +49,7 @@ class TestDocumentsDeletion: ({}, 102, "should either provide doc ids or set delete_all(true), dataset:", 3), ({"invalid_key":[]}, 101, "Field: - Message: - Value: <[]>", 3), ({"ids": ""}, 101, "Field: - Message: - Value: <>", 3), - ({"ids": ["invalid_id"]}, 101, "Field: - Message: - Value:", 3), + ({"ids": ["invalid_id"]}, 102, "These documents do not belong to dataset", 3), ("not json", 101, "Invalid request payload: expected object, got str", 3), (lambda r: {"ids": r[0]}, 101, "Field: - Message: - Value", 3), (lambda r: {"ids": r}, 0, "", 0),