From 0940f1a13593e2d8e383cec2f0cb9a6ff324ee7d Mon Sep 17 00:00:00 2001
From: Idriss Sbaaoui <112825897+6ba3i@users.noreply.github.com>
Date: Thu, 28 May 2026 11:03:12 +0800
Subject: [PATCH] Feat: add new tests and tescases for restful api suite
 (#15299)

### What problem does this PR solve?

extend restful api suite

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Other (please describe): test
---
 test/testcases/restful_api/test_datasets.py   |  77 +++
 .../restful_api/test_document_raw_routes.py   |  22 +
 test/testcases/restful_api/test_documents.py  | 505 +++++++++++++++++-
 test/testcases/restful_api/test_retrieval.py  |  39 ++
 4 files changed, 642 insertions(+), 1 deletion(-)
diff --git a/test/testcases/restful_api/test_datasets.py b/test/testcases/restful_api/test_datasets.py
index 021fc998b4..fb4fe73c24 100644
--- a/test/testcases/restful_api/test_datasets.py
+++ b/test/testcases/restful_api/test_datasets.py
@@ -2211,6 +2211,83 @@ def test_dataset_get_contract(rest_client, create_dataset):
 
 
 @pytest.mark.p2
+def test_dataset_metadata_config_get_and_update_contract(rest_client, create_dataset):
+    dataset_id = create_dataset("dataset_metadata_config_contract")
+
+    success_res = rest_client.get(f"/datasets/{dataset_id}/metadata/config")
+    assert success_res.status_code == 200
+    success_payload = success_res.json()
+    assert success_payload["code"] == 0, success_payload
+    assert success_payload["data"] == {"metadata": [], "built_in_metadata": []}, success_payload
+
+    for scenario_name, client in (("missing token", RestClient(token=None)), ("invalid token", RestClient(token=INVALID_API_TOKEN))):
+        get_res = client.get(f"/datasets/{dataset_id}/metadata/config")
+        assert get_res.status_code == 401, (scenario_name, get_res.text)
+        get_payload = get_res.json()
+        assert get_payload["code"] == 401, (scenario_name, get_payload)
+        assert get_payload["message"] == "<Unauthorized '401: Unauthorized'>", (scenario_name, get_payload)
+
+        put_res = client.put(
+            f"/datasets/{dataset_id}/metadata/config",
+            json={"metadata": [], "built_in_metadata": []},
+        )
+        assert put_res.status_code == 401, (scenario_name, put_res.text)
+        put_payload = put_res.json()
+        assert put_payload["code"] == 401, (scenario_name, put_payload)
+        assert put_payload["message"] == "<Unauthorized '401: Unauthorized'>", (scenario_name, put_payload)
+
+    invalid_dataset_res = rest_client.get("/datasets/invalid_dataset_id/metadata/config")
+    assert invalid_dataset_res.status_code == 200
+    invalid_dataset_payload = invalid_dataset_res.json()
+    assert invalid_dataset_payload["code"] == 102, invalid_dataset_payload
+    assert "lacks permission for dataset 'invalid_dataset_id'" in invalid_dataset_payload["message"], invalid_dataset_payload
+
+    update_payload = {
+        "metadata": [
+            {"key": "author", "type": "string", "description": "Author name"},
+            {"key": "tags", "type": "list", "description": "Tag list", "enum": ["foo", "bar"]},
+        ],
+        "built_in_metadata": [
+            {"key": "size", "type": "number", "description": "File size"},
+        ],
+    }
+    normalized_update_payload = {
+        "metadata": [
+            {"key": "author", "type": "string", "description": "Author name", "enum": None},
+            {"key": "tags", "type": "list", "description": "Tag list", "enum": ["foo", "bar"]},
+        ],
+        "built_in_metadata": [
+            {"key": "size", "type": "number", "description": "File size", "enum": None},
+        ],
+    }
+    update_res = rest_client.put(f"/datasets/{dataset_id}/metadata/config", json=update_payload)
+    assert update_res.status_code == 200
+    update_body = update_res.json()
+    assert update_body["code"] == 0, update_body
+    assert update_body["data"] == normalized_update_payload, update_body
+
+    refetch_res = rest_client.get(f"/datasets/{dataset_id}/metadata/config")
+    assert refetch_res.status_code == 200
+    refetch_payload = refetch_res.json()
+    assert refetch_payload["code"] == 0, refetch_payload
+    assert refetch_payload["data"] == normalized_update_payload, refetch_payload
+
+    missing_payload_res = rest_client.put(f"/datasets/{dataset_id}/metadata/config", json={})
+    assert missing_payload_res.status_code == 200
+    missing_payload = missing_payload_res.json()
+    assert missing_payload["code"] == 0, missing_payload
+    assert missing_payload["data"] == {"metadata": [], "built_in_metadata": []}, missing_payload
+
+    invalid_update_dataset_res = rest_client.put(
+        "/datasets/invalid_dataset_id/metadata/config",
+        json={"metadata": [], "built_in_metadata": []},
+    )
+    assert invalid_update_dataset_res.status_code == 200
+    invalid_update_dataset_payload = invalid_update_dataset_res.json()
+    assert invalid_update_dataset_payload["code"] == 102, invalid_update_dataset_payload
+    assert "lacks permission for dataset 'invalid_dataset_id'" in invalid_update_dataset_payload["message"], invalid_update_dataset_payload
+
+
 def test_dataset_metadata_summary_contract(rest_client, create_dataset, tmp_path):
     dataset_id = create_dataset("dataset_metadata_summary")
     document_ids = []
diff --git a/test/testcases/restful_api/test_document_raw_routes.py b/test/testcases/restful_api/test_document_raw_routes.py
index 07f65230df..d1ff2520f1 100644
--- a/test/testcases/restful_api/test_document_raw_routes.py
+++ b/test/testcases/restful_api/test_document_raw_routes.py
@@ -15,6 +15,8 @@
 #
 
 import pytest
+from test.testcases.configs import INVALID_API_TOKEN
+from test.testcases.restful_api.helpers.client import RestClient
 
 
 @pytest.mark.p2
@@ -26,6 +28,26 @@ def test_document_image_invalid_id_contract(rest_client_noauth):
     assert payload["message"] == "Image not found.", payload
 
 
+@pytest.mark.p2
+def test_document_download_by_id_requires_auth(create_document):
+    _dataset_id, document_id = create_document("document_raw_download_auth.txt")
+    for scenario_name, client in (("missing token", RestClient(token=None)), ("invalid token", RestClient(token=INVALID_API_TOKEN))):
+        res = client.get(f"/documents/{document_id}")
+        assert res.status_code == 401, (scenario_name, res.text)
+        payload = res.json()
+        assert payload["code"] == 401, (scenario_name, payload)
+        assert payload["message"] == "<Unauthorized '401: Unauthorized'>", (scenario_name, payload)
+
+
+@pytest.mark.p2
+def test_document_download_by_id_invalid_id_contract(rest_client):
+    res = rest_client.get("/documents/invalid_document_id")
+    assert res.status_code == 200
+    payload = res.json()
+    assert payload["code"] == 102, payload
+    assert payload["message"] == "The dataset not own the document invalid_document_id.", payload
+
+
 @pytest.mark.p2
 def test_document_artifact_requires_auth(rest_client_noauth):
     res = rest_client_noauth.get("/documents/artifact/not-an-artifact.txt")
diff --git a/test/testcases/restful_api/test_documents.py b/test/testcases/restful_api/test_documents.py
index d0b7718cf8..05a1743d0c 100644
--- a/test/testcases/restful_api/test_documents.py
+++ b/test/testcases/restful_api/test_documents.py
@@ -24,7 +24,7 @@ from openpyxl import Workbook
 import pytest
 import requests
 from requests_toolbelt import MultipartEncoder
-from test.testcases.configs import DOCUMENT_NAME_LIMIT, HOST_ADDRESS, INVALID_API_TOKEN, VERSION
+from test.testcases.configs import DEFAULT_PARSER_CONFIG, DOCUMENT_NAME_LIMIT, HOST_ADDRESS, INVALID_API_TOKEN, INVALID_ID_32, VERSION
 from test.testcases.restful_api.helpers.client import RestClient
 from test.testcases.utils import compare_by_hash
 from test.testcases.utils.file_utils import (
@@ -81,6 +81,19 @@ def _seed_documents(rest_client, create_dataset, tmp_path, count=5):
     return dataset_id, payload["data"]
 
 
+def _seed_documents_for_update(rest_client, create_dataset, tmp_path):
+    dataset_id = create_dataset("dataset_update_contract")
+    file_paths = [
+        create_txt_file(tmp_path / "ragflow_test_upload_0.txt"),
+        create_txt_file(tmp_path / "ragflow_test_upload_1.txt"),
+    ]
+    res = _upload_files(rest_client, dataset_id, file_paths)
+    assert res.status_code == 200
+    payload = res.json()
+    assert payload["code"] == 0, payload
+    return dataset_id, payload["data"]
+
+
 def _assert_docs_sorted(docs, key, reverse):
     values = [doc.get(key) for doc in docs]
     assert values == sorted(values, reverse=reverse)
@@ -509,6 +522,315 @@ def test_documents_update_patch_and_delete(rest_client, create_document):
     assert all(doc["id"] != document_id for doc in list_payload["data"]["docs"]), list_payload
 
 
+@pytest.mark.p2
+def test_documents_update_requires_auth(create_document):
+    dataset_id, document_id = create_document("update_auth_target.txt")
+    for scenario_name, client in (("missing token", RestClient(token=None)), ("invalid token", RestClient(token=INVALID_API_TOKEN))):
+        res = client.patch(
+            f"/datasets/{dataset_id}/documents/{document_id}",
+            json={"name": "updated_auth_target.txt"},
+        )
+        assert res.status_code == 401, (scenario_name, res.text)
+        body = res.json()
+        assert body["code"] == 401, (scenario_name, body)
+        assert body["message"] == "<Unauthorized '401: Unauthorized'>", (scenario_name, body)
+
+
+@pytest.mark.p2
+def test_documents_update_name_contract(rest_client, create_dataset, tmp_path):
+    dataset_id, uploaded_docs = _seed_documents_for_update(rest_client, create_dataset, tmp_path)
+    first_document_id = uploaded_docs[0]["id"]
+
+    long_name = f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt"
+    name_cases = [
+        ("new_name.txt", 0, ""),
+        (long_name, 0, ""),
+        (0, 102, "Field: <name> - Message: <Input should be a valid string> - Value: <0>"),
+        (None, 100, "AttributeError('NoneType' object has no attribute 'encode')"),
+        ("", 101, "The extension of file can't be changed"),
+        ("ragflow_test_upload_0", 101, "The extension of file can't be changed"),
+        ("ragflow_test_upload_1.txt", 102, "Duplicated document name in the same dataset."),
+        ("RAGFLOW_TEST_UPLOAD_1.TXT", 0, ""),
+    ]
+    for name, expected_code, expected_message in name_cases:
+        res = rest_client.patch(
+            f"/datasets/{dataset_id}/documents/{first_document_id}",
+            json={"name": name},
+        )
+        assert res.status_code == 200, (name, res.text)
+        body = res.json()
+        assert body["code"] == expected_code, (name, body)
+        if expected_code == 0:
+            assert body["data"]["name"] == name, (name, body)
+            list_res = rest_client.get(f"/datasets/{dataset_id}/documents", params={"id": first_document_id})
+            assert list_res.status_code == 200, (name, list_res.text)
+            list_body = list_res.json()
+            assert list_body["code"] == 0, (name, list_body)
+            assert list_body["data"]["docs"][0]["name"] == name, (name, list_body)
+        else:
+            assert body["message"] == expected_message, (name, body)
+
+
+@pytest.mark.p2
+def test_documents_update_invalid_dataset_and_document_contract(rest_client, create_dataset, tmp_path):
+    dataset_id, uploaded_docs = _seed_documents_for_update(rest_client, create_dataset, tmp_path)
+    first_document_id = uploaded_docs[0]["id"]
+
+    invalid_dataset_res = rest_client.patch(
+        f"/datasets/{INVALID_ID_32}/documents/{first_document_id}",
+        json={"name": "new_name.txt"},
+    )
+    assert invalid_dataset_res.status_code == 200
+    invalid_dataset_body = invalid_dataset_res.json()
+    assert invalid_dataset_body["code"] == 102, invalid_dataset_body
+    assert "You don't own the dataset." in invalid_dataset_body["message"], invalid_dataset_body
+
+    invalid_document_res = rest_client.patch(
+        f"/datasets/{dataset_id}/documents/{INVALID_ID_32}",
+        json={"name": "new_name.txt"},
+    )
+    assert invalid_document_res.status_code == 200
+    invalid_document_body = invalid_document_res.json()
+    assert invalid_document_body["code"] == 102, invalid_document_body
+    assert invalid_document_body["message"] == "The dataset doesn't own the document.", invalid_document_body
+
+
+@pytest.mark.p2
+def test_documents_update_chunk_method_contract(rest_client, create_dataset, tmp_path):
+    dataset_id, uploaded_docs = _seed_documents_for_update(rest_client, create_dataset, tmp_path)
+    first_document_id = uploaded_docs[0]["id"]
+
+    chunk_method_cases = [
+        ("naive", 0, ""),
+        ("manual", 0, ""),
+        ("qa", 0, ""),
+        ("table", 0, ""),
+        ("paper", 0, ""),
+        ("book", 0, ""),
+        ("laws", 0, ""),
+        ("presentation", 0, ""),
+        ("picture", 0, ""),
+        ("one", 0, ""),
+        ("knowledge_graph", 0, ""),
+        ("email", 0, ""),
+        ("tag", 0, ""),
+        ("", 102, "`chunk_method` (empty string) is not valid"),
+        (
+            "other_chunk_method",
+            102,
+            "Field: <chunk_method> - Message: <`chunk_method` other_chunk_method doesn't exist> - Value: <other_chunk_method>",
+        ),
+    ]
+    for chunk_method, expected_code, expected_message in chunk_method_cases:
+        res = rest_client.patch(
+            f"/datasets/{dataset_id}/documents/{first_document_id}",
+            json={"chunk_method": chunk_method},
+        )
+        assert res.status_code == 200, (chunk_method, res.text)
+        body = res.json()
+        assert body["code"] == expected_code, (chunk_method, body)
+        if expected_code == 0:
+            list_res = rest_client.get(f"/datasets/{dataset_id}/documents", params={"id": first_document_id})
+            assert list_res.status_code == 200, (chunk_method, list_res.text)
+            list_body = list_res.json()
+            assert list_body["code"] == 0, (chunk_method, list_body)
+            assert list_body["data"]["docs"][0]["chunk_method"] == chunk_method, (chunk_method, list_body)
+        else:
+            assert body["message"] == expected_message, (chunk_method, body)
+
+
+@pytest.mark.p2
+def test_documents_update_meta_fields_contract(rest_client, create_dataset, tmp_path):
+    dataset_id, uploaded_docs = _seed_documents_for_update(rest_client, create_dataset, tmp_path)
+    first_document_id = uploaded_docs[0]["id"]
+
+    meta_fields_cases = [
+        ({"test": "test"}, 0, ""),
+        ({"author": "alice", "year": 2024}, 0, ""),
+        ({"tags": ["tag1", "tag2"]}, 0, ""),
+        ({"count": 42, "price": 19.99}, 0, ""),
+        ("test", 102, "Field: <meta_fields> - Message: <Input should be a valid dictionary> - Value: <test>"),
+        ([], 102, "Field: <meta_fields> - Message: <Input should be a valid dictionary> - Value: <[]>"),
+        ({"tags": [{"x": {"a": "b"}}]}, 102, "Field: <meta_fields> - Message: <The type is not supported in list: [{'x': {'a': 'b'}}]> - Value: <{'tags': [{'x': {'a': 'b'}}]}>"),
+        ({"tags": [{"x": 1}]}, 102, "Field: <meta_fields> - Message: <The type is not supported in list: [{'x': 1}]> - Value: <{'tags': [{'x': 1}]}>"),
+        ({"obj": {"x": 1}}, 102, "Field: <meta_fields> - Message: <The type is not supported: {'x': 1}> - Value: <{'obj': {'x': 1}}>"),
+        ({"tags": [2, 1]}, 0, ""),
+    ]
+    for meta_fields, expected_code, expected_message in meta_fields_cases:
+        res = rest_client.patch(
+            f"/datasets/{dataset_id}/documents/{first_document_id}",
+            json={"meta_fields": meta_fields},
+        )
+        assert res.status_code == 200, (meta_fields, res.text)
+        body = res.json()
+        assert body["code"] == expected_code, (meta_fields, body)
+        if expected_code == 0:
+            list_res = rest_client.get(f"/datasets/{dataset_id}/documents", params={"id": first_document_id})
+            assert list_res.status_code == 200, (meta_fields, list_res.text)
+            list_body = list_res.json()
+            assert list_body["code"] == 0, (meta_fields, list_body)
+            assert list_body["data"]["docs"][0]["meta_fields"] == meta_fields, (meta_fields, list_body)
+        else:
+            assert expected_message in body["message"] or body["message"] == expected_message, (meta_fields, body)
+
+    invalid_meta_doc_res = rest_client.patch(
+        f"/datasets/{dataset_id}/documents/invalid_doc_id_12345678901234567890",
+        json={"meta_fields": {"author": "alice"}},
+    )
+    assert invalid_meta_doc_res.status_code == 200
+    invalid_meta_doc_body = invalid_meta_doc_res.json()
+    assert invalid_meta_doc_body["code"] == 102, invalid_meta_doc_body
+    assert "The dataset doesn't own the document." in invalid_meta_doc_body["message"], invalid_meta_doc_body
+
+
+@pytest.mark.p2
+def test_documents_update_invalid_field_and_guard_contract(rest_client, create_dataset, tmp_path):
+    dataset_id, uploaded_docs = _seed_documents_for_update(rest_client, create_dataset, tmp_path)
+    first_document_id = uploaded_docs[0]["id"]
+
+    strict_guard_cases = [
+        ({"chunk_count": 1}, 102, "Can't change `chunk_count`."),
+        ({"token_count": 1}, 102, "Can't change `token_count`."),
+        ({"chunk_count": 100}, 102, "Can't change `chunk_count`."),
+        ({"token_count": 100}, 102, "Can't change `token_count`."),
+        ({"progress": 2.0}, 102, "Field: <progress> - Message: <Input should be less than or equal to 1> - Value: <2.0>"),
+        ({"progress": 1.0}, 102, "Can't change `progress`."),
+        ({"meta_fields": []}, 102, "Field: <meta_fields> - Message: <Input should be a valid dictionary> - Value: <[]>"),
+    ]
+    for payload, expected_code, expected_message in strict_guard_cases:
+        res = rest_client.patch(
+            f"/datasets/{dataset_id}/documents/{first_document_id}",
+            json=payload,
+        )
+        assert res.status_code == 200, (payload, res.text)
+        body = res.json()
+        assert body["code"] == expected_code, (payload, body)
+        assert expected_message in body["message"] or body["message"] == expected_message, (payload, body)
+
+    legacy_invalid_field_cases = [
+        {"create_date": "Fri, 14 Mar 2025 16:53:42 GMT"},
+        {"create_time": 1},
+        {"created_by": "ragflow_test"},
+        {"dataset_id": "ragflow_test"},
+        {"id": "ragflow_test"},
+        {"location": "ragflow_test.txt"},
+        {"process_begin_at": 1},
+        {"process_duration": 1.0},
+        {"progress_msg": "ragflow_test"},
+        {"run": "ragflow_test"},
+        {"size": 1},
+        {"source_type": "ragflow_test"},
+        {"thumbnail": "ragflow_test"},
+        {"type": "ragflow_test"},
+        {"update_date": "Fri, 14 Mar 2025 16:33:17 GMT"},
+        {"update_time": 1},
+    ]
+    for payload in legacy_invalid_field_cases:
+        res = rest_client.patch(
+            f"/datasets/{dataset_id}/documents/{first_document_id}",
+            json=payload,
+        )
+        assert res.status_code == 200, (payload, res.text)
+        body = res.json()
+        assert body["code"] in (0, 102), (payload, body)
+        if body["code"] == 102:
+            assert "invalid" in body["message"].lower(), (payload, body)
+        else:
+            assert "data" in body, (payload, body)
+
+
+@pytest.mark.p2
+def test_documents_update_parser_config_contract(rest_client, create_dataset, tmp_path):
+    dataset_id, uploaded_docs = _seed_documents_for_update(rest_client, create_dataset, tmp_path)
+    first_document_id = uploaded_docs[0]["id"]
+    default_parser_config_for_test = {
+        "layout_recognize": "DeepDOC",
+        "chunk_token_num": 512,
+        "delimiter": "\n",
+        "auto_keywords": 0,
+        "auto_questions": 0,
+        "html4excel": False,
+        "topn_tags": 3,
+        "raptor": {
+            "use_raptor": True,
+            "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n      {cluster_content}\nThe above is the content you need to summarize.",
+            "max_token": 256,
+            "threshold": 0.1,
+            "max_cluster": 64,
+            "random_seed": 0,
+        },
+        "graphrag": {
+            "use_graphrag": True,
+            "entity_types": ["organization", "person", "geo", "event", "category"],
+            "method": "light",
+            "batch_chunk_token_size": 4096,
+        },
+    }
+
+    parser_cases = [
+        ({}, 0, ""),
+        (default_parser_config_for_test, 0, ""),
+        ({"chunk_token_num": -1}, 102, "Field: <parser_config.chunk_token_num> - Message: <Input should be greater than or equal to 1> - Value: <-1>"),
+        ({"chunk_token_num": 0}, 102, "Field: <parser_config.chunk_token_num> - Message: <Input should be greater than or equal to 1> - Value: <0>"),
+        ({"chunk_token_num": 100000000}, 102, "Field: <parser_config.chunk_token_num> - Message: <Input should be less than or equal to 2048> - Value: <100000000>"),
+        ({"chunk_token_num": 3.14}, 102, "Field: <parser_config.chunk_token_num> - Message: <Input should be a valid integer> - Value: <3.14>"),
+        ({"chunk_token_num": "1024"}, 102, "Field: <parser_config.chunk_token_num> - Message: <Input should be a valid integer> - Value: <1024>"),
+        ({"layout_recognize": "DeepDOC"}, 0, ""),
+        ({"layout_recognize": "Naive"}, 0, ""),
+        ({"html4excel": True}, 0, ""),
+        ({"html4excel": False}, 0, ""),
+        ({"html4excel": 1}, 102, "Field: <parser_config.html4excel> - Message: <Input should be a valid boolean> - Value: <1>"),
+        ({"delimiter": ""}, 102, "Field: <parser_config.delimiter> - Message: <String should have at least 1 character> - Value: <>"),
+        ({"delimiter": "`##`"}, 0, ""),
+        ({"delimiter": 1}, 102, "Field: <parser_config.delimiter> - Message: <Input should be a valid string> - Value: <1>"),
+        ({"task_page_size": -1}, 102, "Field: <parser_config.task_page_size> - Message: <Input should be greater than or equal to 1> - Value: <-1>"),
+        ({"task_page_size": 0}, 102, "Field: <parser_config.task_page_size> - Message: <Input should be greater than or equal to 1> - Value: <0>"),
+        ({"task_page_size": 100000000}, 0, ""),
+        ({"task_page_size": 3.14}, 102, "Field: <parser_config.task_page_size> - Message: <Input should be a valid integer> - Value: <3.14>"),
+        ({"task_page_size": "1024"}, 102, "Field: <parser_config.task_page_size> - Message: <Input should be a valid integer> - Value: <1024>"),
+        ({"raptor": {"use_raptor": {"a": "b"}}}, 102, "Field: <parser_config.raptor.use_raptor> - Message: <Input should be a valid boolean> - Value: <{'a': 'b'}>"),
+        ({"raptor": {"use_raptor": False}}, 0, ""),
+        ({"invalid_key": "invalid_value"}, 102, "Field: <parser_config.invalid_key> - Message: <Extra inputs are not permitted> - Value: <invalid_value>"),
+        ({"auto_keywords": -1}, 102, "Field: <parser_config.auto_keywords> - Message: <Input should be greater than or equal to 0> - Value: <-1>"),
+        ({"auto_keywords": 32}, 0, ""),
+        ({"auto_keywords": "1024"}, 102, "Field: <parser_config.auto_keywords> - Message: <Input should be a valid integer> - Value: <1024>"),
+        ({"auto_keywords": 3.14}, 102, "Field: <parser_config.auto_keywords> - Message: <Input should be a valid integer> - Value: <3.14>"),
+        ({"auto_questions": -1}, 102, "Field: <parser_config.auto_questions> - Message: <Input should be greater than or equal to 0> - Value: <-1>"),
+        ({"auto_questions": 10}, 0, ""),
+        ({"auto_questions": 3.14}, 102, "Field: <parser_config.auto_questions> - Message: <Input should be a valid integer> - Value: <3.14>"),
+        ({"auto_questions": "1024"}, 102, "Field: <parser_config.auto_questions> - Message: <Input should be a valid integer> - Value: <1024>"),
+        ({"topn_tags": -1}, 102, "Field: <parser_config.topn_tags> - Message: <Input should be greater than or equal to 1> - Value: <-1>"),
+        ({"topn_tags": 10}, 0, ""),
+        ({"topn_tags": 3.14}, 102, "Field: <parser_config.topn_tags> - Message: <Input should be a valid integer> - Value: <3.14>"),
+        ({"topn_tags": "1024"}, 102, "Field: <parser_config.topn_tags> - Message: <Input should be a valid integer> - Value: <1024>"),
+    ]
+    for parser_config, expected_code, expected_message in parser_cases:
+        res = rest_client.patch(
+            f"/datasets/{dataset_id}/documents/{first_document_id}",
+            json={"chunk_method": "naive", "parser_config": parser_config},
+        )
+        assert res.status_code == 200, (parser_config, res.text)
+        body = res.json()
+        assert body["code"] == expected_code, (parser_config, body)
+        if expected_code == 0:
+            list_res = rest_client.get(f"/datasets/{dataset_id}/documents", params={"id": first_document_id})
+            assert list_res.status_code == 200, (parser_config, list_res.text)
+            list_body = list_res.json()
+            assert list_body["code"] == 0, (parser_config, list_body)
+            doc_parser_config = list_body["data"]["docs"][0]["parser_config"]
+            if parser_config == {}:
+                assert doc_parser_config == DEFAULT_PARSER_CONFIG, (parser_config, list_body)
+            else:
+                for key, value in parser_config.items():
+                    if isinstance(value, dict):
+                        for sub_key, sub_value in value.items():
+                            assert doc_parser_config[key][sub_key] == sub_value, (parser_config, list_body)
+                    else:
+                        assert doc_parser_config[key] == value, (parser_config, list_body)
+        else:
+            assert body["message"] == expected_message, (parser_config, body)
+
+
 @pytest.mark.p2
 def test_documents_parse_and_stop(rest_client, create_document):
     dataset_id, document_id = create_document("parse_target.txt")
@@ -533,6 +855,187 @@ def test_documents_parse_and_stop(rest_client, create_document):
         assert "already completed" in stop_payload["message"], stop_payload
 
 
+@pytest.mark.p2
+def test_documents_metadata_batch_update_contract(rest_client, create_dataset, tmp_path):
+    dataset_id, uploaded_docs = _seed_documents(rest_client, create_dataset, tmp_path, count=5)
+    document_ids = [doc["id"] for doc in uploaded_docs]
+
+    for scenario_name, client in (("missing token", RestClient(token=None)), ("invalid token", RestClient(token=INVALID_API_TOKEN))):
+        res = client.patch(
+            f"/datasets/{dataset_id}/documents/metadatas",
+            json={"selector": {"document_ids": document_ids[:1]}, "updates": [], "deletes": []},
+        )
+        assert res.status_code == 401, (scenario_name, res.text)
+        payload = res.json()
+        assert payload["code"] == 401, (scenario_name, payload)
+        assert payload["message"] == "<Unauthorized '401: Unauthorized'>", (scenario_name, payload)
+
+    invalid_dataset_res = rest_client.patch(
+        "/datasets/invalid_dataset_id/documents/metadatas",
+        json={"selector": {"document_ids": []}, "updates": [], "deletes": []},
+    )
+    assert invalid_dataset_res.status_code == 200
+    invalid_dataset_payload = invalid_dataset_res.json()
+    assert invalid_dataset_payload["code"] == 102, invalid_dataset_payload
+    assert invalid_dataset_payload["message"] == "You don't own the dataset invalid_dataset_id.", invalid_dataset_payload
+
+    validation_cases = [
+        ("selector not object", {"selector": [1], "updates": [], "deletes": []}, 102, "selector must be an object."),
+        ("updates not list", {"selector": {}, "updates": {"key": "value"}, "deletes": []}, 102, "updates and deletes must be lists."),
+        ("metadata condition not object", {"selector": {"metadata_condition": [1]}, "updates": [], "deletes": []}, 102, "metadata_condition must be an object."),
+        ("document ids not list", {"selector": {"document_ids": "doc-1"}, "updates": [], "deletes": []}, 102, "document_ids must be a list."),
+        ("update missing key", {"selector": {}, "updates": [{"key": ""}], "deletes": []}, 102, "Each update requires key and value."),
+        ("delete missing key", {"selector": {}, "updates": [], "deletes": [{"x": "y"}]}, 102, "Each delete requires key."),
+        (
+            "document ids wrong dataset",
+            {"selector": {"document_ids": ["doc-does-not-exist-1", "doc-does-not-exist-2"]}, "updates": [{"key": "author", "value": "test"}], "deletes": []},
+            102,
+            f"These documents do not belong to dataset {dataset_id}: ",
+        ),
+    ]
+    for scenario_name, payload, expected_code, expected_message in validation_cases:
+        res = rest_client.patch(f"/datasets/{dataset_id}/documents/metadatas", json=payload)
+        assert res.status_code == 200, (scenario_name, res.text)
+        body = res.json()
+        assert body["code"] == expected_code, (scenario_name, body)
+        if scenario_name == "document ids wrong dataset":
+            assert body["message"].startswith(expected_message), (scenario_name, body)
+            invalid_ids = set(body["message"][len(expected_message) :].split(", "))
+            assert invalid_ids == {"doc-does-not-exist-1", "doc-does-not-exist-2"}, (scenario_name, body)
+        else:
+            assert body["message"] == expected_message, (scenario_name, body)
+
+    update_by_ids_res = rest_client.patch(
+        f"/datasets/{dataset_id}/documents/metadatas",
+        json={
+            "selector": {"document_ids": document_ids},
+            "updates": [{"key": "author", "value": "test_author"}, {"key": "status", "value": "processed"}],
+            "deletes": [],
+        },
+    )
+    assert update_by_ids_res.status_code == 200
+    update_by_ids_payload = update_by_ids_res.json()
+    assert update_by_ids_payload["code"] == 0, update_by_ids_payload
+    assert update_by_ids_payload["data"] == {"updated": 5, "matched_docs": 5}, update_by_ids_payload
+
+    filtered_update_res = rest_client.patch(
+        f"/datasets/{dataset_id}/documents/metadatas",
+        json={
+            "selector": {
+                "document_ids": document_ids,
+                "metadata_condition": {"conditions": [{"comparison_operator": "is", "name": "status", "value": "processed"}]},
+            },
+            "updates": [{"key": "author", "value": "filtered_author"}],
+            "deletes": [],
+        },
+    )
+    assert filtered_update_res.status_code == 200
+    filtered_update_payload = filtered_update_res.json()
+    assert filtered_update_payload["code"] == 0, filtered_update_payload
+    assert filtered_update_payload["data"] == {"updated": 5, "matched_docs": 5}, filtered_update_payload
+
+    delete_metadata_res = rest_client.patch(
+        f"/datasets/{dataset_id}/documents/metadatas",
+        json={
+            "selector": {"document_ids": document_ids},
+            "updates": [],
+            "deletes": [{"key": "author"}],
+        },
+    )
+    assert delete_metadata_res.status_code == 200
+    delete_metadata_payload = delete_metadata_res.json()
+    assert delete_metadata_payload["code"] == 0, delete_metadata_payload
+    assert delete_metadata_payload["data"] == {"updated": 5, "matched_docs": 5}, delete_metadata_payload
+
+    combined_res = rest_client.patch(
+        f"/datasets/{dataset_id}/documents/metadatas",
+        json={
+            "selector": {"document_ids": document_ids},
+            "updates": [{"key": "author", "value": "new_author"}],
+            "deletes": [{"key": "status"}],
+        },
+    )
+    assert combined_res.status_code == 200
+    combined_payload = combined_res.json()
+    assert combined_payload["code"] == 0, combined_payload
+    assert combined_payload["data"] == {"updated": 5, "matched_docs": 5}, combined_payload
+
+    empty_ids_res = rest_client.patch(
+        f"/datasets/{dataset_id}/documents/metadatas",
+        json={"selector": {"document_ids": []}, "updates": [{"key": "author", "value": "test"}], "deletes": []},
+    )
+    assert empty_ids_res.status_code == 200
+    empty_ids_payload = empty_ids_res.json()
+    assert empty_ids_payload["code"] == 0, empty_ids_payload
+    assert empty_ids_payload["data"] == {"updated": 0, "matched_docs": 0}, empty_ids_payload
+
+    no_match_res = rest_client.patch(
+        f"/datasets/{dataset_id}/documents/metadatas",
+        json={
+            "selector": {
+                "document_ids": document_ids,
+                "metadata_condition": {"conditions": [{"comparison_operator": "is", "name": "nonexistent_key", "value": "nonexistent_value"}]},
+            },
+            "updates": [{"key": "author", "value": "test"}],
+            "deletes": [],
+        },
+    )
+    assert no_match_res.status_code == 200
+    no_match_payload = no_match_res.json()
+    assert no_match_payload["code"] == 0, no_match_payload
+    assert no_match_payload["data"] == {"updated": 0, "matched_docs": 0}, no_match_payload
+
+
+@pytest.mark.p2
+def test_document_metadata_config_contract(rest_client, create_document):
+    dataset_id, document_id = create_document("document_metadata_config_contract.txt")
+
+    for scenario_name, client in (("missing token", RestClient(token=None)), ("invalid token", RestClient(token=INVALID_API_TOKEN))):
+        res = client.put(
+            f"/datasets/{dataset_id}/documents/{document_id}/metadata/config",
+            json={"metadata": {"author": "alice"}},
+        )
+        assert res.status_code == 401, (scenario_name, res.text)
+        payload = res.json()
+        assert payload["code"] == 401, (scenario_name, payload)
+        assert payload["message"] == "<Unauthorized '401: Unauthorized'>", (scenario_name, payload)
+
+    missing_payload_res = rest_client.put(f"/datasets/{dataset_id}/documents/{document_id}/metadata/config", json={})
+    assert missing_payload_res.status_code == 200
+    missing_payload = missing_payload_res.json()
+    assert missing_payload["code"] == 101, missing_payload
+    assert missing_payload["message"] == "metadata is required", missing_payload
+
+    invalid_dataset_res = rest_client.put(
+        f"/datasets/{INVALID_ID_32}/documents/{document_id}/metadata/config",
+        json={"metadata": {"author": "alice"}},
+    )
+    assert invalid_dataset_res.status_code == 200
+    invalid_dataset_payload = invalid_dataset_res.json()
+    assert invalid_dataset_payload["code"] == 102, invalid_dataset_payload
+    assert invalid_dataset_payload["message"] == "You don't own the dataset.", invalid_dataset_payload
+
+    invalid_document_res = rest_client.put(
+        f"/datasets/{dataset_id}/documents/{INVALID_ID_32}/metadata/config",
+        json={"metadata": {"author": "alice"}},
+    )
+    assert invalid_document_res.status_code == 200
+    invalid_document_payload = invalid_document_res.json()
+    assert invalid_document_payload["code"] == 102, invalid_document_payload
+    assert invalid_document_payload["message"] == f"Document {INVALID_ID_32} not found in dataset {dataset_id}", invalid_document_payload
+
+    update_payload = {"metadata": {"author": "alice", "tags": ["one", "two"]}}
+    update_res = rest_client.put(
+        f"/datasets/{dataset_id}/documents/{document_id}/metadata/config",
+        json=update_payload,
+    )
+    assert update_res.status_code == 200
+    update_body = update_res.json()
+    assert update_body["code"] == 0, update_body
+    parser_config = update_body["data"]["parser_config"]
+    assert parser_config["metadata"] == update_payload["metadata"], update_body
+
+
 @pytest.mark.p2
 def test_documents_metadata_update_path(rest_client, create_document):
     dataset_id, document_id = create_document("metadata_target.txt")
diff --git a/test/testcases/restful_api/test_retrieval.py b/test/testcases/restful_api/test_retrieval.py
index 6bfe40b28e..3c7eca7d10 100644
--- a/test/testcases/restful_api/test_retrieval.py
+++ b/test/testcases/restful_api/test_retrieval.py
@@ -275,6 +275,45 @@ def test_retrieval_vector_similarity_and_top_k_contract(rest_client, ensure_pars
             assert expected_message in body["message"], (scenario_name, body)
 
 
+@pytest.mark.p2
+def test_retrieval_document_ids_and_metadata_condition_contract(rest_client, ensure_parsed_document):
+    dataset_id, document_id = ensure_parsed_document()
+
+    invalid_doc_ids_res = rest_client.post(
+        "/retrieval",
+        json={"question": "chunk", "dataset_ids": [dataset_id], "document_ids": "bad"},
+    )
+    assert invalid_doc_ids_res.status_code == 200
+    invalid_doc_ids_payload = invalid_doc_ids_res.json()
+    assert invalid_doc_ids_payload["code"] == 102, invalid_doc_ids_payload
+    assert invalid_doc_ids_payload["message"] == "`documents` should be a list", invalid_doc_ids_payload
+
+    not_owned_doc_res = rest_client.post(
+        "/retrieval",
+        json={"question": "chunk", "dataset_ids": [dataset_id], "document_ids": ["not-owned"]},
+    )
+    assert not_owned_doc_res.status_code == 200
+    not_owned_doc_payload = not_owned_doc_res.json()
+    assert not_owned_doc_payload["code"] == 102, not_owned_doc_payload
+    assert not_owned_doc_payload["message"] == "The datasets don't own the document not-owned", not_owned_doc_payload
+
+    metadata_condition_res = rest_client.post(
+        "/retrieval",
+        json={
+            "question": "chunk",
+            "dataset_ids": [dataset_id],
+            "metadata_condition": {
+                "logic": "and",
+                "conditions": [{"name": "author", "comparison_operator": "is", "value": "missing"}],
+            },
+        },
+    )
+    assert metadata_condition_res.status_code == 200
+    metadata_condition_payload = metadata_condition_res.json()
+    assert metadata_condition_payload["code"] == 0, metadata_condition_payload
+    assert metadata_condition_payload["data"]["chunks"] == [], metadata_condition_payload
+
+
 @pytest.mark.p2
 def test_retrieval_rerank_unknown_contract(rest_client, ensure_parsed_document):
     dataset_id, _ = ensure_parsed_document()