diff --git a/api/apps/restful_apis/agent_api.py b/api/apps/restful_apis/agent_api.py index efaf3285d9..0eda78f107 100644 --- a/api/apps/restful_apis/agent_api.py +++ b/api/apps/restful_apis/agent_api.py @@ -58,6 +58,7 @@ from api.utils.api_utils import ( server_error_response, validate_request, ) +from api.utils.pagination_utils import validate_rest_api_page_size from common import settings from common.ssrf_guard import assert_host_is_safe from common.constants import RetCode @@ -349,7 +350,7 @@ def list_agent_sessions(agent_id, tenant_id): session_id = request.args.get("id") user_id = request.args.get("user_id") page_number = int(request.args.get("page", 1)) - items_per_page = int(request.args.get("page_size", 30)) + items_per_page = validate_rest_api_page_size(int(request.args.get("page_size", 30))) keywords = request.args.get("keywords") from_date = request.args.get("from_date") to_date = request.args.get("to_date") @@ -517,7 +518,7 @@ def list_agents(tenant_id): tags = [item for item in request.args.get("tags", "").strip().split(",") if item] page_number = int(request.args.get("page", 0)) - items_per_page = int(request.args.get("page_size", 0)) + items_per_page = validate_rest_api_page_size(int(request.args.get("page_size", 0))) order_by = request.args.get("orderby", "create_time") desc = str(request.args.get("desc", "true")).lower() != "false" tenants = TenantService.get_joined_tenants_by_user_id(tenant_id) diff --git a/api/apps/restful_apis/chat_api.py b/api/apps/restful_apis/chat_api.py index 164eae4883..a46b93d423 100644 --- a/api/apps/restful_apis/chat_api.py +++ b/api/apps/restful_apis/chat_api.py @@ -45,6 +45,7 @@ from api.utils.api_utils import ( server_error_response, validate_request, ) +from api.utils.pagination_utils import validate_rest_api_page_size from api.utils.tenant_utils import ensure_tenant_model_id_for_params from common.constants import LLMType, RetCode, StatusEnum from common import settings @@ -419,7 +420,7 @@ async def list_chats(): 
try: page_number = int(request.args.get("page", 0)) - items_per_page = int(request.args.get("page_size", 0)) + items_per_page = validate_rest_api_page_size(int(request.args.get("page_size", 0))) tenants = TenantService.get_joined_tenants_by_user_id(current_user.id) authorized_owner_ids = {member["tenant_id"] for member in tenants} @@ -769,7 +770,7 @@ async def list_sessions(chat_id): code=RetCode.AUTHENTICATION_ERROR, ) page_number = int(request.args.get("page", 1)) - items_per_page = int(request.args.get("page_size", 30)) + items_per_page = validate_rest_api_page_size(int(request.args.get("page_size", 30))) orderby = request.args.get("orderby", "create_time") desc = request.args.get("desc", "true").lower() != "false" session_id = request.args.get("id") diff --git a/api/apps/restful_apis/chunk_api.py b/api/apps/restful_apis/chunk_api.py index 3774a37461..8bb677372a 100644 --- a/api/apps/restful_apis/chunk_api.py +++ b/api/apps/restful_apis/chunk_api.py @@ -46,6 +46,7 @@ from api.utils.api_utils import ( server_error_response, token_required, ) +from api.utils.pagination_utils import validate_rest_api_page_size from api.utils.image_utils import store_chunk_image from api.utils.reference_metadata_utils import ( enrich_chunks_with_document_metadata, @@ -244,7 +245,7 @@ async def retrieval_test(tenant_id): if "question" not in req: return get_error_data_result("`question` is required.") page = int(req.get("page", 1)) - size = int(req.get("page_size", 30)) + size = validate_rest_api_page_size(int(req.get("page_size", 30))) question = req["question"].strip() if isinstance(req["question"], str) else req["question"] if not question: return get_result(data={"total": 0, "chunks": [], "doc_aggs": {}}) @@ -365,7 +366,7 @@ async def list_chunks(tenant_id, dataset_id, document_id): doc = doc[0] req = request.args page = int(req.get("page", 1)) - size = int(req.get("page_size", 30)) + size = validate_rest_api_page_size(int(req.get("page_size", 30))) question = req.get("keywords", 
"") query = { "doc_ids": [document_id], diff --git a/api/apps/restful_apis/connector_api.py b/api/apps/restful_apis/connector_api.py index 89287a706d..5e799cd814 100644 --- a/api/apps/restful_apis/connector_api.py +++ b/api/apps/restful_apis/connector_api.py @@ -27,6 +27,7 @@ from google_auth_oauthlib.flow import Flow from api.db import InputType from api.db.services.connector_service import ConnectorService, SyncLogsService from api.utils.api_utils import get_data_error_result, get_json_result, get_request_json, validate_request +from api.utils.pagination_utils import validate_rest_api_page_size from common.constants import RetCode, TaskStatus from common.data_source.config import GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI, GMAIL_WEB_OAUTH_REDIRECT_URI, BOX_WEB_OAUTH_REDIRECT_URI, DocumentSource from common.data_source.google_util.constant import WEB_OAUTH_POPUP_TEMPLATE, GOOGLE_SCOPES @@ -140,7 +141,11 @@ def list_logs(connector_id): return _connector_auth_error(connector_id, current_user.id) req = request.args.to_dict(flat=True) - arr, total = SyncLogsService.list_sync_tasks(connector_id, int(req.get("page", 1)), int(req.get("page_size", 15))) + arr, total = SyncLogsService.list_sync_tasks( + connector_id, + int(req.get("page", 1)), + validate_rest_api_page_size(int(req.get("page_size", 15))), + ) return get_json_result(data={"total": total, "logs": arr}) diff --git a/api/apps/restful_apis/dataset_api.py b/api/apps/restful_apis/dataset_api.py index df1862592c..f1c5797c34 100644 --- a/api/apps/restful_apis/dataset_api.py +++ b/api/apps/restful_apis/dataset_api.py @@ -20,6 +20,7 @@ from quart import request from common.constants import RetCode from api.apps import login_required, current_user from api.utils.api_utils import get_error_argument_result, get_error_data_result, get_json_result, get_result, add_tenant_id_to_kwargs +from api.utils.pagination_utils import validate_rest_api_page_size from api.utils.validation_utils import ( CreateDatasetReq, DeleteDatasetReq, @@ 
-665,7 +666,7 @@ async def check_embedding(tenant_id, dataset_id): def list_ingestion_logs(tenant_id, dataset_id): try: page = int(request.args.get("page", 0)) - page_size = int(request.args.get("page_size", 0)) + page_size = validate_rest_api_page_size(int(request.args.get("page_size", 0))) orderby = request.args.get("orderby", "create_time") desc = request.args.get("desc", "true").lower() != "false" operation_status = request.args.getlist("operation_status") diff --git a/api/apps/restful_apis/document_api.py b/api/apps/restful_apis/document_api.py index cf1b5341a8..da4643faf1 100644 --- a/api/apps/restful_apis/document_api.py +++ b/api/apps/restful_apis/document_api.py @@ -41,6 +41,7 @@ from api.common.check_team_permission import check_kb_team_permission from api.db.services.task_service import TaskService, cancel_all_task_of from api.utils.api_utils import construct_json_result, get_data_error_result, get_error_data_result, get_result, get_json_result, \ server_error_response, add_tenant_id_to_kwargs, get_request_json, get_error_argument_result, check_duplicate_ids +from api.utils.pagination_utils import validate_rest_api_page_size from api.utils.validation_utils import ( UpdateDocumentReq, format_validation_error_message, validate_and_parse_json_request, DeleteDocumentReq, ) @@ -795,7 +796,7 @@ def _get_docs_with_request(req, dataset_id:str): q = req.args page = int(q.get("page", 1)) - page_size = int(q.get("page_size", 30)) + page_size = validate_rest_api_page_size(int(q.get("page_size", 30))) orderby = q.get("orderby", "create_time") desc = str(q.get("desc", "true")).strip().lower() != "false" diff --git a/api/apps/restful_apis/mcp_api.py b/api/apps/restful_apis/mcp_api.py index b3f39fa4bf..39b78fa9d2 100644 --- a/api/apps/restful_apis/mcp_api.py +++ b/api/apps/restful_apis/mcp_api.py @@ -21,6 +21,7 @@ from api.db.db_models import MCPServer from api.db.services.mcp_server_service import MCPServerService from api.db.services.user_service import TenantService 
from api.utils.api_utils import get_data_error_result, get_json_result, get_mcp_tools, get_request_json, server_error_response, validate_request +from api.utils.pagination_utils import validate_rest_api_page_size from api.utils.web_utils import get_float, safe_json_parse from common.constants import VALID_MCP_SERVER_TYPES from common.mcp_tool_call_conn import MCPToolCallSession, close_multiple_mcp_toolcall_sessions @@ -71,7 +72,7 @@ def _assert_mcp_url_is_safe(url, invalid_message: str = "Invalid url.") -> tuple async def list_mcp() -> Response: keywords = request.args.get("keywords", "") page_number = int(request.args.get("page", 0)) - items_per_page = int(request.args.get("page_size", 0)) + items_per_page = validate_rest_api_page_size(int(request.args.get("page_size", 0))) orderby = request.args.get("orderby", "create_time") if request.args.get("desc", "true").lower() == "false": desc = False diff --git a/api/apps/restful_apis/memory_api.py b/api/apps/restful_apis/memory_api.py index 1be67b8a70..53319bb6dd 100644 --- a/api/apps/restful_apis/memory_api.py +++ b/api/apps/restful_apis/memory_api.py @@ -24,6 +24,7 @@ from api.apps import login_required, current_user from api.utils.api_utils import validate_request, get_request_json, get_error_argument_result, get_json_result from api.apps.services import memory_api_service from api.utils.tenant_utils import ensure_tenant_model_id_for_params +from api.utils.pagination_utils import validate_rest_api_page_size @manager.route("/memories", methods=["POST"]) # noqa: F821 @@ -134,7 +135,7 @@ async def list_memory(): } keywords = request.args.get("keywords") page = int(request.args.get("page", 1)) - page_size = int(request.args.get("page_size", 50)) + page_size = validate_rest_api_page_size(int(request.args.get("page_size", 50))) try: res = await memory_api_service.list_memory(filter_params, keywords, page, page_size) return get_json_result(message=True, data=res) @@ -167,7 +168,7 @@ async def get_memory_messages(memory_id): 
keywords = args.get("keywords", "") keywords = keywords.strip() page = int(args.get("page", 1)) - page_size = int(args.get("page_size", 50)) + page_size = validate_rest_api_page_size(int(args.get("page_size", 50))) try: res = await memory_api_service.get_memory_messages( memory_id, agent_ids, keywords, page, page_size diff --git a/api/apps/restful_apis/search_api.py b/api/apps/restful_apis/search_api.py index 2693dad95b..e921cebd45 100644 --- a/api/apps/restful_apis/search_api.py +++ b/api/apps/restful_apis/search_api.py @@ -30,6 +30,7 @@ from api.db.services.user_service import TenantService, UserTenantService from common.misc_utils import get_uuid from common.constants import RetCode, StatusEnum from api.utils.api_utils import get_data_error_result, get_json_result, get_request_json, server_error_response, validate_request +from api.utils.pagination_utils import validate_rest_api_page_size def _full_text_weight(vector_similarity_weight): @@ -77,7 +78,7 @@ async def create(): def list_searches(): keywords = request.args.get("keywords", "") page_number = int(request.args.get("page", 0)) - items_per_page = int(request.args.get("page_size", 0)) + items_per_page = validate_rest_api_page_size(int(request.args.get("page_size", 0))) orderby = request.args.get("orderby", "create_time") desc = request.args.get("desc", "true").lower() != "false" owner_ids = request.args.getlist("owner_ids") diff --git a/api/utils/pagination_utils.py b/api/utils/pagination_utils.py new file mode 100644 index 0000000000..8f38eec63a --- /dev/null +++ b/api/utils/pagination_utils.py @@ -0,0 +1,24 @@ +# +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
REST_API_MAX_PAGE_SIZE = 100  # Public upper bound for REST API page_size values.


def validate_rest_api_page_size(page_size: int) -> int:
    """Check a requested page size against ``REST_API_MAX_PAGE_SIZE``.

    Only the upper bound is enforced here: any value that is not greater
    than the maximum (zero and negative numbers included) is handed back
    untouched.

    Args:
        page_size: Requested number of items per page.

    Returns:
        The same ``page_size`` that was passed in.

    Raises:
        ValueError: If ``page_size`` is greater than ``REST_API_MAX_PAGE_SIZE``.
    """
    limit = REST_API_MAX_PAGE_SIZE
    within_limit = not page_size > limit
    if within_limit:
        return page_size
    raise ValueError(f"page_size must be less than or equal to {limit}")
+ def validate_page_size(cls, v: int) -> int: + return validate_rest_api_page_size(v) + def validate_immutable_fields(update_doc_req: UpdateDocumentReq, doc): """ diff --git a/test/playwright/conftest.py b/test/playwright/conftest.py index 6b62636193..d421064151 100644 --- a/test/playwright/conftest.py +++ b/test/playwright/conftest.py @@ -1189,7 +1189,7 @@ def _ensure_dataset_ready_via_api( base_url: str, auth_header: str, dataset_name: str ) -> dict: headers = {"Authorization": auth_header} - list_url = _build_url(base_url, "/api/v1/datasets?page=1&page_size=200") + list_url = _build_url(base_url, "/api/v1/datasets?page=1&page_size=100") _, list_payload = _api_request_json(list_url, method="GET", headers=headers) existing = _find_dataset_by_name(list_payload, dataset_name) diff --git a/test/testcases/restful_api/test_chunks.py b/test/testcases/restful_api/test_chunks.py index c483658449..67faf9f539 100644 --- a/test/testcases/restful_api/test_chunks.py +++ b/test/testcases/restful_api/test_chunks.py @@ -499,7 +499,7 @@ def test_chunk_delete_concurrent_and_bulk_contract(rest_client, create_document) for index in range(40): payload = rest_client.post(base_path, json={"content": f"bulk chunk {index}"}).json() assert payload["code"] == 0, payload - bulk_ids_payload = rest_client.get(base_path, params={"page_size": 200}).json() + bulk_ids_payload = rest_client.get(base_path, params={"page_size": 100}).json() assert bulk_ids_payload["code"] == 0, bulk_ids_payload bulk_ids = [chunk["id"] for chunk in bulk_ids_payload["data"]["chunks"]] bulk_res = rest_client.delete(base_path, json={"chunk_ids": bulk_ids}) diff --git a/test/testcases/test_http_api/common.py b/test/testcases/test_http_api/common.py index f62cf6338d..499baee818 100644 --- a/test/testcases/test_http_api/common.py +++ b/test/testcases/test_http_api/common.py @@ -59,7 +59,7 @@ def delete_datasets(auth, payload=None, *, headers=HEADERS, data=None): return res.json() -def delete_all_datasets(auth, *, 
page_size=1000): +def delete_all_datasets(auth, *, page_size=100): return delete_datasets(auth, {"ids": None, "delete_all": True}) @@ -133,7 +133,7 @@ def delete_documents(auth, dataset_id, payload=None): return res.json() -def delete_all_documents(auth, dataset_id, *, page_size=1000): +def delete_all_documents(auth, dataset_id, *, page_size=100): return delete_documents(auth, dataset_id, {"ids": None, "delete_all": True}) @@ -192,7 +192,7 @@ def delete_chunks(auth, dataset_id, document_id, payload=None): return res.json() -def delete_all_chunks(auth, dataset_id, document_id, *, page_size=1000): +def delete_all_chunks(auth, dataset_id, document_id, *, page_size=100): return delete_chunks(auth, dataset_id, document_id, {"chunk_ids": None, "delete_all": True}) @@ -247,7 +247,7 @@ def delete_chat_assistants(auth, payload=None): return res.json() -def delete_all_chat_assistants(auth, *, page_size=1000): +def delete_all_chat_assistants(auth, *, page_size=100): return delete_chat_assistants(auth, {"ids": None, "delete_all": True}) @@ -284,7 +284,7 @@ def delete_session_with_chat_assistants(auth, chat_assistant_id, payload=None): return res.json() -def delete_all_sessions_with_chat_assistant(auth, chat_assistant_id, *, page_size=1000): +def delete_all_sessions_with_chat_assistant(auth, chat_assistant_id, *, page_size=100): return delete_session_with_chat_assistants(auth, chat_assistant_id, {"ids": None, "delete_all": True}) @@ -378,7 +378,7 @@ def delete_agent_sessions(auth, agent_id, payload=None): return res.json() -def delete_all_agent_sessions(auth, agent_id, *, page_size=1000): +def delete_all_agent_sessions(auth, agent_id, *, page_size=100): return delete_agent_sessions(auth, agent_id, {"ids": None, "delete_all": True}) @@ -525,4 +525,3 @@ def search_dataset(auth, dataset_id, payload=None, *, headers=HEADERS): return res.json() - diff --git a/test/testcases/test_http_api/test_session_management/test_agent_completions.py 
b/test/testcases/test_http_api/test_session_management/test_agent_completions.py index 6e332436ad..18f0392471 100644 --- a/test/testcases/test_http_api/test_session_management/test_agent_completions.py +++ b/test/testcases/test_http_api/test_session_management/test_agent_completions.py @@ -58,7 +58,7 @@ def _agent_items(res): @pytest.fixture(scope="function") def agent_id(HttpApiAuth, request): - res = list_agents(HttpApiAuth, {"page_size": 1000}) + res = list_agents(HttpApiAuth, {"page_size": 100}) assert res["code"] == 0, res for agent in _agent_items(res): if agent.get("title") == AGENT_TITLE: diff --git a/test/testcases/test_http_api/test_session_management/test_agent_sessions.py b/test/testcases/test_http_api/test_session_management/test_agent_sessions.py index 7d47954573..6c606c9686 100644 --- a/test/testcases/test_http_api/test_session_management/test_agent_sessions.py +++ b/test/testcases/test_http_api/test_session_management/test_agent_sessions.py @@ -58,7 +58,7 @@ def _agent_items(res): @pytest.fixture(scope="function") def agent_id(HttpApiAuth, request): - res = list_agents(HttpApiAuth, {"page_size": 1000}) + res = list_agents(HttpApiAuth, {"page_size": 100}) assert res["code"] == 0, res for agent in _agent_items(res): if agent.get("title") == AGENT_TITLE: diff --git a/test/testcases/test_sdk_api/common.py b/test/testcases/test_sdk_api/common.py index eebb835238..4de02830d0 100644 --- a/test/testcases/test_sdk_api/common.py +++ b/test/testcases/test_sdk_api/common.py @@ -25,11 +25,11 @@ def batch_create_datasets(client: RAGFlow, num: int) -> list[DataSet]: return [client.create_dataset(name=f"dataset_{i}") for i in range(num)] -def delete_all_datasets(client: RAGFlow, *, page_size: int = 1000) -> None: +def delete_all_datasets(client: RAGFlow, *, page_size: int = 100) -> None: client.delete_datasets(delete_all=True) -def delete_all_chats(client: RAGFlow, *, page_size: int = 1000) -> None: +def delete_all_chats(client: RAGFlow, *, page_size: int = 100) -> 
None: client.delete_chats(delete_all=True) @@ -45,15 +45,15 @@ def bulk_upload_documents(dataset: DataSet, num: int, tmp_path: Path) -> list[Do return dataset.upload_documents(document_infos) -def delete_all_documents(dataset: DataSet, *, page_size: int = 1000) -> None: +def delete_all_documents(dataset: DataSet, *, page_size: int = 100) -> None: dataset.delete_documents(delete_all=True) -def delete_all_sessions(chat_assistant: Chat, *, page_size: int = 1000) -> None: +def delete_all_sessions(chat_assistant: Chat, *, page_size: int = 100) -> None: chat_assistant.delete_sessions(delete_all=True) -def delete_all_chunks(document: Document, *, page_size: int = 1000) -> None: +def delete_all_chunks(document: Document, *, page_size: int = 100) -> None: document.delete_chunks(delete_all=True) diff --git a/test/testcases/test_sdk_api/conftest.py b/test/testcases/test_sdk_api/conftest.py index 511842fb9d..32c389edb6 100644 --- a/test/testcases/test_sdk_api/conftest.py +++ b/test/testcases/test_sdk_api/conftest.py @@ -48,7 +48,7 @@ from utils.file_utils import ( @wait_for(200, 1, "Document parsing timeout") def condition(_dataset: DataSet): - documents = _dataset.list_documents(page_size=1000) + documents = _dataset.list_documents(page_size=100) for document in documents: if document.run != "DONE": return False diff --git a/test/testcases/test_sdk_api/test_chat_assistant_management/conftest.py b/test/testcases/test_sdk_api/test_chat_assistant_management/conftest.py index 4d1a419e68..19502f977a 100644 --- a/test/testcases/test_sdk_api/test_chat_assistant_management/conftest.py +++ b/test/testcases/test_sdk_api/test_chat_assistant_management/conftest.py @@ -22,7 +22,7 @@ from utils import wait_for @wait_for(200, 1, "Document parsing timeout") def condition(_dataset: DataSet): - documents = _dataset.list_documents(page_size=1000) + documents = _dataset.list_documents(page_size=100) for document in documents: if document.run != "DONE": return False diff --git 
a/test/testcases/test_sdk_api/test_chunk_management_within_dataset/conftest.py b/test/testcases/test_sdk_api/test_chunk_management_within_dataset/conftest.py index 225cfe45b1..57bbd879a0 100644 --- a/test/testcases/test_sdk_api/test_chunk_management_within_dataset/conftest.py +++ b/test/testcases/test_sdk_api/test_chunk_management_within_dataset/conftest.py @@ -25,7 +25,7 @@ from utils import wait_for @wait_for(30, 1, "Document parsing timeout") def condition(_dataset: DataSet): - documents = _dataset.list_documents(page_size=1000) + documents = _dataset.list_documents(page_size=100) for document in documents: if document.run != "DONE": return False @@ -33,7 +33,7 @@ def condition(_dataset: DataSet): @wait_for(30, 1, "Chunk indexing timeout") def chunks_visible(_document: Document, _chunk_ids: list[str]): - visible_ids = {chunk.id for chunk in _document.list_chunks(page_size=1000)} + visible_ids = {chunk.id for chunk in _document.list_chunks(page_size=100)} return set(_chunk_ids).issubset(visible_ids) @pytest.fixture(scope="function") diff --git a/test/testcases/test_sdk_api/test_dataset_mangement/test_create_dataset.py b/test/testcases/test_sdk_api/test_dataset_mangement/test_create_dataset.py index 92505aec5d..3f32de77cc 100644 --- a/test/testcases/test_sdk_api/test_dataset_mangement/test_create_dataset.py +++ b/test/testcases/test_sdk_api/test_dataset_mangement/test_create_dataset.py @@ -51,7 +51,10 @@ class TestCapability: for i in range(count): payload = {"name": f"dataset_{i}"} client.create_dataset(**payload) - assert len(client.list_datasets(page_size=2000)) == count + datasets = [] + for page in range(1, (count // 100) + 1): + datasets.extend(client.list_datasets(page=page, page_size=100)) + assert len(datasets) == count @pytest.mark.p3 def test_create_dataset_concurrent(self, client): diff --git a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py 
b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py index 97a9106628..7003dab6f2 100644 --- a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py +++ b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_parse_documents.py @@ -23,7 +23,7 @@ from utils import wait_for @wait_for(30, 1, "Document parsing timeout") def condition(_dataset: DataSet, _document_ids: list[str] = None): - documents = _dataset.list_documents(page_size=1000) + documents = _dataset.list_documents(page_size=100) if _document_ids is None: for document in documents: @@ -40,7 +40,7 @@ def condition(_dataset: DataSet, _document_ids: list[str] = None): def validate_document_details(dataset, document_ids): - documents = dataset.list_documents(page_size=1000) + documents = dataset.list_documents(page_size=100) for document in documents: if document.id in document_ids: assert document.run == "DONE" diff --git a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_stop_parse_documents.py b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_stop_parse_documents.py index 8454ed9471..3cc4051fb3 100644 --- a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_stop_parse_documents.py +++ b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_stop_parse_documents.py @@ -18,7 +18,7 @@ import pytest def validate_document_parse_done(dataset, document_ids): - documents = dataset.list_documents(page_size=1000) + documents = dataset.list_documents(page_size=100) for document in documents: if document.id in document_ids: assert document.run == "DONE" @@ -29,7 +29,7 @@ def validate_document_parse_done(dataset, document_ids): def validate_document_parse_cancel(dataset, document_ids): - documents = dataset.list_documents(page_size=1000) + documents = dataset.list_documents(page_size=100) for document in documents: assert document.run == "CANCEL" assert 
len(document.process_begin_at) > 0 diff --git a/test/testcases/test_web_api/conftest.py b/test/testcases/test_web_api/conftest.py index 1854103e3b..ab55cf7205 100644 --- a/test/testcases/test_web_api/conftest.py +++ b/test/testcases/test_web_api/conftest.py @@ -104,7 +104,7 @@ def require_env_flag(): @pytest.fixture(scope="function") def clear_datasets(request: FixtureRequest, WebApiAuth: RAGFlowWebApiAuth): def cleanup(): - res = list_datasets(WebApiAuth, params={"page_size": 1000}) + res = list_datasets(WebApiAuth, params={"page_size": 100}) kb_ids = [kb["id"] for kb in res["data"]] delete_datasets(WebApiAuth, {"ids": kb_ids}) @@ -122,7 +122,7 @@ def clear_chats(request, WebApiAuth): @pytest.fixture(scope="class") def add_dataset(request: FixtureRequest, WebApiAuth: RAGFlowWebApiAuth) -> str: def cleanup(): - res = list_datasets(WebApiAuth, params={"page_size": 1000}) + res = list_datasets(WebApiAuth, params={"page_size": 100}) kb_ids = [kb["id"] for kb in res["data"]] delete_datasets(WebApiAuth, {"ids": kb_ids}) @@ -133,7 +133,7 @@ def add_dataset(request: FixtureRequest, WebApiAuth: RAGFlowWebApiAuth) -> str: @pytest.fixture(scope="function") def add_dataset_func(request: FixtureRequest, WebApiAuth: RAGFlowWebApiAuth) -> str: def cleanup(): - res = list_datasets(WebApiAuth, params={"page_size": 1000}) + res = list_datasets(WebApiAuth, params={"page_size": 100}) kb_ids = [kb["id"] for kb in res["data"]] delete_datasets(WebApiAuth, {"ids": kb_ids}) diff --git a/test/unit_test/api/utils/test_doc_validation.py b/test/unit_test/api/utils/test_doc_validation.py index b068e2b499..31ce591ed5 100644 --- a/test/unit_test/api/utils/test_doc_validation.py +++ b/test/unit_test/api/utils/test_doc_validation.py @@ -17,7 +17,14 @@ """Unit tests for api.apps.sdk.doc_validation module.""" from unittest.mock import Mock + +import pytest +from pydantic import ValidationError + +from api.utils.pagination_utils import REST_API_MAX_PAGE_SIZE, validate_rest_api_page_size from 
def test_rest_api_page_size_rejects_values_above_100():
    """Pin the REST API page_size ceiling for the helper and the list request models.

    The maximum itself must be accepted by ``validate_rest_api_page_size``.
    One above the maximum must be rejected by the helper (``ValueError``) and
    by ``ListDatasetReq`` and ``ListFileReq`` (pydantic ``ValidationError``),
    each time with the same message.
    """
    over_limit = REST_API_MAX_PAGE_SIZE + 1
    # Build the expected text from the constant rather than a literal "100",
    # so the assertion follows REST_API_MAX_PAGE_SIZE if the limit changes.
    expected_message = f"page_size must be less than or equal to {REST_API_MAX_PAGE_SIZE}"

    # Boundary value: exactly the maximum is still valid and returned as-is.
    assert validate_rest_api_page_size(REST_API_MAX_PAGE_SIZE) == REST_API_MAX_PAGE_SIZE

    with pytest.raises(ValueError, match=expected_message):
        validate_rest_api_page_size(over_limit)

    # Both request models delegate to the same helper in their validators.
    for request_model in (ListDatasetReq, ListFileReq):
        with pytest.raises(ValidationError, match=expected_message):
            request_model(page_size=over_limit)