diff --git a/api/apps/restful_apis/chunk_api.py b/api/apps/restful_apis/chunk_api.py index eae8ae2f41..8f16ea28f1 100644 --- a/api/apps/restful_apis/chunk_api.py +++ b/api/apps/restful_apis/chunk_api.py @@ -165,6 +165,11 @@ async def parse(tenant_id, dataset_id): dataset_tenant_id = _get_dataset_tenant_id(dataset_id) if not dataset_tenant_id: return get_error_data_result(message=f"You don't own the dataset {dataset_id}.") + e, kb = KnowledgebaseService.get_by_id(dataset_id) + if not e: + return get_error_data_result(message=f"You don't own the dataset {dataset_id}.") + if kb.pipeline_id: + return get_error_data_result(message="Datasets configured with an ingestion pipeline cannot be parsed with `/datasets/{dataset_id}/chunks`. Use `/documents/ingest` instead.", code=RetCode.ARGUMENT_ERROR) req = await get_request_json() if not req.get("document_ids"): return get_error_data_result("`document_ids` is required") diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index 7d65c312e3..ba06d55860 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -1952,7 +1952,11 @@ Failure: **POST** `/api/v1/datasets/{dataset_id}/chunks` -Parses documents in a specified dataset. +Parses documents in a specified dataset using the built-in chunking pipeline. + +:::note +This endpoint only supports datasets that use the built-in chunking pipeline. For datasets configured with an ingestion pipeline, use `POST /api/v1/documents/ingest` instead. +::: #### Request @@ -2005,6 +2009,70 @@ Failure: --- +### Ingest documents + +**POST** `/api/v1/documents/ingest` + +Starts, cancels, or reruns ingestion for documents. Use this endpoint for documents in datasets configured with an ingestion pipeline. 
+ +#### Request + +- Method: POST +- URL: `/api/v1/documents/ingest` +- Headers: + - `'Content-Type: application/json'` + - `'Authorization: Bearer <YOUR_API_KEY>'` +- Body: + - `"doc_ids"`: `list[string]` + - `"run"`: `string` + - `"delete"`: `boolean` + +##### Request example + +```bash +curl --request POST \ + --url http://{address}/api/v1/documents/ingest \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer <YOUR_API_KEY>' \ + --data ' + { + "doc_ids": ["97a5f1c2759811efaa500242ac120004"], + "run": "1", + "delete": true + }' +``` + +##### Request parameters + +- `"doc_ids"`: (*Body parameter*), `list[string]`, *Required* + The IDs of the documents to ingest. +- `"run"`: (*Body parameter*), `string`, *Required* + The ingestion action. Use `"1"` to start ingestion and `"2"` to cancel ingestion. +- `"delete"`: (*Body parameter*), `boolean` + Whether to delete existing tasks and chunks before rerunning. Defaults to `false`. + +#### Response + +Success: + +```json +{ + "code": 0, + "data": true +} +``` + +Failure: + +```json +{ + "code": 102, + "message": "Document not found!" +} +``` + +--- + ### Stop parsing documents **DELETE** `/api/v1/datasets/{dataset_id}/chunks`