2024-10-22 13:12:49 +08:00
|
|
|
#
|
2026-03-24 19:24:41 +08:00
|
|
|
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
2024-10-22 13:12:49 +08:00
|
|
|
#
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
#
|
2026-04-30 18:13:27 +03:00
|
|
|
import logging
|
2024-09-14 13:24:21 +08:00
|
|
|
from io import BytesIO
|
2025-06-05 12:46:29 +08:00
|
|
|
|
2026-03-24 19:24:41 +08:00
|
|
|
from quart import request, send_file
|
2025-06-05 12:46:29 +08:00
|
|
|
|
2026-04-22 10:49:52 +08:00
|
|
|
from api.db.db_models import APIToken, Document, Task
|
2026-03-24 19:24:41 +08:00
|
|
|
from api.db.joint_services.tenant_model_service import get_model_config_by_id, get_model_config_by_type_and_name, get_tenant_default_model_by_type
|
2026-01-28 13:29:34 +08:00
|
|
|
from api.db.services.doc_metadata_service import DocMetadataService
|
2026-03-24 19:24:41 +08:00
|
|
|
from api.db.services.document_service import DocumentService
|
2024-09-12 14:19:45 +08:00
|
|
|
from api.db.services.file2document_service import File2DocumentService
|
|
|
|
|
from api.db.services.knowledgebase_service import KnowledgebaseService
|
2025-08-13 16:41:01 +08:00
|
|
|
from api.db.services.llm_service import LLMBundle
|
2026-03-24 19:24:41 +08:00
|
|
|
from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks
|
2025-08-13 16:41:01 +08:00
|
|
|
from api.db.services.tenant_llm_service import TenantLLMService
|
2026-04-09 11:17:38 +08:00
|
|
|
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_request_json, get_result, server_error_response, token_required
|
2026-03-24 19:24:41 +08:00
|
|
|
from common import settings
|
2026-04-23 14:17:23 +08:00
|
|
|
from common.constants import LLMType, RetCode, TaskStatus
|
2026-03-24 19:24:41 +08:00
|
|
|
from common.metadata_utils import convert_conditions, meta_filter
|
2025-02-26 15:40:52 +08:00
|
|
|
from rag.app.tag import label_question
|
2026-04-23 14:17:23 +08:00
|
|
|
from rag.nlp import search
|
2025-09-23 10:19:25 +08:00
|
|
|
from rag.prompts.generator import cross_languages, keyword_extraction
|
2024-09-12 14:19:45 +08:00
|
|
|
|
2024-10-30 16:15:42 +08:00
|
|
|
# Upper bound on the number of files in one upload request.
# NOTE(review): not referenced in the visible part of this module — presumably
# enforced by an upload handler elsewhere; confirm before changing.
MAXIMUM_OF_UPLOADING_FILES = 256
|
|
|
|
|
|
|
|
|
|
|
2026-04-30 18:13:27 +03:00
|
|
|
from api.utils.reference_metadata_utils import (
|
|
|
|
|
enrich_chunks_with_document_metadata,
|
|
|
|
|
resolve_reference_metadata_preferences,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def _resolve_reference_metadata(req: dict, search_config: dict | None = None):
    """Delegate to ``resolve_reference_metadata_preferences``.

    Args:
        req: Request payload, forwarded unchanged.
        search_config: Optional search configuration, forwarded unchanged.

    Returns:
        Whatever ``resolve_reference_metadata_preferences`` returns.
    """
    preferences = resolve_reference_metadata_preferences(req, search_config)
    return preferences
|
|
|
|
|
|
|
|
|
|
def _enrich_chunks_with_document_metadata(chunks: list[dict], metadata_fields=None) -> None:
    """Delegate to ``enrich_chunks_with_document_metadata``.

    Args:
        chunks: Chunk dictionaries, forwarded unchanged to the helper.
        metadata_fields: Optional field selection, forwarded unchanged.

    Returns:
        None; the helper's return value is discarded.
    """
    enrich_chunks_with_document_metadata(chunks, metadata_fields)
    return None
|
|
|
|
|
|
|
|
|
|
|
2024-12-08 21:23:51 +08:00
|
|
|
@manager.route("/datasets/<dataset_id>/documents/<document_id>", methods=["GET"])  # noqa: F821
@token_required
async def download(tenant_id, dataset_id, document_id):
    """
    Download a document from a dataset.
    ---
    tags:
      - Documents
    security:
      - ApiKeyAuth: []
    produces:
      - application/octet-stream
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: path
        name: document_id
        type: string
        required: true
        description: ID of the document to download.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Document file stream.
        schema:
          type: file
      400:
        description: Error message.
        schema:
          type: object
    """
    # Guard clauses: each failed precondition returns its own error result.
    if not document_id:
        return get_error_data_result(message="Specify document_id please.")

    # The dataset must belong to the tenant resolved by @token_required.
    owned_datasets = KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id)
    if not owned_datasets:
        return get_error_data_result(message=f"You do not own the dataset {dataset_id}.")

    # The document must live in that dataset.
    doc = DocumentService.query(kb_id=dataset_id, id=document_id)
    if not doc:
        return get_error_data_result(message=f"The dataset not own the document {document_id}.")

    # Resolve the storage address for the document, then read the stored bytes.
    storage_address = File2DocumentService.get_storage_address(doc_id=document_id)
    bucket, object_key = storage_address
    payload = settings.STORAGE_IMPL.get(bucket, object_key)
    if not payload:
        return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)

    # Serve the bytes as an attachment named after the document, with a generic MIME type.
    attachment = BytesIO(payload)
    response = await send_file(
        attachment,
        as_attachment=True,
        attachment_filename=doc[0].name,
        mimetype="application/octet-stream",
    )
    return response
|
|
|
|
|
|
|
|
|
|
|
2026-02-09 19:52:52 +08:00
|
|
|
@manager.route("/documents/<document_id>", methods=["GET"])  # noqa: F821
async def download_doc(document_id):
    """
    Download a document by ID, authenticated with a beta API token.

    The second word of the ``Authorization`` header is looked up with
    ``APIToken.query(beta=...)``. The tenant of the single matching token must
    own the knowledge base the document belongs to; only then is storage read.

    Args:
        document_id: ID of the document to download, taken from the URL path.

    Returns:
        The file as an attachment on success, otherwise a JSON error result
        (invalid/ambiguous token, unknown document, foreign tenant, empty file).
    """
    # Headers.get() returns None when the header is absent; default to "" so a
    # missing header is rejected below instead of raising AttributeError.
    token = request.headers.get("Authorization", "").split()
    if len(token) != 2:
        return get_error_data_result(message="Authorization is not valid!")
    token = token[1]

    logging.info("Beta API token lookup attempted for document download")
    objs = APIToken.query(beta=token)
    if not objs:
        logging.warning("Beta API token lookup failed for document download: invalid API key")
        # NOTE(review): the trailing double quote inside this message looks like
        # a typo; it is left unchanged because clients may match on the text.
        return get_error_data_result(message='Authentication error: API key is invalid!"')
    if len(objs) > 1:
        # More than one token row matches: refuse rather than pick a tenant arbitrarily.
        logging.error("Beta API token lookup is ambiguous for document download: matches=%s", len(objs))
        return get_error_data_result(message="Authentication error: API key configuration is ambiguous.")
    tenant_id = objs[0].tenant_id
    logging.info("Beta API token authorized for document download: tenant_id=%s", tenant_id)

    if not document_id:
        return get_error_data_result(message="Specify document_id please.")
    doc = DocumentService.query(id=document_id)
    if not doc:
        return get_error_data_result(message=f"The dataset not own the document {document_id}.")

    # Authorization gate: the document's knowledge base must belong to the
    # token's tenant. This runs before any storage lookup.
    if not KnowledgebaseService.query(id=doc[0].kb_id, tenant_id=tenant_id):
        logging.warning(
            "cross-tenant access denied for document download: tenant_id=%s kb_id=%s document_id=%s",
            tenant_id,
            doc[0].kb_id,
            document_id,
        )
        return get_error_data_result(message="You do not have access to this document.")

    # The process of downloading
    doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id)  # minio address
    file_stream = settings.STORAGE_IMPL.get(doc_id, doc_location)
    if not file_stream:
        return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
    file = BytesIO(file_stream)
    # Use send_file with a proper filename and MIME type
    return await send_file(
        file,
        as_attachment=True,
        attachment_filename=doc[0].name,
        mimetype="application/octet-stream",  # Set a default MIME type
    )
|
|
|
|
|
|
|
|
|
|
|
2026-03-02 10:44:33 +08:00
|
|
|
# Human-readable message returned by stop_parsing() when a requested document's
# run state is not TaskStatus.RUNNING.
DOC_STOP_PARSING_INVALID_STATE_MESSAGE = "Can't stop parsing document that has not started or already completed"
|
|
|
|
|
# Machine-readable code that stop_parsing() places under "error_code" in the
# response data for the same invalid-state condition.
DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE = "DOC_STOP_PARSING_INVALID_STATE"
|
|
|
|
|
|
|
|
|
|
|
2024-12-08 21:23:51 +08:00
|
|
|
@manager.route("/datasets/<dataset_id>/chunks", methods=["POST"])  # noqa: F821
@token_required
async def parse(tenant_id, dataset_id):
    """
    Start parsing documents into chunks.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: body
        name: body
        description: Parsing parameters.
        required: true
        schema:
          type: object
          properties:
            document_ids:
              type: array
              items:
                type: string
              description: List of document IDs to parse.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Parsing started successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
    req = await get_request_json()
    requested_ids = req.get("document_ids")
    if not requested_ids:
        return get_error_data_result("`document_ids` is required")
    # Duplicates are removed here and reported in the response after the loop.
    doc_list, duplicate_messages = check_duplicate_ids(requested_ids, "document")

    not_found = []
    success_count = 0
    for doc_id in doc_list:
        # Unknown IDs are collected and reported after the loop; the remaining
        # documents are still processed.
        if not DocumentService.query(id=doc_id, kb_id=dataset_id):
            not_found.append(doc_id)
            continue

        # Conditional update: reset the document to run="1" with zeroed counters
        # only if it is not already RUNNING. Zero affected rows means the
        # document is currently being processed.
        info = {"run": "1", "progress": 0, "progress_msg": "", "chunk_num": 0, "token_num": 0}
        updated_rows = DocumentService.filter_update(
            [
                Document.id == doc_id,
                ((Document.run.is_null(True)) | (Document.run != TaskStatus.RUNNING.value)),
            ],
            info,
        )
        if updated_rows == 0:
            return get_error_data_result("Can't parse document that is currently being processed")

        # Drop chunks and tasks left from any earlier run before queueing again.
        settings.docStoreConn.delete({"doc_id": doc_id}, search.index_name(tenant_id), dataset_id)
        TaskService.filter_delete([Task.doc_id == doc_id])

        # Re-read the document so the queued payload reflects the reset above.
        # NOTE(review): assumes get_by_id returns (found, record) — confirm.
        found, doc_record = DocumentService.get_by_id(doc_id)
        if not found or doc_record is None:
            # The document disappeared mid-request; report it rather than
            # crash on None.to_dict().
            not_found.append(doc_id)
            continue
        doc = doc_record.to_dict()
        doc["tenant_id"] = tenant_id
        bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
        queue_tasks(doc, bucket, name, 0)
        success_count += 1

    if not_found:
        return get_result(message=f"Documents not found: {not_found}", code=RetCode.DATA_ERROR)
    if duplicate_messages:
        if success_count > 0:
            return get_result(
                message=f"Partially parsed {success_count} documents with {len(duplicate_messages)} errors",
                data={"success_count": success_count, "errors": duplicate_messages},
            )
        else:
            return get_error_data_result(message=";".join(duplicate_messages))

    return get_result()
|
|
|
|
|
|
2024-11-04 08:35:36 +01:00
|
|
|
|
2024-12-08 21:23:51 +08:00
|
|
|
@manager.route("/datasets/<dataset_id>/chunks", methods=["DELETE"])  # noqa: F821
@token_required
async def stop_parsing(tenant_id, dataset_id):
    """
    Stop parsing documents into chunks.
    ---
    tags:
      - Chunks
    security:
      - ApiKeyAuth: []
    parameters:
      - in: path
        name: dataset_id
        type: string
        required: true
        description: ID of the dataset.
      - in: body
        name: body
        description: Stop parsing parameters.
        required: true
        schema:
          type: object
          properties:
            document_ids:
              type: array
              items:
                type: string
              description: List of document IDs to stop parsing.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Parsing stopped successfully.
        schema:
          type: object
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")

    req = await get_request_json()
    requested_ids = req.get("document_ids")
    if not requested_ids:
        return get_error_data_result("`document_ids` is required")

    # Duplicates are removed here and reported in the response at the end.
    target_ids, duplicate_messages = check_duplicate_ids(requested_ids, "document")

    success_count = 0
    for doc_id in target_ids:
        doc = DocumentService.query(id=doc_id, kb_id=dataset_id)
        if not doc:
            return get_error_data_result(message=f"You don't own the document {doc_id}.")

        # Only a document whose run state is RUNNING can be stopped.
        if doc[0].run != TaskStatus.RUNNING.value:
            return construct_json_result(
                code=RetCode.DATA_ERROR,
                message=DOC_STOP_PARSING_INVALID_STATE_MESSAGE,
                data={"error_code": DOC_STOP_PARSING_INVALID_STATE_ERROR_CODE},
            )

        # Send cancellation signal via Redis to stop background task
        cancel_all_task_of(doc_id)
        # Set run to "2" and reset progress and chunk count, then drop the
        # document's entries from the doc store.
        DocumentService.update_by_id(doc_id, {"run": "2", "progress": 0, "chunk_num": 0})
        settings.docStoreConn.delete({"doc_id": doc[0].id}, search.index_name(tenant_id), dataset_id)
        success_count += 1

    if not duplicate_messages:
        return get_result()
    if success_count > 0:
        return get_result(
            message=f"Partially stopped {success_count} documents with {len(duplicate_messages)} errors",
            data={"success_count": success_count, "errors": duplicate_messages},
        )
    return get_error_data_result(message=";".join(duplicate_messages))
|
|
|
|
|
|
|
|
|
|
|
2024-12-08 21:23:51 +08:00
|
|
|
@manager.route("/retrieval", methods=["POST"])  # noqa: F821
@token_required
async def retrieval_test(tenant_id):
    """
    Retrieve chunks based on a query.
    ---
    tags:
      - Retrieval
    security:
      - ApiKeyAuth: []
    parameters:
      - in: body
        name: body
        description: Retrieval parameters.
        required: true
        schema:
          type: object
          properties:
            dataset_ids:
              type: array
              items:
                type: string
              required: true
              description: List of dataset IDs to search in.
            question:
              type: string
              required: true
              description: Query string.
            document_ids:
              type: array
              items:
                type: string
              description: List of document IDs to filter.
            similarity_threshold:
              type: number
              format: float
              description: Similarity threshold.
            vector_similarity_weight:
              type: number
              format: float
              description: Vector similarity weight.
            top_k:
              type: integer
              description: Maximum number of chunks to return.
            highlight:
              type: boolean
              description: Whether to highlight matched content.
            metadata_condition:
              type: object
              description: metadata filter condition.
      - in: header
        name: Authorization
        type: string
        required: true
        description: Bearer token for authentication.
    responses:
      200:
        description: Retrieval results.
        schema:
          type: object
          properties:
            chunks:
              type: array
              items:
                type: object
                properties:
                  id:
                    type: string
                    description: Chunk ID.
                  content:
                    type: string
                    description: Chunk content.
                  document_id:
                    type: string
                    description: ID of the document.
                  dataset_id:
                    type: string
                    description: ID of the dataset.
                  similarity:
                    type: number
                    format: float
                    description: Similarity score.
    """
    req = await get_request_json()

    # ---- Dataset validation -------------------------------------------------
    if not req.get("dataset_ids"):
        return get_error_data_result("`dataset_ids` is required.")
    kb_ids = req["dataset_ids"]
    if not isinstance(kb_ids, list):
        return get_error_data_result("`dataset_ids` should be a list")
    for kb_id in kb_ids:
        if not KnowledgebaseService.accessible(kb_id=kb_id, user_id=tenant_id):
            return get_error_data_result(f"You don't own the dataset {kb_id}.")
    kbs = KnowledgebaseService.get_by_ids(kb_ids)
    # Compare embedding model names without the vendor suffix: all datasets
    # searched together must share exactly one embedding model.
    embd_nms = {TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs}
    if len(embd_nms) != 1:
        return get_result(
            message="Datasets use different embedding models.",
            code=RetCode.DATA_ERROR,
        )

    # ---- Question and paging ------------------------------------------------
    if "question" not in req:
        return get_error_data_result("`question` is required.")
    page = int(req.get("page", 1))
    size = int(req.get("page_size", 30))
    question = req["question"]
    # Trim whitespace and validate question
    if isinstance(question, str):
        question = question.strip()
    # Return empty result if question is empty or whitespace-only
    if not question:
        return get_result(data={"total": 0, "chunks": [], "doc_aggs": {}})

    # ---- Document filter ----------------------------------------------------
    doc_ids = req.get("document_ids", [])
    use_kg = req.get("use_kg", False)
    toc_enhance = req.get("toc_enhance", False)
    langs = req.get("cross_languages", [])
    if not isinstance(doc_ids, list):
        return get_error_data_result("`document_ids` should be a list")
    if doc_ids:
        # Every explicitly requested document must belong to one of the datasets.
        doc_ids_list = KnowledgebaseService.list_documents_by_ids(kb_ids)
        for doc_id in doc_ids:
            if doc_id not in doc_ids_list:
                return get_error_data_result(f"The datasets don't own the document {doc_id}")
    else:
        # No explicit documents: optionally narrow the search by metadata.
        metadata_condition = req.get("metadata_condition")
        if metadata_condition:
            metas = DocMetadataService.get_flatted_meta_by_kbs(kb_ids)
            doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
            # If metadata_condition has conditions but no docs match, return empty result
            if not doc_ids and metadata_condition.get("conditions"):
                return get_result(data={"total": 0, "chunks": [], "doc_aggs": {}})
            if not doc_ids:
                # Placeholder id so the filter list is not empty.
                # NOTE(review): presumably chosen to match no real document - confirm.
                doc_ids = ["-999"]
        else:
            # If doc_ids is None all documents of the datasets are used
            doc_ids = None

    # ---- Ranking parameters -------------------------------------------------
    similarity_threshold = float(req.get("similarity_threshold", 0.2))
    vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
    top = int(req.get("top_k", 1024))
    if top <= 0:
        return get_error_data_result("`top_k` must be greater than 0")

    # `highlight` accepts a real boolean or the strings "true"/"false"
    # (case-insensitive); anything else is rejected.
    highlight_val = req.get("highlight", None)
    if highlight_val is None:
        highlight = False
    elif isinstance(highlight_val, bool):
        highlight = highlight_val
    elif isinstance(highlight_val, str) and highlight_val.lower() in ("true", "false"):
        highlight = highlight_val.lower() == "true"
    else:
        return get_error_data_result("`highlight` should be a boolean")

    include_metadata, metadata_fields = _resolve_reference_metadata(req)

    # Maps internal chunk field names to the names exposed by this endpoint;
    # keys that are not listed are passed through unchanged.
    key_mapping = {
        "chunk_id": "id",
        "content_with_weight": "content",
        "doc_id": "document_id",
        "important_kwd": "important_keywords",
        "question_kwd": "questions",
        "docnm_kwd": "document_keyword",
        "kb_id": "dataset_id",
    }

    try:
        tenant_ids = list({kb.tenant_id for kb in kbs})
        # The first dataset supplies the tenant and the embedding model; the
        # check above guarantees all datasets share the same model name.
        found, kb = KnowledgebaseService.get_by_id(kb_ids[0])
        if not found:
            return get_error_data_result(message="Dataset not found!")
        if kb.tenant_embd_id:
            embd_model_config = get_model_config_by_id(kb.tenant_embd_id)
        else:
            embd_model_config = get_model_config_by_type_and_name(kb.tenant_id, LLMType.EMBEDDING, kb.embd_id)
        embd_mdl = LLMBundle(kb.tenant_id, embd_model_config)

        # `tenant_rerank_id` takes precedence over `rerank_id` when both are sent.
        rerank_mdl = None
        if req.get("tenant_rerank_id"):
            rerank_model_config = get_model_config_by_id(req["tenant_rerank_id"])
            rerank_mdl = LLMBundle(kb.tenant_id, rerank_model_config)
        elif req.get("rerank_id"):
            rerank_model_config = get_model_config_by_type_and_name(kb.tenant_id, LLMType.RERANK, req["rerank_id"])
            rerank_mdl = LLMBundle(kb.tenant_id, rerank_model_config)

        if langs:
            question = await cross_languages(kb.tenant_id, None, question, langs)

        if req.get("keyword", False):
            # Append the extracted keywords to the question before retrieval.
            chat_model_config = get_tenant_default_model_by_type(kb.tenant_id, LLMType.CHAT)
            chat_mdl = LLMBundle(kb.tenant_id, chat_model_config)
            question += await keyword_extraction(chat_mdl, question)

        ranks = await settings.retriever.retrieval(
            question,
            embd_mdl,
            tenant_ids,
            kb_ids,
            page,
            size,
            similarity_threshold,
            vector_similarity_weight,
            top,
            doc_ids,
            rerank_mdl=rerank_mdl,
            highlight=highlight,
            rank_feature=label_question(question, kbs),
        )
        if toc_enhance:
            chat_model_config = get_tenant_default_model_by_type(kb.tenant_id, LLMType.CHAT)
            chat_mdl = LLMBundle(kb.tenant_id, chat_model_config)
            cks = await settings.retriever.retrieval_by_toc(question, ranks["chunks"], tenant_ids, chat_mdl, size)
            # Keep the original chunks when the TOC pass returns nothing.
            if cks:
                ranks["chunks"] = cks
        ranks["chunks"] = settings.retriever.retrieval_by_children(ranks["chunks"], tenant_ids)
        if use_kg:
            chat_model_config = get_tenant_default_model_by_type(kb.tenant_id, LLMType.CHAT)
            ck = await settings.kg_retriever.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, chat_model_config))
            # Put the knowledge-graph chunk first, but only if it has content.
            if ck["content_with_weight"]:
                ranks["chunks"].insert(0, ck)

        # Drop the "vector" entry from every chunk before it is returned.
        for c in ranks["chunks"]:
            c.pop("vector", None)

        if include_metadata:
            logging.info(
                "sdk.retrieval reference_metadata enabled dataset_ids=%s fields=%s chunks=%s",
                kb_ids,
                sorted(metadata_fields) if metadata_fields else None,
                len(ranks["chunks"]),
            )
            enrich_chunks_with_document_metadata(ranks["chunks"], metadata_fields)

        # Rename internal keys to the public field names.
        ranks["chunks"] = [
            {key_mapping.get(key, key): value for key, value in chunk.items()}
            for chunk in ranks["chunks"]
        ]
        return get_result(data=ranks)
    except Exception as e:
        # Substring test instead of `.find(...) > 0`, which missed a match
        # located at the very start of the message.
        if "not_found" in str(e):
            return get_result(
                message="No chunk found! Check the chunk status please!",
                code=RetCode.DATA_ERROR,
            )
        return server_error_response(e)
|