diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 7ed5d0cca4..364a959cd8 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -1682,7 +1682,7 @@ async def retrieval_test(tenant_id): if not doc_ids: metadata_condition = req.get("metadata_condition") if metadata_condition: - metas = DocMetadataService.get_meta_by_kbs(kb_ids) + metas = DocMetadataService.get_flatted_meta_by_kbs(kb_ids) doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")) # If metadata_condition has conditions but no docs match, return empty result if not doc_ids and metadata_condition.get("conditions"): diff --git a/rag/nlp/search.py b/rag/nlp/search.py index 30f6f047de..3cf70b6d94 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -438,6 +438,12 @@ class Dealer: # When vector_similarity_weight is 0, similarity_threshold is not meaningful for term-only scores. post_threshold = 0.0 if vector_similarity_weight <= 0 else similarity_threshold + + # When doc_ids is explicitly provided (metadata or document filtering), bypass threshold + # User wants those specific documents regardless of their relevance score + if doc_ids: + post_threshold = 0.0 + valid_idx = [int(i) for i in sorted_idx if sim_np[i] >= post_threshold] filtered_count = len(valid_idx) ranks["total"] = int(filtered_count) diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py index 872563ccae..cd4c2e9d23 100644 --- a/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py +++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_doc_sdk_routes_unit.py @@ -220,6 +220,11 @@ def _load_doc_module(monkeypatch): tenant_llm_service_mod.TenantService = _StubTenantService tenant_llm_service_mod.TenantLLMService = _StubTenantLLMService + + class _StubLLMFactoriesService: + pass + + tenant_llm_service_mod.LLMFactoriesService = _StubLLMFactoriesService monkeypatch.setitem(sys.modules, "api.db.services.tenant_llm_service", tenant_llm_service_mod) # Mock LLMService @@ -993,7 +998,7 @@ class TestDocRoutesUnit: "get_request_json", lambda: _AwaitableValue({"dataset_ids": ["ds-1"], "question": "q", "metadata_condition": {"logic": "and"}}), ) - monkeypatch.setattr(module.DocMetadataService, "get_meta_by_kbs", lambda _ids: []) + monkeypatch.setattr(module.DocMetadataService, "get_flatted_meta_by_kbs", lambda _kbs: []) monkeypatch.setattr(module, "meta_filter", lambda *_args, **_kwargs: []) res = _run(module.retrieval_test.__wrapped__("tenant-1")) assert "code" in res