From 53bb2bd9e8ad52acf04833136dac6f648174e35d Mon Sep 17 00:00:00 2001 From: monsterDavid Date: Fri, 29 May 2026 04:33:26 -0700 Subject: [PATCH] fix(metadata): preserve empty AND results across filter conditions (#15386) ## Summary - Fix `meta_filter()` AND logic so an empty result from an early condition is not overwritten when a later condition matches. - Add regression tests for empty-first AND, successful AND intersection, and OR behavior after an empty first condition. Fixes incorrect `/retrieval` metadata filtering when multiple AND conditions are used and the first condition matches no documents. Closes #15360 ## Test plan - [x] `pytest test/unit_test/common/test_metadata_filter_operators.py -v` (19/19 passed) --- common/metadata_utils.py | 7 ++-- .../common/test_metadata_filter_operators.py | 39 +++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/common/metadata_utils.py b/common/metadata_utils.py index 53af2b4eaf..591acb8053 100644 --- a/common/metadata_utils.py +++ b/common/metadata_utils.py @@ -41,7 +41,7 @@ def convert_conditions(metadata_condition): def meta_filter(metas: dict, filters: list[dict], logic: str = "and"): - doc_ids = set([]) + doc_ids = None def normalize_string_values(value): if isinstance(value, str): @@ -159,7 +159,7 @@ def meta_filter(metas: dict, filters: list[dict], logic: str = "and"): v2docs = metas[k] ids = filter_out(v2docs, f["op"], f["value"]) - if not doc_ids: + if doc_ids is None: doc_ids = set(ids) else: if logic == "and": @@ -169,8 +169,7 @@ def meta_filter(metas: dict, filters: list[dict], logic: str = "and"): return [] else: doc_ids = doc_ids | set(ids) - logging.debug(f"meta_filter filters={filters}, logic={logic}, returning doc_ids={list(doc_ids)}") - return list(doc_ids) + return list(doc_ids or []) async def apply_meta_data_filter( diff --git a/test/unit_test/common/test_metadata_filter_operators.py b/test/unit_test/common/test_metadata_filter_operators.py index 23f4c2b643..fc0cca7be8 100644 --- a/test/unit_test/common/test_metadata_filter_operators.py +++ b/test/unit_test/common/test_metadata_filter_operators.py @@ -125,3 +125,42 @@ def test_less_than_or_equal(): filters = [{"key": "score", "op": "≤", "value": "5"}] assert set(meta_filter(metas, filters)) == {"doc1", "doc3"} + + +def test_and_logic_returns_empty_when_first_condition_matches_nothing(): + metas = { + "author": {"Alice": ["doc1"]}, + "page_count": {"40": ["doc2"], "10": ["doc3"]}, + } + filters = [ + {"key": "author", "op": "contains", "value": "Toby"}, + {"key": "page_count", "op": ">", "value": "30"}, + ] + + assert meta_filter(metas, filters, logic="and") == [] + + +def test_and_logic_intersects_matching_conditions(): + metas = { + "author": {"Toby Jones": ["doc1"], "Alice": ["doc2"]}, + "page_count": {"40": ["doc1"], "10": ["doc2"]}, + } + filters = [ + {"key": "author", "op": "contains", "value": "Toby"}, + {"key": "page_count", "op": ">", "value": "30"}, + ] + + assert meta_filter(metas, filters, logic="and") == ["doc1"] + + +def test_or_logic_still_unions_after_empty_first_condition(): + metas = { + "author": {"Alice": ["doc1"]}, + "page_count": {"40": ["doc2"], "10": ["doc3"]}, + } + filters = [ + {"key": "author", "op": "contains", "value": "Toby"}, + {"key": "page_count", "op": ">", "value": "30"}, + ] + + assert set(meta_filter(metas, filters, logic="or")) == {"doc2"}