fix(metadata): preserve empty AND results across filter conditions (#15386)

## Summary
- Fix `meta_filter()` AND logic so an empty result from an early
condition is not overwritten when a later condition matches.
- Add regression tests for empty-first AND, successful AND intersection,
and OR behavior after an empty first condition.

Fixes incorrect `/retrieval` metadata filtering when multiple AND
conditions are used and the first condition matches no documents.

Closes #15360

## Test plan
- [x] `pytest test/unit_test/common/test_metadata_filter_operators.py -v` (19/19 passed)
This commit is contained in:
monsterDavid
2026-05-29 04:33:26 -07:00
committed by GitHub
parent 2d229dd8aa
commit 53bb2bd9e8
2 changed files with 42 additions and 4 deletions

View File

@@ -41,7 +41,7 @@ def convert_conditions(metadata_condition):
def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
doc_ids = set([])
doc_ids = None
def normalize_string_values(value):
if isinstance(value, str):
@@ -159,7 +159,7 @@ def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
v2docs = metas[k]
ids = filter_out(v2docs, f["op"], f["value"])
if not doc_ids:
if doc_ids is None:
doc_ids = set(ids)
else:
if logic == "and":
@@ -169,8 +169,7 @@ def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
return []
else:
doc_ids = doc_ids | set(ids)
logging.debug(f"meta_filter filters={filters}, logic={logic}, returning doc_ids={list(doc_ids)}")
return list(doc_ids)
return list(doc_ids or [])
async def apply_meta_data_filter(

View File

@@ -125,3 +125,42 @@ def test_less_than_or_equal():
filters = [{"key": "score", "op": "", "value": "5"}]
assert set(meta_filter(metas, filters)) == {"doc1", "doc3"}
def test_and_logic_returns_empty_when_first_condition_matches_nothing():
    """AND filtering must return no documents when the first condition matches none.

    Regression case: the empty result of the leading ``author`` condition
    must not be replaced by the matches of the later ``page_count`` condition.
    """
    metadata_index = {
        "author": {"Alice": ["doc1"]},
        "page_count": {"40": ["doc2"], "10": ["doc3"]},
    }
    # The author condition comes first on purpose: it is the one expected
    # to match nothing.
    author_condition = {"key": "author", "op": "contains", "value": "Toby"}
    page_condition = {"key": "page_count", "op": ">", "value": "30"}

    matched = meta_filter(
        metadata_index,
        [author_condition, page_condition],
        logic="and",
    )

    assert matched == []
def test_and_logic_intersects_matching_conditions():
    """AND filtering keeps only the document expected to satisfy every condition.

    The assertion pins the result to exactly ``["doc1"]``.
    """
    metadata_index = {
        "author": {"Toby Jones": ["doc1"], "Alice": ["doc2"]},
        "page_count": {"40": ["doc1"], "10": ["doc2"]},
    }
    author_condition = {"key": "author", "op": "contains", "value": "Toby"}
    page_condition = {"key": "page_count", "op": ">", "value": "30"}

    matched = meta_filter(
        metadata_index,
        [author_condition, page_condition],
        logic="and",
    )

    assert matched == ["doc1"]
def test_or_logic_still_unions_after_empty_first_condition():
    """OR filtering still returns later matches when the first condition is empty.

    The result is compared as a set, so the order of returned ids is not
    part of the check.
    """
    metadata_index = {
        "author": {"Alice": ["doc1"]},
        "page_count": {"40": ["doc2"], "10": ["doc3"]},
    }
    author_condition = {"key": "author", "op": "contains", "value": "Toby"}
    page_condition = {"key": "page_count", "op": ">", "value": "30"}

    matched = set(
        meta_filter(
            metadata_index,
            [author_condition, page_condition],
            logic="or",
        )
    )

    assert matched == {"doc2"}