mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
fix(metadata): preserve empty AND results across filter conditions (#15386)
## Summary

- Fix `meta_filter()` AND logic so that an empty result from an earlier condition is not overwritten when a later condition matches.
- Add regression tests for empty-first AND, successful AND intersection, and OR behavior after an empty first condition.

Fixes incorrect `/retrieval` metadata filtering when multiple AND conditions are used and the first condition matches no documents.

Closes #15360

## Test plan

- [x] `pytest test/unit_test/common/test_metadata_filter_operators.py -v` (19/19 passed)
This commit is contained in:
@@ -41,7 +41,7 @@ def convert_conditions(metadata_condition):
|
||||
|
||||
|
||||
def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
|
||||
doc_ids = set([])
|
||||
doc_ids = None
|
||||
|
||||
def normalize_string_values(value):
|
||||
if isinstance(value, str):
|
||||
@@ -159,7 +159,7 @@ def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
|
||||
v2docs = metas[k]
|
||||
ids = filter_out(v2docs, f["op"], f["value"])
|
||||
|
||||
if not doc_ids:
|
||||
if doc_ids is None:
|
||||
doc_ids = set(ids)
|
||||
else:
|
||||
if logic == "and":
|
||||
@@ -169,8 +169,7 @@ def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
|
||||
return []
|
||||
else:
|
||||
doc_ids = doc_ids | set(ids)
|
||||
logging.debug(f"meta_filter filters={filters}, logic={logic}, returning doc_ids={list(doc_ids)}")
|
||||
return list(doc_ids)
|
||||
return list(doc_ids or [])
|
||||
|
||||
|
||||
async def apply_meta_data_filter(
|
||||
|
||||
@@ -125,3 +125,42 @@ def test_less_than_or_equal():
|
||||
filters = [{"key": "score", "op": "≤", "value": "5"}]
|
||||
|
||||
assert set(meta_filter(metas, filters)) == {"doc1", "doc3"}
|
||||
|
||||
|
||||
def test_and_logic_returns_empty_when_first_condition_matches_nothing():
    """AND logic must stay empty once the first condition matches no document.

    No stored author value contains "Toby", so the first condition yields
    nothing; the combined AND result is expected to be an empty list even
    though a second condition is also supplied.
    """
    author_index = {"Alice": ["doc1"]}
    page_count_index = {"40": ["doc2"], "10": ["doc3"]}
    conditions = [
        {"key": "author", "op": "contains", "value": "Toby"},
        {"key": "page_count", "op": ">", "value": "30"},
    ]

    matched = meta_filter(
        {"author": author_index, "page_count": page_count_index},
        conditions,
        logic="and",
    )

    assert matched == []
|
||||
|
||||
|
||||
def test_and_logic_intersects_matching_conditions():
    """AND logic returns only the documents selected by every condition.

    Both the author condition and the page_count condition are expected to
    select "doc1", so the result is exactly that single document.
    """
    author_index = {"Toby Jones": ["doc1"], "Alice": ["doc2"]}
    page_count_index = {"40": ["doc1"], "10": ["doc2"]}
    conditions = [
        {"key": "author", "op": "contains", "value": "Toby"},
        {"key": "page_count", "op": ">", "value": "30"},
    ]

    matched = meta_filter(
        {"author": author_index, "page_count": page_count_index},
        conditions,
        logic="and",
    )

    assert matched == ["doc1"]
|
||||
|
||||
|
||||
def test_or_logic_still_unions_after_empty_first_condition():
    """OR logic still returns later matches when the first condition is empty.

    No stored author value contains "Toby"; the result is expected to be
    just "doc2", the document filed under page_count "40".
    """
    author_index = {"Alice": ["doc1"]}
    page_count_index = {"40": ["doc2"], "10": ["doc3"]}
    conditions = [
        {"key": "author", "op": "contains", "value": "Toby"},
        {"key": "page_count", "op": ">", "value": "30"},
    ]

    matched = meta_filter(
        {"author": author_index, "page_count": page_count_index},
        conditions,
        logic="or",
    )

    # Order of the returned ids is not part of the contract, so compare as a set.
    assert set(matched) == {"doc2"}
|
||||
|
||||
Reference in New Issue
Block a user