diff --git a/internal/service/metadata.go b/internal/service/metadata.go index a4be1412e3..b74d5480c8 100644 --- a/internal/service/metadata.go +++ b/internal/service/metadata.go @@ -22,6 +22,7 @@ import ( "fmt" "strconv" + "ragflow/internal/common" "ragflow/internal/dao" "ragflow/internal/engine" "ragflow/internal/engine/types" @@ -131,9 +132,9 @@ func (s *MetadataService) SearchMetadataByKBs(kbIDs []string, size int) (*Search // GetFlattedMetaByKBs returns flattened metadata in the format: // {field_name: {value: [doc_ids]}} -func (s *MetadataService) GetFlattedMetaByKBs(kbIDs []string) (map[string]interface{}, error) { +func (s *MetadataService) GetFlattedMetaByKBs(kbIDs []string) (common.MetaData, error) { if len(kbIDs) == 0 { - return make(map[string]interface{}), nil + return make(common.MetaData), nil } // Get metadata for all docs in KBs (use large limit like Python's 10000) @@ -142,7 +143,7 @@ func (s *MetadataService) GetFlattedMetaByKBs(kbIDs []string) (map[string]interf return nil, err } - flattedMeta := make(map[string]interface{}) + flattedMeta := make(common.MetaData) for _, chunk := range result.Chunks { // Extract doc_id from chunk @@ -171,13 +172,10 @@ func (s *MetadataService) GetFlattedMetaByKBs(kbIDs []string) (map[string]interf // Initialize field map if not exists if _, exists := flattedMeta[fieldName]; !exists { - flattedMeta[fieldName] = make(map[string]interface{}) + flattedMeta[fieldName] = make(common.MetaValueDocs) } - valueMap, ok := flattedMeta[fieldName].(map[string]interface{}) - if !ok { - continue - } + valueMap := flattedMeta[fieldName] // Handle string, number (float64/int), and list of string/number switch v := fieldValue.(type) { diff --git a/internal/service/metadata_filter.go b/internal/service/metadata_filter.go index 0b675defa1..723ffb1b88 100644 --- a/internal/service/metadata_filter.go +++ b/internal/service/metadata_filter.go @@ -122,7 +122,7 @@ func genMetaFilterPrompt(metaDataJSON, question, constraintsJSON, currentDate st } // GenMetaFilter generates filter conditions using LLM based on metadata and question. -func GenMetaFilter(ctx context.Context, chatModel *modelModule.ChatModel, metaData map[string]interface{}, question string, constraints map[string]string) (*MetaFilterResult, error) { +func GenMetaFilter(ctx context.Context, chatModel *modelModule.ChatModel, metaData common.MetaData, question string, constraints map[string]string) (*MetaFilterResult, error) { if chatModel == nil { return nil, fmt.Errorf("chat model is nil") } @@ -134,13 +134,11 @@ func GenMetaFilter(ctx context.Context, chatModel *modelModule.ChatModel, metaDa // Build metadata structure for prompt metaDataStructure := make(map[string][]string) for key, values := range metaData { - if valueMap, ok := values.(map[string]interface{}); ok { - keys := make([]string, 0, len(valueMap)) - for k := range valueMap { - keys = append(keys, k) - } - metaDataStructure[key] = keys + keys := make([]string, 0, len(values)) + for k := range values { + keys = append(keys, k) } + metaDataStructure[key] = keys } metaDataJSON, _ := json.Marshal(metaDataStructure) @@ -202,7 +200,7 @@ func GenMetaFilter(ctx context.Context, chatModel *modelModule.ChatModel, metaDa } // ApplyMetaFilter applies filter conditions to metadata and returns matching doc IDs -func ApplyMetaFilter(metaData map[string]interface{}, filters []MetaFilterCondition, logic string) []string { +func ApplyMetaFilter(metaData common.MetaData, filters []MetaFilterCondition, logic string) []string { if len(filters) == 0 { return []string{} } @@ -243,66 +241,36 @@ func ApplyMetaFilter(metaData map[string]interface{}, filters []MetaFilterCondit } // applySingleCondition applies a single filter condition and returns matching doc IDs -func applySingleCondition(metaData map[string]interface{}, condition MetaFilterCondition) []string { +func applySingleCondition(metaData common.MetaData, condition MetaFilterCondition) []string { key := condition.Key value := condition.Value op := condition.Op - valueMap, ok := metaData[key].(map[string]interface{}) - if !ok { - return []string{} - } + valueMap := metaData[key] var result []string switch op { case "=", "==": if docIDs, exists := valueMap[value]; exists { - switch v := docIDs.(type) { - case []interface{}: - for _, id := range v { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - case []string: - result = append(result, v...) - } + result = append(result, docIDs...) } case "!=", "≠": for val, docIDs := range valueMap { if val != value { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } + result = append(result, docIDs...) } } case "contains": for val, docIDs := range valueMap { if strings.Contains(strings.ToLower(val), strings.ToLower(value)) { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } + result = append(result, docIDs...) } } case "not contains": for val, docIDs := range valueMap { if !strings.Contains(strings.ToLower(val), strings.ToLower(value)) { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } + result = append(result, docIDs...) } } case "in": @@ -310,13 +278,7 @@ func applySingleCondition(metaData map[string]interface{}, condition MetaFilterC for _, v := range values { v = strings.TrimSpace(v) if docIDs, exists := valueMap[v]; exists { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } + result = append(result, docIDs...) } } case "not in": @@ -326,112 +288,19 @@ func applySingleCondition(metaData map[string]interface{}, condition MetaFilterC } for val, docIDs := range valueMap { if !excludeValues[strings.ToLower(val)] { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } + result = append(result, docIDs...) } } case "start with": for val, docIDs := range valueMap { if strings.HasPrefix(strings.ToLower(val), strings.ToLower(value)) { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } + result = append(result, docIDs...) } } case "end with": for val, docIDs := range valueMap { if strings.HasSuffix(strings.ToLower(val), strings.ToLower(value)) { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } - } - } - case "empty": - if len(valueMap) == 0 { - return []string{} - } - case "not empty": - if len(valueMap) > 0 { - for _, docIDs := range valueMap { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } - } - } - case ">": - for val, docIDs := range valueMap { - if val > value { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } - } - } - case "<": - for val, docIDs := range valueMap { - if val < value { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } - } - } - case ">=": - for val, docIDs := range valueMap { - if val >= value { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } - } - } - case "<=": - for val, docIDs := range valueMap { - if val <= value { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } - } - } - } - default: - // Default to equality check - if docIDs, exists := valueMap[value]; exists { - if ids, ok := docIDs.([]interface{}); ok { - for _, id := range ids { - if idStr, ok := id.(string); ok { - result = append(result, idStr) - } - } + result = append(result, docIDs...) } } } @@ -439,6 +308,7 @@ func applySingleCondition(metaData map[string]interface{}, condition MetaFilterC return result } + // ApplyMetaDataFilter applies metadata filtering rules and returns filtered doc_ids // Supports three modes: // - auto: generate filter conditions via LLM @@ -447,7 +317,7 @@ func applySingleCondition(metaData map[string]interface{}, condition MetaFilterC func ApplyMetaDataFilter( ctx context.Context, metaDataFilter map[string]interface{}, - metaData map[string]interface{}, + metaData common.MetaData, question string, chatModel *modelModule.ChatModel, baseDocIDs []string, @@ -497,7 +367,7 @@ func ApplyMetaDataFilter( if len(selectedKeys) > 0 { // Filter metadata to only selected keys - filteredMeta := make(map[string]interface{}) + filteredMeta := make(common.MetaData) for _, key := range selectedKeys { if val, exists := metaData[key]; exists { filteredMeta[key] = val diff --git a/internal/service/metadata_filter_test.go b/internal/service/metadata_filter_test.go new file mode 100644 index 0000000000..7919870a4c --- /dev/null +++ b/internal/service/metadata_filter_test.go @@ -0,0 +1,156 @@ +// +// Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package service + +import ( + "testing" + + "ragflow/internal/common" +) + +func TestApplyMetaFilter_Equals(t *testing.T) { + metas := common.MetaData{ + "author": {"Zhang San": {"doc1", "doc2"}, "Li Si": {"doc3"}}, + } + filters := []MetaFilterCondition{{Key: "author", Value: "Zhang San", Op: "="}} + result := ApplyMetaFilter(metas, filters, "and") + if len(result) != 2 { + t.Errorf("expected 2 docs, got %d: %v", len(result), result) + } +} + +func TestApplyMetaFilter_NotEquals(t *testing.T) { + metas := common.MetaData{ + "author": {"Zhang San": {"doc1"}, "Li Si": {"doc2"}}, + } + filters := []MetaFilterCondition{{Key: "author", Value: "Zhang San", Op: "!="}} + result := ApplyMetaFilter(metas, filters, "and") + if len(result) != 1 || result[0] != "doc2" { + t.Errorf("expected [doc2], got %v", result) + } +} + +func TestApplyMetaFilter_Contains(t *testing.T) { + metas := common.MetaData{ + "title": {"hello world": {"doc1"}, "goodbye": {"doc2"}}, + } + filters := []MetaFilterCondition{{Key: "title", Value: "hello", Op: "contains"}} + result := ApplyMetaFilter(metas, filters, "and") + if len(result) != 1 || result[0] != "doc1" { + t.Errorf("expected [doc1], got %v", result) + } +} + +func TestApplyMetaFilter_NotContains(t *testing.T) { + metas := common.MetaData{ + "title": {"hello world": {"doc1"}, "goodbye": {"doc2"}}, + } + filters := []MetaFilterCondition{{Key: "title", Value: "hello", Op: "not contains"}} + result := ApplyMetaFilter(metas, filters, "and") + if len(result) != 1 || result[0] != "doc2" { + t.Errorf("expected [doc2], got %v", result) + } +} + +func TestApplyMetaFilter_In(t *testing.T) { + metas := common.MetaData{ + "category": {"A": {"doc1"}, "B": {"doc2"}, "C": {"doc3"}}, + } + filters := []MetaFilterCondition{{Key: "category", Value: "A,B", Op: "in"}} + result := ApplyMetaFilter(metas, filters, "and") + if len(result) != 2 { + t.Errorf("expected 2 docs, got %d: %v", len(result), result) + } +} + +func TestApplyMetaFilter_NotIn(t *testing.T) { + metas := common.MetaData{ + "category": {"A": {"doc1"}, "B": {"doc2"}, "C": {"doc3"}}, + } + filters := []MetaFilterCondition{{Key: "category", Value: "A", Op: "not in"}} + result := ApplyMetaFilter(metas, filters, "and") + if len(result) != 2 { + t.Errorf("expected 2 docs (B,C), got %d: %v", len(result), result) + } +} + +func TestApplyMetaFilter_StartWith(t *testing.T) { + metas := common.MetaData{ + "code": {"ABC-123": {"doc1"}, "XYZ-456": {"doc2"}}, + } + filters := []MetaFilterCondition{{Key: "code", Value: "abc", Op: "start with"}} + result := ApplyMetaFilter(metas, filters, "and") + if len(result) != 1 || result[0] != "doc1" { + t.Errorf("expected [doc1], got %v", result) + } +} + +func TestApplyMetaFilter_EndWith(t *testing.T) { + metas := common.MetaData{ + "code": {"ABC-123": {"doc1"}, "ABC-456": {"doc2"}}, + } + filters := []MetaFilterCondition{{Key: "code", Value: "123", Op: "end with"}} + result := ApplyMetaFilter(metas, filters, "and") + if len(result) != 1 || result[0] != "doc1" { + t.Errorf("expected [doc1], got %v", result) + } +} + +func TestApplyMetaFilter_AndLogic(t *testing.T) { + metas := common.MetaData{ + "author": {"Zhang San": {"doc1", "doc2"}, "Li Si": {"doc3"}}, + "year": {"2024": {"doc1"}, "2025": {"doc2", "doc3"}}, + } + filters := []MetaFilterCondition{ + {Key: "author", Value: "Zhang San", Op: "="}, + {Key: "year", Value: "2024", Op: "="}, + } + result := ApplyMetaFilter(metas, filters, "and") + if len(result) != 1 || result[0] != "doc1" { + t.Errorf("expected [doc1], got %v", result) + } +} + +func TestApplyMetaFilter_OrLogic(t *testing.T) { + metas := common.MetaData{ + "author": {"Zhang San": {"doc1"}, "Li Si": {"doc2"}}, + } + filters := []MetaFilterCondition{ + {Key: "author", Value: "Zhang San", Op: "="}, + {Key: "author", Value: "Li Si", Op: "="}, + } + result := ApplyMetaFilter(metas, filters, "or") + if len(result) != 2 { + t.Errorf("expected 2 docs, got %d: %v", len(result), result) + } +} + +func TestApplyMetaFilter_EmptyFilters(t *testing.T) { + result := ApplyMetaFilter(nil, nil, "and") + if len(result) != 0 { + t.Errorf("expected 0, got %d", len(result)) + } +} + +func TestApplyMetaFilter_KeyNotFound(t *testing.T) { + metas := common.MetaData{"author": {"Zhang San": {"doc1"}}} + filters := []MetaFilterCondition{{Key: "nonexistent", Value: "x", Op: "="}} + result := ApplyMetaFilter(metas, filters, "and") + if len(result) != 0 { + t.Errorf("expected 0, got %v", result) + } +}