From 5018459112460a584c48c6f0d086590ee80e958b Mon Sep 17 00:00:00 2001 From: Wang Qi Date: Wed, 29 Apr 2026 21:09:54 +0800 Subject: [PATCH] Fix metadata config (#14480) ### What problem does this PR solve? Fix metadata config ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/services/dataset_api_service.py | 36 +---- api/utils/validation_utils.py | 11 +- .../test_metadata_retrieval.py | 9 +- .../test_auto_metadata.py | 126 ------------------ .../metedata/hooks/use-manage-modal.ts | 1 + 5 files changed, 14 insertions(+), 169 deletions(-) delete mode 100644 test/testcases/test_sdk_api/test_dataset_mangement/test_auto_metadata.py diff --git a/api/apps/services/dataset_api_service.py b/api/apps/services/dataset_api_service.py index 3d062ab599..62d38ba374 100644 --- a/api/apps/services/dataset_api_service.py +++ b/api/apps/services/dataset_api_service.py @@ -650,25 +650,8 @@ def get_auto_metadata(dataset_id: str, tenant_id: str): kb = KnowledgebaseService.get_or_none(id=dataset_id, tenant_id=tenant_id) if kb is None: return False, f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'" - parser_cfg = kb.parser_config or {} - metadata = parser_cfg.get("metadata") or [] - enabled = parser_cfg.get("enable_metadata", bool(metadata)) - # Normalize to AutoMetadataConfig-like JSON - fields = [] - for f in metadata: - if not isinstance(f, dict): - continue - fields.append( - { - "name": f.get("name", ""), - "type": f.get("type", ""), - "description": f.get("description"), - "examples": f.get("examples"), - "restrict_values": f.get("restrict_values", False), - } - ) - return True, {"enabled": enabled, "fields": fields} + return True, {"metadata": parser_cfg.get("metadata") or [], "built_in_metadata": parser_cfg.get("built_in_metadata") or []} async def update_auto_metadata(dataset_id: str, tenant_id: str, cfg: dict): @@ -685,24 +668,13 @@ async def update_auto_metadata(dataset_id: str, tenant_id: str, cfg: dict): return False, f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'" parser_cfg = kb.parser_config or {} - fields = [] - for f in cfg.get("fields", []): - fields.append( - { - "name": f.get("name", ""), - "type": f.get("type", ""), - "description": f.get("description"), - "examples": f.get("examples"), - "restrict_values": f.get("restrict_values", False), - } - ) - parser_cfg["metadata"] = fields - parser_cfg["enable_metadata"] = cfg.get("enabled", True) + parser_cfg["metadata"] = cfg.get("metadata") + parser_cfg["built_in_metadata"] = cfg.get("built_in_metadata") if not KnowledgebaseService.update_by_id(kb.id, {"parser_config": parser_cfg}): return False, "Update auto-metadata error.(Database error)" - return True, {"enabled": parser_cfg["enable_metadata"], "fields": fields} + return True, cfg def delete_tags(dataset_id: str, tenant_id: str, tags: list[str]): diff --git a/api/utils/validation_utils.py b/api/utils/validation_utils.py index 8ded91261c..f570bacc3d 100644 --- a/api/utils/validation_utils.py +++ b/api/utils/validation_utils.py @@ -364,18 +364,17 @@ class ParentChildConfig(Base): class AutoMetadataField(Base): """Schema for a single auto-metadata field configuration.""" - name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=255), Field(...)] - type: Annotated[Literal["string", "list", "time"], Field(...)] + key: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=255), Field(...)] + type: Annotated[Literal["string", "list", "time", "number"], Field(...)] description: Annotated[str | None, Field(default=None, max_length=65535)] - examples: Annotated[list[str] | None, Field(default=None)] - restrict_values: Annotated[bool, Field(default=False)] + enum: Annotated[list[str] | None, Field(default=None)] class AutoMetadataConfig(Base): """Top-level auto-metadata configuration attached to a dataset.""" - enabled: Annotated[bool, Field(default=True)] - fields: Annotated[list[AutoMetadataField], Field(default_factory=list)] + metadata: Annotated[list[AutoMetadataField], Field(default_factory=list)] + built_in_metadata: Annotated[list[AutoMetadataField], Field(default_factory=list)] class ParserConfig(Base): diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_metadata_retrieval.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_metadata_retrieval.py index 9b0dd18cde..77f9312470 100644 --- a/test/testcases/test_http_api/test_file_management_within_dataset/test_metadata_retrieval.py +++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_metadata_retrieval.py @@ -70,11 +70,10 @@ def add_dataset_with_metadata(HttpApiAuth): headers={"Content-Type": "application/json"}, auth=HttpApiAuth, json={ - "enabled": False, - "fields": [ - {"name": "character", "type": "string", "description": "Historical figure name"}, - {"name": "era", "type": "string", "description": "Historical era"}, - {"name": "achievements", "type": "list", "description": "Major achievements"}, + "metadata": [ + {"key": "character", "type": "string", "description": "Historical figure name"}, + {"key": "era", "type": "string", "description": "Historical era"}, + {"key": "achievements", "type": "list", "description": "Major achievements"}, ] } ).json() diff --git a/test/testcases/test_sdk_api/test_dataset_mangement/test_auto_metadata.py b/test/testcases/test_sdk_api/test_dataset_mangement/test_auto_metadata.py deleted file mode 100644 index 908d95dae3..0000000000 --- a/test/testcases/test_sdk_api/test_dataset_mangement/test_auto_metadata.py +++ /dev/null @@ -1,126 +0,0 @@ -# -# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pytest - - -@pytest.mark.usefixtures("clear_datasets") -class TestAutoMetadataOnCreate: - @pytest.mark.p1 - def test_create_dataset_with_auto_metadata(self, client): - payload = { - "name": "auto_metadata_create", - "auto_metadata_config": { - "enabled": True, - "fields": [ - { - "name": "author", - "type": "string", - "description": "The author of the document", - "examples": ["John Doe", "Jane Smith"], - "restrict_values": False, - }, - { - "name": "category", - "type": "list", - "description": "Document category", - "examples": ["Technical", "Business"], - "restrict_values": True, - }, - ], - }, - } - dataset = client.create_dataset(**payload) - # The SDK should expose parser_config via internal properties or metadata; - # we rely on the HTTP API for verification via get_auto_metadata. - cfg = dataset.get_auto_metadata() - assert cfg["enabled"] is True - assert len(cfg["fields"]) == 2 - names = {f["name"] for f in cfg["fields"]} - assert names == {"author", "category"} - - -@pytest.mark.usefixtures("clear_datasets") -class TestAutoMetadataOnUpdate: - @pytest.mark.p1 - def test_update_auto_metadata_via_dataset_update(self, client, add_dataset_func): - dataset = add_dataset_func - - # Initially set auto-metadata via dataset.update - payload = { - "auto_metadata_config": { - "enabled": True, - "fields": [ - { - "name": "tags", - "type": "list", - "description": "Document tags", - "examples": ["AI", "ML", "RAG"], - "restrict_values": False, - } - ], - } - } - dataset.update(payload) - - cfg = dataset.get_auto_metadata() - assert cfg["enabled"] is True - assert len(cfg["fields"]) == 1 - assert cfg["fields"][0]["name"] == "tags" - assert cfg["fields"][0]["type"] == "list" - - # Disable auto-metadata and replace fields - update_cfg = { - "enabled": False, - "fields": [ - { - "name": "year", - "type": "time", - "description": "Publication year", - "examples": None, - "restrict_values": False, - } - ], - } - dataset.update_auto_metadata(**update_cfg) - - cfg2 = dataset.get_auto_metadata() - assert cfg2["enabled"] is False - assert len(cfg2["fields"]) == 1 - assert cfg2["fields"][0]["name"] == "year" - assert cfg2["fields"][0]["type"] == "time" - - -@pytest.mark.usefixtures("clear_datasets") -class TestAutoMetadataValidation: - @pytest.mark.p2 - def test_invalid_field_type_rejected(self, client): - payload = { - "name": "auto_metadata_invalid_type", - "auto_metadata_config": { - "enabled": True, - "fields": [ - { - "name": "invalid_type", - "type": "unknown", # invalid literal - } - ], - }, - } - with pytest.raises(Exception) as exc_info: - client.create_dataset(**payload) - msg = str(exc_info.value) - # Pydantic literal_error message should appear - assert "Input should be" in msg or "literal_error" in msg - diff --git a/web/src/pages/dataset/components/metedata/hooks/use-manage-modal.ts b/web/src/pages/dataset/components/metedata/hooks/use-manage-modal.ts index 1070782ecf..ef360f197a 100644 --- a/web/src/pages/dataset/components/metedata/hooks/use-manage-modal.ts +++ b/web/src/pages/dataset/components/metedata/hooks/use-manage-modal.ts @@ -98,6 +98,7 @@ export const util = { return data.map((item) => { return { key: item.field, + type: item.valueType?.toLowerCase(), description: item.description, enum: item.values, };