Fix metadata config (#14480)

### What problem does this PR solve?

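Fix metadata config: the dataset auto-metadata API now reads and writes the `metadata` and `built_in_metadata` lists stored in `parser_config`, replacing the previous `enabled` / `fields` payload. Each metadata field is identified by `key`, may declare an `enum` of values, and may use the `number` type in addition to `string`, `list`, and `time`. The HTTP test fixture is updated to the new payload shape, and the SDK tests written against the old `enabled` / `fields` shape are removed.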

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Wang Qi
2026-04-29 21:09:54 +08:00
committed by GitHub
parent d4147efc66
commit 5018459112
5 changed files with 14 additions and 169 deletions

View File

@@ -650,25 +650,8 @@ def get_auto_metadata(dataset_id: str, tenant_id: str):
kb = KnowledgebaseService.get_or_none(id=dataset_id, tenant_id=tenant_id)
if kb is None:
return False, f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'"
parser_cfg = kb.parser_config or {}
metadata = parser_cfg.get("metadata") or []
enabled = parser_cfg.get("enable_metadata", bool(metadata))
# Normalize to AutoMetadataConfig-like JSON
fields = []
for f in metadata:
if not isinstance(f, dict):
continue
fields.append(
{
"name": f.get("name", ""),
"type": f.get("type", ""),
"description": f.get("description"),
"examples": f.get("examples"),
"restrict_values": f.get("restrict_values", False),
}
)
return True, {"enabled": enabled, "fields": fields}
return True, {"metadata": parser_cfg.get("metadata") or [], "built_in_metadata": parser_cfg.get("built_in_metadata") or []}
async def update_auto_metadata(dataset_id: str, tenant_id: str, cfg: dict):
@@ -685,24 +668,13 @@ async def update_auto_metadata(dataset_id: str, tenant_id: str, cfg: dict):
return False, f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'"
parser_cfg = kb.parser_config or {}
fields = []
for f in cfg.get("fields", []):
fields.append(
{
"name": f.get("name", ""),
"type": f.get("type", ""),
"description": f.get("description"),
"examples": f.get("examples"),
"restrict_values": f.get("restrict_values", False),
}
)
parser_cfg["metadata"] = fields
parser_cfg["enable_metadata"] = cfg.get("enabled", True)
parser_cfg["metadata"] = cfg.get("metadata")
parser_cfg["built_in_metadata"] = cfg.get("built_in_metadata")
if not KnowledgebaseService.update_by_id(kb.id, {"parser_config": parser_cfg}):
return False, "Update auto-metadata error.(Database error)"
return True, {"enabled": parser_cfg["enable_metadata"], "fields": fields}
return True, cfg
def delete_tags(dataset_id: str, tenant_id: str, tags: list[str]):

View File

@@ -364,18 +364,17 @@ class ParentChildConfig(Base):
class AutoMetadataField(Base):
"""Schema for a single auto-metadata field configuration."""
name: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=255), Field(...)]
type: Annotated[Literal["string", "list", "time"], Field(...)]
key: Annotated[str, StringConstraints(strip_whitespace=True, min_length=1, max_length=255), Field(...)]
type: Annotated[Literal["string", "list", "time", "number"], Field(...)]
description: Annotated[str | None, Field(default=None, max_length=65535)]
examples: Annotated[list[str] | None, Field(default=None)]
restrict_values: Annotated[bool, Field(default=False)]
enum: Annotated[list[str] | None, Field(default=None)]
class AutoMetadataConfig(Base):
"""Top-level auto-metadata configuration attached to a dataset."""
enabled: Annotated[bool, Field(default=True)]
fields: Annotated[list[AutoMetadataField], Field(default_factory=list)]
metadata: Annotated[list[AutoMetadataField], Field(default_factory=list)]
built_in_metadata: Annotated[list[AutoMetadataField], Field(default_factory=list)]
class ParserConfig(Base):

View File

@@ -70,11 +70,10 @@ def add_dataset_with_metadata(HttpApiAuth):
headers={"Content-Type": "application/json"},
auth=HttpApiAuth,
json={
"enabled": False,
"fields": [
{"name": "character", "type": "string", "description": "Historical figure name"},
{"name": "era", "type": "string", "description": "Historical era"},
{"name": "achievements", "type": "list", "description": "Major achievements"},
"metadata": [
{"key": "character", "type": "string", "description": "Historical figure name"},
{"key": "era", "type": "string", "description": "Historical era"},
{"key": "achievements", "type": "list", "description": "Major achievements"},
]
}
).json()

View File

@@ -1,126 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
@pytest.mark.usefixtures("clear_datasets")
class TestAutoMetadataOnCreate:
@pytest.mark.p1
def test_create_dataset_with_auto_metadata(self, client):
payload = {
"name": "auto_metadata_create",
"auto_metadata_config": {
"enabled": True,
"fields": [
{
"name": "author",
"type": "string",
"description": "The author of the document",
"examples": ["John Doe", "Jane Smith"],
"restrict_values": False,
},
{
"name": "category",
"type": "list",
"description": "Document category",
"examples": ["Technical", "Business"],
"restrict_values": True,
},
],
},
}
dataset = client.create_dataset(**payload)
# The SDK should expose parser_config via internal properties or metadata;
# we rely on the HTTP API for verification via get_auto_metadata.
cfg = dataset.get_auto_metadata()
assert cfg["enabled"] is True
assert len(cfg["fields"]) == 2
names = {f["name"] for f in cfg["fields"]}
assert names == {"author", "category"}
@pytest.mark.usefixtures("clear_datasets")
class TestAutoMetadataOnUpdate:
@pytest.mark.p1
def test_update_auto_metadata_via_dataset_update(self, client, add_dataset_func):
dataset = add_dataset_func
# Initially set auto-metadata via dataset.update
payload = {
"auto_metadata_config": {
"enabled": True,
"fields": [
{
"name": "tags",
"type": "list",
"description": "Document tags",
"examples": ["AI", "ML", "RAG"],
"restrict_values": False,
}
],
}
}
dataset.update(payload)
cfg = dataset.get_auto_metadata()
assert cfg["enabled"] is True
assert len(cfg["fields"]) == 1
assert cfg["fields"][0]["name"] == "tags"
assert cfg["fields"][0]["type"] == "list"
# Disable auto-metadata and replace fields
update_cfg = {
"enabled": False,
"fields": [
{
"name": "year",
"type": "time",
"description": "Publication year",
"examples": None,
"restrict_values": False,
}
],
}
dataset.update_auto_metadata(**update_cfg)
cfg2 = dataset.get_auto_metadata()
assert cfg2["enabled"] is False
assert len(cfg2["fields"]) == 1
assert cfg2["fields"][0]["name"] == "year"
assert cfg2["fields"][0]["type"] == "time"
@pytest.mark.usefixtures("clear_datasets")
class TestAutoMetadataValidation:
@pytest.mark.p2
def test_invalid_field_type_rejected(self, client):
payload = {
"name": "auto_metadata_invalid_type",
"auto_metadata_config": {
"enabled": True,
"fields": [
{
"name": "invalid_type",
"type": "unknown", # invalid literal
}
],
},
}
with pytest.raises(Exception) as exc_info:
client.create_dataset(**payload)
msg = str(exc_info.value)
# Pydantic literal_error message should appear
assert "Input should be" in msg or "literal_error" in msg

View File

@@ -98,6 +98,7 @@ export const util = {
return data.map((item) => {
return {
key: item.field,
type: item.valueType?.toLowerCase(),
description: item.description,
enum: item.values,
};