mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-07-01 16:25:44 +08:00
Fix metadata parsing regression for upgraded v0.24 datasets (#14383)
### What problem does this PR solve?

This PR fixes issue #14371, where file parsing failed after upgrading from v0.24.0 to v0.25.0: the metadata config could be a JSON Schema object but was handled like a list, which later caused `KeyError: 'properties'`.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -427,7 +427,23 @@ async def build_chunks(task, progress_callback):
|
||||
chat_mdl = LLMBundle(task["tenant_id"], chat_model_config, lang=task["language"])
|
||||
|
||||
async def gen_metadata_task(chat_mdl, d):
|
||||
metadata_conf = list(task["parser_config"].get("metadata", [])) + list(task["parser_config"].get("built_in_metadata") or [])
|
||||
metadata_conf = task["parser_config"].get("metadata", [])
|
||||
built_in_metadata = list(task["parser_config"].get("built_in_metadata") or [])
|
||||
if isinstance(metadata_conf, dict):
|
||||
if not isinstance(metadata_conf.get("properties"), dict):
|
||||
metadata_conf = {"type": "object", "properties": {}}
|
||||
if built_in_metadata:
|
||||
metadata_conf = {
|
||||
**metadata_conf,
|
||||
"properties": {
|
||||
**metadata_conf.get("properties", {}),
|
||||
**turn2jsonschema(built_in_metadata).get("properties", {}),
|
||||
},
|
||||
}
|
||||
elif isinstance(metadata_conf, list):
|
||||
metadata_conf = metadata_conf + built_in_metadata
|
||||
else:
|
||||
metadata_conf = built_in_metadata
|
||||
cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "metadata",
|
||||
metadata_conf)
|
||||
if not cached:
|
||||
|
||||
Reference in New Issue
Block a user