mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
### What problem does this PR solve?

Fixes #14651. `kb_prompt()` in `rag/prompts/generator.py` crashes with `AttributeError: 'NoneType' object has no attribute 'items'` during agent citation generation when a retrieved chunk carries `document_metadata: null`.

**Root cause.** The crash happens at `rag/prompts/generator.py:132-133`:

```python
meta = ck.get("document_metadata", {})
for k, v in meta.items():
```

`dict.get(key, default)` only returns the default when the key is *missing*. When the key is present with an explicit `None` value, `.get()` returns `None`, and `.items()` crashes.

**How the chunk gets `None`.** It's a round-trip inside RAGFlow itself, not bad input from retrieval:

1. The agent stores retrieved chunks via `agent/canvas.py:814`, which routes them through `chunks_format()`.
2. `rag/prompts/generator.py:61` canonicalizes the field with `chunk.get("document_metadata")` (no default), so chunks without metadata become `{"document_metadata": None, ...}`.
3. `agent/component/agent_with_tools.py:314` feeds those canonicalized chunks back into `kb_prompt()` for citation generation, and `.get("document_metadata", {})` no longer protects us.

**Fix.** One-line change at `rag/prompts/generator.py:132`: use `ck.get("document_metadata") or {}` so an explicit `None` is also coerced to `{}`. The line-61 `None` is intentionally part of the API/UI contract — the frontend handles it via optional chaining (`web/src/components/markdown-content/index.tsx:184`, `web/src/pages/next-search/search-view.tsx:217`) — so the fix belongs at the consumer, not the producer.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
This commit is contained in:
@@ -129,7 +129,7 @@ def kb_prompt(kbinfos, max_tokens, hash_id=False):
|
||||
cnt = "\nID: {}".format(i if not hash_id else hash_str2int(get_value(ck, "id", "chunk_id"), 500))
|
||||
cnt += draw_node("Title", get_value(ck, "docnm_kwd", "document_name"))
|
||||
cnt += draw_node("URL", ck.get('url', ''))
|
||||
meta = ck.get("document_metadata", {})
|
||||
meta = ck.get("document_metadata") or {}
|
||||
for k, v in meta.items():
|
||||
cnt += draw_node(k, v)
|
||||
cnt += "\n└── Content:\n"
|
||||
|
||||
87
test/unit_test/rag/prompts/test_kb_prompt_metadata.py
Normal file
87
test/unit_test/rag/prompts/test_kb_prompt_metadata.py
Normal file
@@ -0,0 +1,87 @@
|
||||
#
|
||||
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import pytest
|
||||
|
||||
from rag.prompts.generator import kb_prompt
|
||||
|
||||
|
||||
@pytest.mark.p1
class TestKbPromptDocumentMetadata:
    """Regression tests for kb_prompt's handling of `document_metadata` on chunks.

    The class-level ``p1`` marker applies to every test method below, so the
    individual methods do not repeat it.
    """

    @staticmethod
    def _kbinfos(**extra_fields):
        """Build a single-chunk ``kbinfos`` payload for ``kb_prompt``.

        Args:
            **extra_fields: Additional key/value pairs merged into the chunk,
                e.g. ``document_metadata=None``. With no arguments the chunk
                has no ``document_metadata`` key at all.

        Returns:
            A dict of the form ``{"chunks": [chunk]}``.
        """
        chunk = {
            "id": "chunk-1",
            "content_with_weight": "hello world",
            "docnm_kwd": "doc.pdf",
        }
        chunk.update(extra_fields)
        return {"chunks": [chunk]}

    def test_null_document_metadata_does_not_crash(self):
        """A chunk with `document_metadata: None` must not raise AttributeError.

        Regression for issue #14651: a chunk can carry the `document_metadata`
        key with an explicit null value. In that case
        `dict.get("document_metadata", {})` returns `None` instead of the
        default (the default only applies when the key is missing), which
        crashed citation generation with
        `AttributeError: 'NoneType' object has no attribute 'items'`.
        """
        rendered = kb_prompt(self._kbinfos(document_metadata=None), max_tokens=10000)

        assert len(rendered) == 1
        assert "hello world" in rendered[0]
        assert "doc.pdf" in rendered[0]

    def test_missing_document_metadata_key(self):
        """A chunk with no `document_metadata` key at all should also work."""
        rendered = kb_prompt(self._kbinfos(), max_tokens=10000)

        assert len(rendered) == 1
        assert "hello world" in rendered[0]
        # Same expectation as the null-metadata case: the title is rendered
        # regardless of whether metadata is present.
        assert "doc.pdf" in rendered[0]

    def test_populated_document_metadata_renders_fields(self):
        """When metadata is a dict, its key/value pairs must be rendered."""
        kbinfos = self._kbinfos(document_metadata={"author": "alice", "year": "2026"})

        rendered = kb_prompt(kbinfos, max_tokens=10000)

        assert len(rendered) == 1
        assert "author: alice" in rendered[0]
        assert "year: 2026" in rendered[0]
|
||||
Reference in New Issue
Block a user