mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
fix(agent/tools): PubMed tool always returns "Unknown Authors" (#16330)
### What problem does this PR solve?

Fixes the PubMed tool always emitting `Authors: Unknown Authors`. The `safe_find` closure in `_format_pubmed_content` was hardcoded to search from the article root, so the per-author `LastName`/`ForeName` lookups never matched. `safe_find` now accepts an optional `base` node (defaults to `child`, preserving the existing field lookups), and the author loop passes the current `<Author>` element.

Closes #16328

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Add test cases

### Testing

Added `test/testcases/test_web_api/test_canvas_app/test_pubmed_unit.py` covering per-author parsing, intact title/journal/DOI fields, and the no-authors fallback.

Before: `Authors: Unknown Authors`

After: `Authors: Furqan Khan, Jane Smith`
This commit is contained in:
@@ -117,8 +117,8 @@ class PubMed(ToolBase, ABC):
|
||||
|
||||
def _format_pubmed_content(self, child):
|
||||
"""Extract structured reference info from PubMed XML"""
|
||||
def safe_find(path):
|
||||
node = child
|
||||
def safe_find(path, base=None):
|
||||
node = child if base is None else base
|
||||
for p in path.split("/"):
|
||||
if node is None:
|
||||
return None
|
||||
@@ -135,8 +135,8 @@ class PubMed(ToolBase, ABC):
|
||||
# Authors
|
||||
authors = []
|
||||
for author in child.findall(".//AuthorList/Author"):
|
||||
lastname = safe_find("LastName") or ""
|
||||
forename = safe_find("ForeName") or ""
|
||||
lastname = safe_find("LastName", author) or ""
|
||||
forename = safe_find("ForeName", author) or ""
|
||||
fullname = f"{forename} {lastname}".strip()
|
||||
if fullname:
|
||||
authors.append(fullname)
|
||||
|
||||
Reference in New Issue
Block a user