Fix: update docs generator (#14070)

### What problem does this PR solve?

Refactor: update docs generator

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

1. Support multiple document generator components and correctly display
messages in the message component. The document generator will not
overwrite other messages.

<img width="700" alt="Screenshot from 2026-04-13 13-56-17"
src="https://github.com/user-attachments/assets/3f3e06e8-33ce-4df1-8b05-510c86af70a4"
/>

2. Support Chinese content and ensure correct Markdown rendering in PDF
and DOCX
<img width="700" alt="image"
src="https://github.com/user-attachments/assets/69bf1f7b-261d-48e5-a9f3-8e94462b90ed"
/>

3. Simplify the configuration page and support more output formats
 
<img height="700" alt="image"
src="https://github.com/user-attachments/assets/8647374c-c055-4daa-ad71-cd9052eb138e"
/>

4. Hide the download option in every component except the message component
<img width="700" alt="image"
src="https://github.com/user-attachments/assets/a723dfcb-b60d-4eb5-b2f6-d41ca5955eb4"
/>

<img width="700" alt="image"
src="https://github.com/user-attachments/assets/a8762ac4-807b-4f0b-9287-65f82f7c9c98"
/>

5. Sanitize filename
 
<img width="700" alt="image"
src="https://github.com/user-attachments/assets/df49509f-37c0-40f9-b03d-bd6ce7fdefa8"
/>


6. Additional usability improvements
This commit is contained in:
Magicbook1108
2026-04-14 15:24:43 +08:00
committed by GitHub
parent 1031aebc8f
commit 1376c004a9
39 changed files with 1145 additions and 2688 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -54,6 +54,9 @@ class MessageParam(ComponentParamBase):
self.outputs = {
"content": {
"type": "str"
},
"downloads": {
"type": "list"
}
}
@@ -66,10 +69,66 @@ class MessageParam(ComponentParamBase):
class Message(ComponentBase):
component_name = "Message"
@staticmethod
def _is_download_info(value: Any) -> bool:
return isinstance(value, dict) and all(
key in value for key in ("doc_id", "filename", "mime_type")
)
def _extract_downloads(self, value: Any) -> list[dict[str, Any]]:
if isinstance(value, str):
try:
value = json.loads(value)
except Exception:
return []
if self._is_download_info(value):
return [value]
if isinstance(value, list) and all(self._is_download_info(item) for item in value):
return value
return []
def _stringify_message_value(
self,
value: Any,
delimiter: str = None,
downloads: list[dict[str, Any]] | None = None,
fallback_to_str: bool = False,
) -> str:
extracted_downloads = self._extract_downloads(value)
if extracted_downloads:
if downloads is not None:
downloads.extend(extracted_downloads)
return ""
if value is None:
return ""
if isinstance(value, list) and delimiter:
return delimiter.join([str(vv) for vv in value])
if isinstance(value, str):
return value
try:
return json.dumps(value, ensure_ascii=False)
except Exception:
if fallback_to_str:
return str(value)
return ""
def get_input_elements(self) -> dict[str, Any]:
    """Return the input elements referenced by the configured content.

    The pieces of ``self._param.content`` are concatenated into a single
    text, which is handed to ``get_input_elements_from_text``.
    """
    template_text = "".join(self._param.content)
    return self.get_input_elements_from_text(template_text)
def get_kwargs(self, script:str, kwargs:dict = {}, delimiter:str=None) -> tuple[str, dict[str, str | list | Any]]:
def get_kwargs(
self,
script: str,
kwargs: dict = {},
delimiter: str = None,
downloads: list[dict[str, Any]] | None = None,
) -> tuple[str, dict[str, str | list | Any]]:
for k,v in self.get_input_elements_from_text(script).items():
if k in kwargs:
continue
@@ -84,15 +143,8 @@ class Message(ComponentBase):
else:
for t in iter_obj:
ans += t
elif isinstance(v, list) and delimiter:
ans = delimiter.join([str(vv) for vv in v])
elif not isinstance(v, str):
try:
ans = json.dumps(v, ensure_ascii=False)
except Exception:
pass
else:
ans = v
ans = self._stringify_message_value(v, delimiter, downloads)
if not ans:
ans = ""
kwargs[k] = ans
@@ -115,6 +167,7 @@ class Message(ComponentBase):
s = 0
all_content = ""
cache = {}
downloads = []
for r in re.finditer(self.variable_ref_patt, rand_cnt, flags=re.DOTALL):
if self.check_if_canceled("Message streaming"):
return
@@ -154,11 +207,9 @@ class Message(ComponentBase):
continue
elif inspect.isawaitable(v):
v = await v
elif not isinstance(v, str):
try:
v = json.dumps(v, ensure_ascii=False)
except Exception:
v = str(v)
v = self._stringify_message_value(
v, downloads=downloads, fallback_to_str=True
)
yield v
self.set_input_value(exp, v)
all_content += v
@@ -171,6 +222,7 @@ class Message(ComponentBase):
all_content += rand_cnt[s: ]
yield rand_cnt[s: ]
self.set_output("downloads", downloads)
self.set_output("content", all_content)
self._convert_content(all_content)
await self._save_to_memory(all_content)
@@ -191,12 +243,14 @@ class Message(ComponentBase):
self.set_output("content", partial(self._stream, rand_cnt))
return
rand_cnt, kwargs = self.get_kwargs(rand_cnt, kwargs)
downloads = []
rand_cnt, kwargs = self.get_kwargs(rand_cnt, kwargs, downloads=downloads)
template = _jinja2_sandbox.from_string(rand_cnt)
try:
content = template.render(kwargs)
except Exception:
pass
except Exception as e:
logging.warning(f"Jinja2 template rendering failed: {e}")
content = rand_cnt # fallback to unrendered content
if self.check_if_canceled("Message processing"):
return
@@ -204,6 +258,7 @@ class Message(ComponentBase):
for n, v in kwargs.items():
content = re.sub(n, v, content)
self.set_output("downloads", downloads)
self.set_output("content", content)
self._convert_content(content)
self._save_to_memory(content)

View File

@@ -22,6 +22,7 @@ import re
# Table pairing component names with other component names.
# NOTE(review): presumably key = legacy name, value = current name;
# confirm against the code that consumes this table.
COMPONENT_RENAMES = {
"Splitter": "TokenChunker",
"HierarchicalMerger": "TitleChunker",
"PDFGenerator": "DocGenerator",
}
NODE_TYPE_RENAMES = {