From e7d45dd6453ca01447d4b14aa0f433d8bf89d960 Mon Sep 17 00:00:00 2001 From: nickmopen Date: Mon, 25 May 2026 11:05:00 +0300 Subject: [PATCH] Feat: Expose Doc Generator file metadata as discrete outputs (#15080) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Declare doc_id, filename, mime_type, and size as separate outputs on the Document Generation component so downstream nodes (e.g., the Code component) can consume them via the variable picker. The existing download JSON blob is preserved unchanged for the Message component's download-chip rendering. ### What problem does this PR solve? The Document Generation component previously exposed only a single `download` output — a JSON-encoded blob containing the file's `doc_id`, `filename`, `mime_type`, `size`, and base64 payload. On top of that, the variable picker actively hides this `download` entry from every consumer except the Message component (because the embedded base64 is too heavy to splat into arbitrary downstream nodes). The combined effect: users wiring the Doc Generator's output into a Code component had no way to retrieve basic file info such as `filename` or `doc_id` from the picker, blocking workflows that need to post-process the generated file (e.g., registering it elsewhere, custom delivery, follow-up API calls). This PR declares `doc_id`, `filename`, `mime_type`, and `size` as **discrete outputs** on the Document Generation component, alongside the existing `download` blob. The new fields: - Appear in the variable picker for **all** downstream nodes, including the Code component, so users can bind them directly to script arguments. - Are cheap scalars only — no base64 payload leaks into other components. - Leave the existing `download` JSON blob completely untouched, so the Message component's download-chip rendering (which parses that blob via `_is_download_info`) keeps working with no behavior change. 
Changes: - `agent/component/docs_generator.py` — declare the four new outputs in `DocGeneratorParam` and emit them via `set_output(...)` in `_invoke`. - `web/src/pages/agent/constant/index.tsx` — extend `initialDocGeneratorValues.outputs` with the new keys. - `web/src/pages/agent/form/doc-generator-form/index.tsx` — mirror the new outputs in the zod schema so the form is valid. No changes needed to the picker's existing `download`-hiding filter — it matches only on the literal output name `download`, so the new metadata entries fall through naturally. Reported in: https://github.com/infiniflow/ragflow/issues/14461. ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- agent/component/docs_generator.py | 8 ++++++++ web/src/pages/agent/constant/index.tsx | 4 ++++ web/src/pages/agent/form/doc-generator-form/index.tsx | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/agent/component/docs_generator.py b/agent/component/docs_generator.py index ce7a3abad5..2809a9b1ca 100644 --- a/agent/component/docs_generator.py +++ b/agent/component/docs_generator.py @@ -52,6 +52,10 @@ class DocGeneratorParam(ComponentParamBase): self.include_download_info_in_content = False self.font_size = 12 self.outputs = { + "doc_id": {"value": "", "type": "string"}, + "filename": {"value": "", "type": "string"}, + "mime_type": {"value": "", "type": "string"}, + "size": {"value": 0, "type": "number"}, "download": {"value": "", "type": "string"}, } @@ -134,6 +138,10 @@ class DocGenerator(Message, ABC): "base64": file_base64, "include_download_info_in_content": self._param.include_download_info_in_content, } + self.set_output("doc_id", doc_id) + self.set_output("filename", filename) + self.set_output("mime_type", mime_type) + self.set_output("size", file_size) self.set_output("download", json.dumps(download_info)) return download_info diff --git a/web/src/pages/agent/constant/index.tsx b/web/src/pages/agent/constant/index.tsx index 1c02500706..fa169a526a 100644 
--- a/web/src/pages/agent/constant/index.tsx +++ b/web/src/pages/agent/constant/index.tsx @@ -978,6 +978,10 @@ export const initialDocGeneratorValues = { include_download_info_in_content: false, font_size: 12, outputs: { + doc_id: { type: 'string' }, + filename: { type: 'string' }, + mime_type: { type: 'string' }, + size: { type: 'number' }, download: { type: 'string' }, }, }; diff --git a/web/src/pages/agent/form/doc-generator-form/index.tsx b/web/src/pages/agent/form/doc-generator-form/index.tsx index 56faf96535..7ec137f6ec 100644 --- a/web/src/pages/agent/form/doc-generator-form/index.tsx +++ b/web/src/pages/agent/form/doc-generator-form/index.tsx @@ -38,6 +38,10 @@ function DocGeneratorForm({ node }: INextOperatorForm) { include_download_info_in_content: z.boolean(), font_size: z.coerce.number().min(12, 'Font size must be at least 12'), outputs: z.object({ + doc_id: z.object({ type: z.string() }), + filename: z.object({ type: z.string() }), + mime_type: z.object({ type: z.string() }), + size: z.object({ type: z.string() }), download: z.object({ type: z.string() }), }), });