From 1376c004a9372b6999882d57aae1832b2cc70996 Mon Sep 17 00:00:00 2001 From: Magicbook1108 Date: Tue, 14 Apr 2026 15:24:43 +0800 Subject: [PATCH] Fix: update docs generator (#14070) ### What problem does this PR solve? Refactor: update docs generator ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) 1. Support multiple document generator components and correctly display messages in the message component. The document generator will not overwrite other messages. Screenshot from 2026-04-13 13-56-17 2. Support Chinese content and ensure correct Markdown rendering in PDF and DOCX image 3. Simplify configuration page and support more output format image 4. Hide download from other components except for message image image 5. Sanitize filename image 6. And more changes on usability --- Dockerfile | 3 +- agent/component/docs_generator.py | 2042 +++++------------ agent/component/message.py | 89 +- agent/dsl_migration.py | 1 + api/apps/user_app.py | 1 - .../docs_generator.md | 241 -- .../document-download-button/index.tsx | 88 + web/src/components/message-item/index.tsx | 43 +- .../components/next-message-item/index.tsx | 51 +- .../components/pdf-download-button/index.tsx | 196 -- web/src/constants/agent.tsx | 2 +- web/src/interfaces/database/chat.ts | 9 + web/src/locales/ar.ts | 8 +- web/src/locales/bg.ts | 6 +- web/src/locales/de.ts | 6 +- web/src/locales/en.ts | 6 +- web/src/locales/es.ts | 6 +- web/src/locales/fr.ts | 6 +- web/src/locales/id.ts | 6 +- web/src/locales/it.ts | 6 +- web/src/locales/ja.ts | 6 +- web/src/locales/pt-br.ts | 6 +- web/src/locales/ru.ts | 6 +- web/src/locales/tr.ts | 6 +- web/src/locales/vi.ts | 6 +- web/src/locales/zh-traditional.ts | 6 +- web/src/locales/zh.ts | 6 +- .../node/dropdown/accordion-operators.tsx | 2 +- .../agent/chat/use-send-agent-message.ts | 4 +- web/src/pages/agent/constant/index.tsx | 62 +- .../agent/form-sheet/form-config-map.tsx | 6 +- .../agent/form/doc-generator-form/index.tsx | 254 ++ .../form/doc-generator-form/use-values.ts | 30 + .../use-watch-form-change.ts | 0 .../agent/form/pdf-generator-form/index.tsx | 536 ----- .../form/pdf-generator-form/use-values.ts | 11 - web/src/pages/agent/hooks/use-add-node.ts | 4 +- .../pages/agent/hooks/use-get-begin-query.tsx | 64 +- web/src/pages/agent/operator-icon.tsx | 2 +- 39 files changed, 1145 insertions(+), 2688 deletions(-) delete mode 100644 docs/guides/agent/agent_component_reference/docs_generator.md create mode 100644 web/src/components/document-download-button/index.tsx delete mode 100644 web/src/components/pdf-download-button/index.tsx create mode 100644 web/src/pages/agent/form/doc-generator-form/index.tsx create mode 100644 web/src/pages/agent/form/doc-generator-form/use-values.ts rename web/src/pages/agent/form/{pdf-generator-form => doc-generator-form}/use-watch-form-change.ts (100%) delete mode 100644 web/src/pages/agent/form/pdf-generator-form/index.tsx delete mode 100644 web/src/pages/agent/form/pdf-generator-form/use-values.ts diff --git a/Dockerfile b/Dockerfile index b89cb266a0..fdc5f4c4bb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,7 +42,8 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \ chmod 1777 /tmp && \ apt update && \ - apt install -y build-essential libglib2.0-0 libglx-mesa0 libgl1 pkg-config libicu-dev libgdiplus default-jdk libatk-bridge2.0-0 libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev libjemalloc-dev gnupg unzip curl wget git vim less ghostscript pandoc texlive fonts-freefont-ttf fonts-noto-cjk postgresql-client + apt install -y \ + build-essential libglib2.0-0 libglx-mesa0 libgl1 pkg-config libicu-dev libgdiplus default-jdk libatk-bridge2.0-0 libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev libjemalloc-dev gnupg unzip curl wget git vim less ghostscript pandoc texlive texlive-latex-extra texlive-xetex texlive-lang-chinese fonts-freefont-ttf fonts-noto-cjk postgresql-client # Download resource from GitHub to /usr/share/infinity RUN mkdir -p /usr/share/infinity/resource && \ diff --git a/agent/component/docs_generator.py b/agent/component/docs_generator.py index a3f165a552..3ab02c4cda 100644 --- a/agent/component/docs_generator.py +++ b/agent/component/docs_generator.py @@ -1,1570 +1,632 @@ +import logging import json import os import re -import base64 -from datetime import datetime +import shutil +import tempfile from abc import ABC -from io import BytesIO -from typing import Optional +from datetime import datetime from functools import partial -from reportlab.lib.pagesizes import A4 -from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle -from reportlab.lib.units import inch -from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY -from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, TableStyle, LongTable -from reportlab.lib import colors -from reportlab.pdfbase import pdfmetrics -from reportlab.pdfbase.ttfonts import TTFont -from reportlab.pdfbase.cidfonts import UnicodeCIDFont +from io import BytesIO +from xml.sax.saxutils import escape from agent.component.base import ComponentParamBase from api.utils.api_utils import timeout +from common import settings +from common.misc_utils import get_uuid from .message import Message -class PDFGeneratorParam(ComponentParamBase): +def sanitize_filename(name: str, extension: str) -> str: + if not name: + return f"file.{extension}" + + name = str(name).strip() + name = re.sub(r'[\\/\x00-\x1f\?\#\%\*\:\|\<\>"]', " ", name) + name = re.sub(r"\s+", " ", name).strip(" .") + + if not name: + return f"file.{extension}" + + base, _ = os.path.splitext(name) + base = base[:180].rstrip() or "file" + return f"{base}.{extension}" + + +class DocGeneratorParam(ComponentParamBase): """ - Define the PDF Generator component parameters. + Define the Docs Generator component parameters. """ def __init__(self): super().__init__() - # Output format - self.output_format = "pdf" # pdf, docx, txt - - # Content inputs + self.output_format = "pdf" # pdf, docx, txt, markdown, html self.content = "" - self.title = "" - self.subtitle = "" + self.filename = "" self.header_text = "" self.footer_text = "" - - # Images - self.logo_image = "" # base64 or file path - self.logo_position = "left" # left, center, right - self.logo_width = 2.0 # inches - self.logo_height = 1.0 # inches - - # Styling - self.font_family = "Helvetica" # Helvetica, Times-Roman, Courier - self.font_size = 12 - self.title_font_size = 24 - self.heading1_font_size = 18 - self.heading2_font_size = 16 - self.heading3_font_size = 14 - self.text_color = "#000000" - self.title_color = "#000000" - - # Page settings - self.page_size = "A4" - self.orientation = "portrait" # portrait, landscape - self.margin_top = 1.0 # inches - self.margin_bottom = 1.0 - self.margin_left = 1.0 - self.margin_right = 1.0 - self.line_spacing = 1.2 - - # Output settings - self.filename = "" - self.output_directory = "/tmp/pdf_outputs" + self.watermark_text = "" self.add_page_numbers = True self.add_timestamp = True - - # Advanced features - self.watermark_text = "" - self.enable_toc = False - + self.font_size = 12 self.outputs = { - "file_path": {"value": "", "type": "string"}, - "pdf_base64": {"value": "", "type": "string"}, "download": {"value": "", "type": "string"}, - "success": {"value": False, "type": "boolean"} } def check(self): - self.check_empty(self.content, "[PDFGenerator] Content") - self.check_valid_value(self.output_format, "[PDFGenerator] Output format", ["pdf", "docx", "txt"]) - self.check_valid_value(self.logo_position, "[PDFGenerator] Logo position", ["left", "center", "right"]) - self.check_valid_value(self.font_family, "[PDFGenerator] Font family", - ["Helvetica", "Times-Roman", "Courier", "Helvetica-Bold", "Times-Bold"]) - self.check_valid_value(self.page_size, "[PDFGenerator] Page size", ["A4", "Letter"]) - self.check_valid_value(self.orientation, "[PDFGenerator] Orientation", ["portrait", "landscape"]) - self.check_positive_number(self.font_size, "[PDFGenerator] Font size") - self.check_positive_number(self.margin_top, "[PDFGenerator] Margin top") + self.check_empty(self.content, "[DocGenerator] Content") + self.check_valid_value( + self.output_format, + "[DocGenerator] Output format", + ["pdf", "docx", "txt", "markdown", "html"], + ) + self.check_positive_number(self.font_size, "[DocGenerator] Font size") + if self.font_size < 12: + raise ValueError("[DocGenerator] Font size must be greater than or equal to 12") -class PDFGenerator(Message, ABC): - component_name = "PDFGenerator" - - # Track if Unicode fonts have been registered - _unicode_fonts_registered = False - _unicode_font_name = None - _unicode_font_bold_name = None - - @classmethod - def _reset_font_cache(cls): - """Reset font registration cache - useful for testing""" - cls._unicode_fonts_registered = False - cls._unicode_font_name = None - cls._unicode_font_bold_name = None - - @classmethod - def _register_unicode_fonts(cls): - """Register Unicode-compatible fonts for multi-language support. - - Uses CID fonts (STSong-Light) for reliable CJK rendering as TTF fonts - have issues with glyph mapping in some ReportLab versions. - """ - # If already registered successfully, return True - if cls._unicode_fonts_registered and cls._unicode_font_name is not None: - return True - - # Reset and try again if previous registration failed - cls._unicode_fonts_registered = True - cls._unicode_font_name = None - cls._unicode_font_bold_name = None - - # Use CID fonts for reliable CJK support - # These are built into ReportLab and work reliably across all platforms - cid_fonts = [ - 'STSong-Light', # Simplified Chinese - 'HeiseiMin-W3', # Japanese - 'HYSMyeongJo-Medium', # Korean - ] - - for cid_font in cid_fonts: - try: - pdfmetrics.registerFont(UnicodeCIDFont(cid_font)) - cls._unicode_font_name = cid_font - cls._unicode_font_bold_name = cid_font # CID fonts don't have bold variants - print(f"Registered CID font: {cid_font}") - break - except Exception as e: - print(f"Failed to register CID font {cid_font}: {e}") - continue - - # If CID fonts fail, try TTF fonts as fallback - if not cls._unicode_font_name: - font_paths = [ - '/usr/share/fonts/truetype/freefont/FreeSans.ttf', - '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', - ] - - for font_path in font_paths: - if os.path.exists(font_path): - try: - pdfmetrics.registerFont(TTFont('UnicodeFont', font_path)) - cls._unicode_font_name = 'UnicodeFont' - cls._unicode_font_bold_name = 'UnicodeFont' - print(f"Registered TTF font from: {font_path}") - - # Register font family - from reportlab.pdfbase.pdfmetrics import registerFontFamily - registerFontFamily('UnicodeFont', normal='UnicodeFont', bold='UnicodeFont') - break - except Exception as e: - print(f"Failed to register TTF font {font_path}: {e}") - continue - - return cls._unicode_font_name is not None - - @staticmethod - def _needs_unicode_font(text: str) -> bool: - """Check if text contains CJK or other complex scripts that need special fonts. - - Standard PDF fonts (Helvetica, Times, Courier) support: - - Basic Latin, Extended Latin, Cyrillic, Greek - - CID fonts are needed for: - - CJK (Chinese, Japanese, Korean) - - Arabic, Hebrew (RTL scripts) - - Thai, Hindi, and other Indic scripts - """ - if not text: - return False - - for char in text: - code = ord(char) - - # CJK Unified Ideographs and related ranges - if 0x4E00 <= code <= 0x9FFF: # CJK Unified Ideographs - return True - if 0x3400 <= code <= 0x4DBF: # CJK Extension A - return True - if 0x3000 <= code <= 0x303F: # CJK Symbols and Punctuation - return True - if 0x3040 <= code <= 0x309F: # Hiragana - return True - if 0x30A0 <= code <= 0x30FF: # Katakana - return True - if 0xAC00 <= code <= 0xD7AF: # Hangul Syllables - return True - if 0x1100 <= code <= 0x11FF: # Hangul Jamo - return True - - # Arabic and Hebrew (RTL scripts) - if 0x0600 <= code <= 0x06FF: # Arabic - return True - if 0x0590 <= code <= 0x05FF: # Hebrew - return True - - # Indic scripts - if 0x0900 <= code <= 0x097F: # Devanagari (Hindi) - return True - if 0x0E00 <= code <= 0x0E7F: # Thai - return True - - return False - - def _get_font_for_content(self, content: str) -> tuple: - """Get appropriate font based on content, returns (regular_font, bold_font)""" - if self._needs_unicode_font(content): - if self._register_unicode_fonts() and self._unicode_font_name: - return (self._unicode_font_name, self._unicode_font_bold_name or self._unicode_font_name) - else: - print("Warning: Content contains non-Latin characters but no Unicode font available") - - # Fall back to configured font - return (self._param.font_family, self._get_bold_font_name()) - - def _get_active_font(self) -> str: - """Get the currently active font (Unicode or configured)""" - return getattr(self, '_active_font', self._param.font_family) - - def _get_active_bold_font(self) -> str: - """Get the currently active bold font (Unicode or configured)""" - return getattr(self, '_active_bold_font', self._get_bold_font_name()) - - def _get_bold_font_name(self) -> str: - """Get the correct bold variant of the current font family""" - font_map = { - 'Helvetica': 'Helvetica-Bold', - 'Times-Roman': 'Times-Bold', - 'Courier': 'Courier-Bold', - } - font_family = getattr(self._param, 'font_family', 'Helvetica') - if 'Bold' in font_family: - return font_family - return font_map.get(font_family, 'Helvetica-Bold') +class DocGenerator(Message, ABC): + component_name = "DocGenerator" + _default_output_directory = os.path.join(tempfile.gettempdir(), "doc_outputs") + _overlay_margin = 36 + _overlay_font_size = 9 + _pdf_main_font = "Noto Sans CJK SC" + _pdf_cjk_font = "Noto Sans CJK SC" + _pdf_overlay_font = "STSong-Light" def get_input_form(self) -> dict[str, dict]: return { "content": { "name": "Content", - "type": "text" - }, - "title": { - "name": "Title", - "type": "line" - }, - "subtitle": { - "name": "Subtitle", - "type": "line" + "type": "text", } } - @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))) + @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60))) def _invoke(self, **kwargs): - import traceback - + file_path = None try: - # Get content from parameters (which may contain variable references) - content = self._param.content or "" - title = self._param.title or "" - subtitle = self._param.subtitle or "" - - # Log PDF generation start - print(f"Starting PDF generation for title: {title}, content length: {len(content)} chars") - - # Resolve variable references in content using canvas - if content and self._canvas.is_reff(content.strip()): - # Extract the variable reference and get its value - import re - matches = re.findall(self.variable_ref_patt, content, flags=re.DOTALL) - for match in matches: - try: - var_value = self._canvas.get_variable_value(match) - if var_value: - # Handle partial (streaming) content - if isinstance(var_value, partial): - resolved_content = "" - for chunk in var_value(): - resolved_content += chunk - content = content.replace("{" + match + "}", resolved_content) - else: - content = content.replace("{" + match + "}", str(var_value)) - except Exception as e: - print(f"Error resolving variable {match}: {str(e)}") - content = content.replace("{" + match + "}", f"[ERROR: {str(e)}]") - - # Also process with get_kwargs for any remaining variables - if content: - try: - content, _ = self.get_kwargs(content, kwargs) - except Exception as e: - print(f"Error processing content with get_kwargs: {str(e)}") - - # Process template variables in title - if title and self._canvas.is_reff(title): - try: - matches = re.findall(self.variable_ref_patt, title, flags=re.DOTALL) - for match in matches: - var_value = self._canvas.get_variable_value(match) - if var_value: - title = title.replace("{" + match + "}", str(var_value)) - except Exception as e: - print(f"Error processing title variables: {str(e)}") - - if title: - try: - title, _ = self.get_kwargs(title, kwargs) - except Exception: - pass - - # Process template variables in subtitle - if subtitle and self._canvas.is_reff(subtitle): - try: - matches = re.findall(self.variable_ref_patt, subtitle, flags=re.DOTALL) - for match in matches: - var_value = self._canvas.get_variable_value(match) - if var_value: - subtitle = subtitle.replace("{" + match + "}", str(var_value)) - except Exception as e: - print(f"Error processing subtitle variables: {str(e)}") - - if subtitle: - try: - subtitle, _ = self.get_kwargs(subtitle, kwargs) - except Exception: - pass - - # If content is still empty, check if it was passed directly - if not content: - content = kwargs.get("content", "") - - # Generate document based on format + content = self._resolve_content(kwargs) + output_format = self._param.output_format or "pdf" + try: - output_format = self._param.output_format or "pdf" - if output_format == "pdf": - file_path, doc_base64 = self._generate_pdf(content, title, subtitle) + file_path, file_bytes = self._generate_pdf(content) mime_type = "application/pdf" elif output_format == "docx": - file_path, doc_base64 = self._generate_docx(content, title, subtitle) + file_path, file_bytes = self._generate_docx(content) mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" elif output_format == "txt": - file_path, doc_base64 = self._generate_txt(content, title, subtitle) + file_path, file_bytes = self._generate_txt(content) mime_type = "text/plain" + elif output_format == "markdown": + file_path, file_bytes = self._generate_markdown(content) + mime_type = "text/markdown" + elif output_format == "html": + file_path, file_bytes = self._generate_html(content) + mime_type = "text/html" else: raise Exception(f"Unsupported output format: {output_format}") - + filename = os.path.basename(file_path) - - # Verify the file was created and has content - if not os.path.exists(file_path): - raise Exception(f"Document file was not created: {file_path}") - - file_size = os.path.getsize(file_path) - if file_size == 0: - raise Exception(f"Document file is empty: {file_path}") - - print(f"Successfully generated {output_format.upper()}: {file_path} (Size: {file_size} bytes)") - - # Set outputs - self.set_output("file_path", file_path) - self.set_output("pdf_base64", doc_base64) # Keep same output name for compatibility - self.set_output("success", True) - - # Create download info object + if not file_bytes: + raise Exception("Document file is empty") + + file_size = len(file_bytes) + doc_id = get_uuid() + settings.STORAGE_IMPL.put(self._canvas.get_tenant_id(), doc_id, file_bytes) + + logging.info( + "Successfully generated %s: %s (Size: %s bytes)", + output_format.upper(), + filename, + file_size, + ) + download_info = { + "doc_id": doc_id, "filename": filename, - "path": file_path, - "base64": doc_base64, "mime_type": mime_type, - "size": file_size + "size": file_size, } - # Output download info as JSON string so it can be used in Message block - download_json = json.dumps(download_info) - self.set_output("download", download_json) - + self.set_output("download", json.dumps(download_info)) return download_info - + except Exception as e: - error_msg = f"Error in _generate_pdf: {str(e)}\n{traceback.format_exc()}" - print(error_msg) - self.set_output("success", False) - self.set_output("_ERROR", f"PDF generation failed: {str(e)}") + logging.exception("Error generating %s document", output_format) + self.set_output("_ERROR", f"Document generation failed: {str(e)}") raise - + except Exception as e: - error_msg = f"Error in PDFGenerator._invoke: {str(e)}\n{traceback.format_exc()}" - print(error_msg) - self.set_output("success", False) - self.set_output("_ERROR", f"PDF generation failed: {str(e)}") + logging.exception("Error in DocGenerator._invoke") + self.set_output("_ERROR", f"Document generation failed: {str(e)}") raise - - def _generate_pdf(self, content: str, title: str = "", subtitle: str = "") -> tuple[str, str]: - """Generate PDF from markdown-style content with improved error handling and concurrency support""" - import uuid - import traceback - - # Create output directory if it doesn't exist - os.makedirs(self._param.output_directory, exist_ok=True) - - # Initialize variables that need cleanup - buffer = None - temp_file_path = None - file_path = None - - try: - # Generate a unique filename to prevent conflicts - if self._param.filename: - base_name = os.path.splitext(self._param.filename)[0] - filename = f"{base_name}_{uuid.uuid4().hex[:8]}.pdf" - else: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"document_{timestamp}_{uuid.uuid4().hex[:8]}.pdf" - - file_path = os.path.join(self._param.output_directory, filename) - temp_file_path = f"{file_path}.tmp" - - # Setup page size - page_size = A4 - if self._param.orientation == "landscape": - page_size = (A4[1], A4[0]) - - # Create PDF buffer and document - buffer = BytesIO() - doc = SimpleDocTemplate( - buffer, - pagesize=page_size, - topMargin=self._param.margin_top * inch, - bottomMargin=self._param.margin_bottom * inch, - leftMargin=self._param.margin_left * inch, - rightMargin=self._param.margin_right * inch - ) - - # Build story (content elements) - story = [] - # Combine all text content for Unicode font detection - all_text = f"{title} {subtitle} {content}" - - # IMPORTANT: Register Unicode fonts BEFORE creating any styles or Paragraphs - # This ensures the font family is available for ReportLab's HTML parser - if self._needs_unicode_font(all_text): - self._register_unicode_fonts() - - styles = self._create_styles(all_text) - - # Add logo if provided - if self._param.logo_image: - logo = self._add_logo() - if logo: - story.append(logo) - story.append(Spacer(1, 0.3 * inch)) - - # Add title - if title: - title_para = Paragraph(self._escape_html(title), styles['PDFTitle']) - story.append(title_para) - story.append(Spacer(1, 0.2 * inch)) - - # Add subtitle - if subtitle: - subtitle_para = Paragraph(self._escape_html(subtitle), styles['PDFSubtitle']) - story.append(subtitle_para) - story.append(Spacer(1, 0.3 * inch)) - - # Add timestamp if enabled - if self._param.add_timestamp: - timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - timestamp_para = Paragraph(timestamp_text, styles['Italic']) - story.append(timestamp_para) - story.append(Spacer(1, 0.2 * inch)) - - # Parse and add content - content_elements = self._parse_markdown_content(content, styles) - story.extend(content_elements) - - # Build PDF - doc.build(story, onFirstPage=self._add_page_decorations, onLaterPages=self._add_page_decorations) - - # Get PDF bytes - pdf_bytes = buffer.getvalue() - - # Write to temporary file first - with open(temp_file_path, 'wb') as f: - f.write(pdf_bytes) - - # Atomic rename to final filename (works across different filesystems) - if os.path.exists(file_path): - os.remove(file_path) - os.rename(temp_file_path, file_path) - - # Verify the file was created and has content - if not os.path.exists(file_path): - raise Exception(f"Failed to create output file: {file_path}") - - file_size = os.path.getsize(file_path) - if file_size == 0: - raise Exception(f"Generated PDF is empty: {file_path}") - - # Convert to base64 - pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8') - - return file_path, pdf_base64 - - except Exception as e: - # Clean up any temporary files on error - if temp_file_path and os.path.exists(temp_file_path): - try: - os.remove(temp_file_path) - except Exception as cleanup_error: - print(f"Error cleaning up temporary file: {cleanup_error}") - - error_msg = f"Error generating PDF: {str(e)}\n{traceback.format_exc()}" - print(error_msg) - raise Exception(f"PDF generation failed: {str(e)}") - finally: - # Ensure buffer is always closed - if buffer is not None: + if file_path and os.path.exists(file_path): + os.remove(file_path) + + def _resolve_content(self, kwargs: dict) -> str: + content = self._param.content or "" + logging.info("Starting document generation, content length: %s chars", len(content)) + + if content and self._canvas.is_reff(content.strip()): + matches = re.findall(self.variable_ref_patt, content, flags=re.DOTALL) + for match in matches: try: - buffer.close() - except Exception as close_error: - print(f"Error closing buffer: {close_error}") - - def _create_styles(self, content: str = ""): - """Create custom paragraph styles with Unicode font support if needed""" - # Check if content contains CJK characters that need special fonts - needs_cjk = self._needs_unicode_font(content) - - if needs_cjk: - # Use CID fonts for CJK content - if self._register_unicode_fonts() and self._unicode_font_name: - regular_font = self._unicode_font_name - bold_font = self._unicode_font_bold_name or self._unicode_font_name - print(f"Using CID font for CJK content: {regular_font}") - else: - # Fall back to configured font if CID fonts unavailable - regular_font = self._param.font_family - bold_font = self._get_bold_font_name() - print(f"Warning: CJK content detected but no CID font available, using {regular_font}") - else: - # Use user-selected font for Latin-only content - regular_font = self._param.font_family - bold_font = self._get_bold_font_name() - print(f"Using configured font: {regular_font}") - - # Store active fonts as instance variables for use in other methods - self._active_font = regular_font - self._active_bold_font = bold_font - - # Get fresh style sheet - styles = getSampleStyleSheet() - - # Helper function to get the correct bold font name - def get_bold_font(font_family): - """Get the correct bold variant of a font family""" - # If using Unicode font, return the Unicode bold - if font_family in ('UnicodeFont', self._unicode_font_name): - return bold_font - font_map = { - 'Helvetica': 'Helvetica-Bold', - 'Times-Roman': 'Times-Bold', - 'Courier': 'Courier-Bold', - } - if 'Bold' in font_family: - return font_family - return font_map.get(font_family, 'Helvetica-Bold') - - # Use detected font instead of configured font for non-Latin content - active_font = regular_font - active_bold_font = bold_font - - # Helper function to add or update style - def add_or_update_style(name, **kwargs): - if name in styles: - # Update existing style - style = styles[name] - for key, value in kwargs.items(): - setattr(style, key, value) - else: - # Add new style - styles.add(ParagraphStyle(name=name, **kwargs)) - - # IMPORTANT: Update base styles to use Unicode font for non-Latin content - # This ensures ALL text uses the correct font, not just our custom styles - add_or_update_style('Normal', fontName=active_font) - add_or_update_style('BodyText', fontName=active_font) - add_or_update_style('Bullet', fontName=active_font) - add_or_update_style('Heading1', fontName=active_bold_font) - add_or_update_style('Heading2', fontName=active_bold_font) - add_or_update_style('Heading3', fontName=active_bold_font) - add_or_update_style('Title', fontName=active_bold_font) - - # Title style - add_or_update_style( - 'PDFTitle', - parent=styles['Heading1'], - fontSize=self._param.title_font_size, - textColor=colors.HexColor(self._param.title_color), - fontName=active_bold_font, - alignment=TA_CENTER, - spaceAfter=12 - ) - - # Subtitle style - add_or_update_style( - 'PDFSubtitle', - parent=styles['Heading2'], - fontSize=self._param.heading2_font_size, - textColor=colors.HexColor(self._param.text_color), - fontName=active_font, - alignment=TA_CENTER, - spaceAfter=12 - ) - - # Custom heading styles - add_or_update_style( - 'CustomHeading1', - parent=styles['Heading1'], - fontSize=self._param.heading1_font_size, - fontName=active_bold_font, - textColor=colors.HexColor(self._param.text_color), - spaceAfter=12, - spaceBefore=12 - ) - - add_or_update_style( - 'CustomHeading2', - parent=styles['Heading2'], - fontSize=self._param.heading2_font_size, - fontName=active_bold_font, - textColor=colors.HexColor(self._param.text_color), - spaceAfter=10, - spaceBefore=10 - ) - - add_or_update_style( - 'CustomHeading3', - parent=styles['Heading3'], - fontSize=self._param.heading3_font_size, - fontName=active_bold_font, - textColor=colors.HexColor(self._param.text_color), - spaceAfter=8, - spaceBefore=8 - ) - - # Body text style - add_or_update_style( - 'CustomBody', - parent=styles['BodyText'], - fontSize=self._param.font_size, - fontName=active_font, - textColor=colors.HexColor(self._param.text_color), - leading=self._param.font_size * self._param.line_spacing, - alignment=TA_JUSTIFY - ) - - # Bullet style - add_or_update_style( - 'CustomBullet', - parent=styles['BodyText'], - fontSize=self._param.font_size, - fontName=active_font, - textColor=colors.HexColor(self._param.text_color), - leftIndent=20, - bulletIndent=10 - ) - - # Code style (keep Courier for code blocks) - add_or_update_style( - 'PDFCode', - parent=styles.get('Code', styles['Normal']), - fontSize=self._param.font_size - 1, - fontName='Courier', - textColor=colors.HexColor('#333333'), - backColor=colors.HexColor('#f5f5f5'), - leftIndent=20, - rightIndent=20 - ) - - # Italic style - add_or_update_style( - 'Italic', - parent=styles['Normal'], - fontSize=self._param.font_size, - fontName=active_font, - textColor=colors.HexColor(self._param.text_color) - ) - - return styles - - def _parse_markdown_content(self, content: str, styles): - """Parse markdown-style content and convert to PDF elements""" - elements = [] - lines = content.split('\n') - - i = 0 - while i < len(lines): - line = lines[i].strip() - - # Skip empty lines - if not line: - elements.append(Spacer(1, 0.1 * inch)) - i += 1 - continue - - # Horizontal rule - if line == '---' or line == '___': - elements.append(Spacer(1, 0.1 * inch)) - elements.append(self._create_horizontal_line()) - elements.append(Spacer(1, 0.1 * inch)) - i += 1 - continue - - # Heading 1 - if line.startswith('# ') and not line.startswith('## '): - text = line[2:].strip() - elements.append(Paragraph(self._format_inline(text), styles['CustomHeading1'])) - i += 1 - continue - - # Heading 2 - if line.startswith('## ') and not line.startswith('### '): - text = line[3:].strip() - elements.append(Paragraph(self._format_inline(text), styles['CustomHeading2'])) - i += 1 - continue - - # Heading 3 - if line.startswith('### '): - text = line[4:].strip() - elements.append(Paragraph(self._format_inline(text), styles['CustomHeading3'])) - i += 1 - continue - - # Bullet list - if line.startswith('- ') or line.startswith('* '): - bullet_items = [] - while i < len(lines) and (lines[i].strip().startswith('- ') or lines[i].strip().startswith('* ')): - item_text = lines[i].strip()[2:].strip() - formatted = self._format_inline(item_text) - bullet_items.append(f"• {formatted}") - i += 1 - for item in bullet_items: - elements.append(Paragraph(item, styles['CustomBullet'])) - continue - - # Numbered list - if re.match(r'^\d+\.\s', line): - numbered_items = [] - counter = 1 - while i < len(lines) and re.match(r'^\d+\.\s', lines[i].strip()): - item_text = re.sub(r'^\d+\.\s', '', lines[i].strip()) - numbered_items.append(f"{counter}. {self._format_inline(item_text)}") - counter += 1 - i += 1 - for item in numbered_items: - elements.append(Paragraph(item, styles['CustomBullet'])) - continue - - # Table detection (markdown table must start with |) - if line.startswith('|') and '|' in line: - table_lines = [] - # Collect all consecutive lines that look like table rows - while i < len(lines) and lines[i].strip() and '|' in lines[i]: - table_lines.append(lines[i].strip()) - i += 1 - - # Only process if we have at least 2 lines (header + separator or header + data) - if len(table_lines) >= 2: - table_elements = self._create_table(table_lines) - if table_elements: - # _create_table now returns a list of elements - elements.extend(table_elements) - elements.append(Spacer(1, 0.2 * inch)) - continue - else: - # Not a valid table, treat as regular text - i -= len(table_lines) # Reset position - - # Code block - if line.startswith('```'): - code_lines = [] - i += 1 - while i < len(lines) and not lines[i].strip().startswith('```'): - code_lines.append(lines[i]) - i += 1 - if i < len(lines): - i += 1 - code_text = '\n'.join(code_lines) - elements.append(Paragraph(self._escape_html(code_text), styles['PDFCode'])) - elements.append(Spacer(1, 0.1 * inch)) - continue - - # Regular paragraph - paragraph_lines = [line] - i += 1 - while i < len(lines) and lines[i].strip() and not self._is_special_line(lines[i]): - paragraph_lines.append(lines[i].strip()) - i += 1 - - paragraph_text = ' '.join(paragraph_lines) - formatted_text = self._format_inline(paragraph_text) - elements.append(Paragraph(formatted_text, styles['CustomBody'])) - elements.append(Spacer(1, 0.1 * inch)) - - return elements - - def _is_special_line(self, line: str) -> bool: - """Check if line is a special markdown element""" - line = line.strip() - return (line.startswith('#') or - line.startswith('- ') or - line.startswith('* ') or - re.match(r'^\d+\.\s', line) or - line in ['---', '___'] or - line.startswith('```') or - '|' in line) - - def _format_inline(self, text: str) -> str: - """Format inline markdown (bold, italic, code)""" - # First, escape the existing HTML to not conflict with our tags. - text = self._escape_html(text) - - # IMPORTANT: Process inline code FIRST to protect underscores inside code blocks - # Use a placeholder to protect code blocks from italic/bold processing - code_blocks = [] - def save_code(match): - code_blocks.append(match.group(1)) - return f"__CODE_BLOCK_{len(code_blocks)-1}__" - - text = re.sub(r'`(.+?)`', save_code, text) - - # Then, apply markdown formatting. - # The order is important: from most specific to least specific. - - # Bold and italic combined: ***text*** or ___text___ - text = re.sub(r'\*\*\*(.+?)\*\*\*', r'\1', text) - text = re.sub(r'___(.+?)___', r'\1', text) - - # Bold: **text** or __text__ - text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) - text = re.sub(r'__([^_]+?)__', r'\1', text) # More restrictive to avoid matching placeholders - - # Italic: *text* or _text_ (but not underscores in words like variable_name) - text = re.sub(r'\*([^*]+?)\*', r'\1', text) - # Only match _text_ when surrounded by spaces or at start/end, not mid-word underscores - text = re.sub(r'(?\1', text) - - # Restore code blocks with proper formatting - for i, code in enumerate(code_blocks): - text = text.replace(f"__CODE_BLOCK_{i}__", f'{code}') - - return text - - def _escape_html(self, text: str) -> str: - """Escape HTML special characters and clean up markdown. - - Args: - text: Input text that may contain HTML or markdown - - Returns: - str: Cleaned and escaped text - """ - if not text: - return "" - - # Ensure we're working with a string - text = str(text) - - # Remove HTML form elements and tags - text = re.sub(r']*>', '', text, flags=re.IGNORECASE) # Remove input tags - text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove textarea - text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove select - text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove buttons - text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove forms - - # Remove other common HTML tags (but preserve content) - text = re.sub(r']*>', '', text, flags=re.IGNORECASE) - text = re.sub(r'', '', text, flags=re.IGNORECASE) - text = re.sub(r']*>', '', text, flags=re.IGNORECASE) - text = re.sub(r'', '', text, flags=re.IGNORECASE) - text = re.sub(r']*>', '', text, flags=re.IGNORECASE) - text = re.sub(r'

', '\n', text, flags=re.IGNORECASE) - - # First, handle common markdown table artifacts - text = re.sub(r'^[|\-\s:]+$', '', text, flags=re.MULTILINE) # Remove separator lines - text = re.sub(r'^\s*\|\s*|\s*\|\s*$', '', text) # Remove leading/trailing pipes - text = re.sub(r'\s*\|\s*', ' | ', text) # Normalize pipes - - # Remove markdown links, but keep other formatting characters for _format_inline - text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # Remove markdown links - - # Escape HTML special characters - text = text.replace('&', '&') - text = text.replace('<', '<') - text = text.replace('>', '>') - - # Clean up excessive whitespace - text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text) # Multiple blank lines to double - text = re.sub(r' +', ' ', text) # Multiple spaces to single - - return text.strip() - - def _get_cell_style(self, row_idx: int, is_header: bool = False, font_size: int = None) -> 'ParagraphStyle': - """Get the appropriate style for a table cell.""" - styles = getSampleStyleSheet() - - # Helper function to get the correct bold font name - def get_bold_font(font_family): - font_map = { - 'Helvetica': 'Helvetica-Bold', - 'Times-Roman': 'Times-Bold', - 'Courier': 'Courier-Bold', - } - if 'Bold' in font_family: - return font_family - return font_map.get(font_family, 'Helvetica-Bold') - - if is_header: - return ParagraphStyle( - 'TableHeader', - parent=styles['Normal'], - fontSize=self._param.font_size, - fontName=self._get_active_bold_font(), - textColor=colors.whitesmoke, - alignment=TA_CENTER, - leading=self._param.font_size * 1.2, - wordWrap='CJK' - ) - else: - font_size = font_size or (self._param.font_size - 1) - return ParagraphStyle( - 'TableCell', - parent=styles['Normal'], - fontSize=font_size, - fontName=self._get_active_font(), - textColor=colors.black, - alignment=TA_LEFT, - leading=font_size * 1.15, - wordWrap='CJK' - ) - - def _convert_table_to_definition_list(self, data: list[list[str]]) -> list: - """Convert a table to a definition list format for better handling of large content. - - This method handles both simple and complex tables, including those with nested content. - It ensures that large cell content is properly wrapped and paginated. - """ - elements = [] - styles = getSampleStyleSheet() - - # Base styles - base_font_size = getattr(self._param, 'font_size', 10) - - # Body style - body_style = ParagraphStyle( - 'TableBody', - parent=styles['Normal'], - fontSize=base_font_size, - fontName=self._get_active_font(), - textColor=colors.HexColor(getattr(self._param, 'text_color', '#000000')), - spaceAfter=6, - leading=base_font_size * 1.2 - ) - - # Label style (for field names) - label_style = ParagraphStyle( - 'LabelStyle', - parent=body_style, - fontName=self._get_active_bold_font(), - textColor=colors.HexColor('#2c3e50'), - fontSize=base_font_size, - spaceAfter=4, - leftIndent=0, - leading=base_font_size * 1.3 - ) - - # Value style (for cell content) - clean, no borders - value_style = ParagraphStyle( - 'ValueStyle', - parent=body_style, - leftIndent=15, - rightIndent=0, - spaceAfter=8, - spaceBefore=2, - fontSize=base_font_size, - textColor=colors.HexColor('#333333'), - alignment=TA_JUSTIFY, - leading=base_font_size * 1.4, - # No borders or background - clean text only - ) - - try: - # If we have no data, return empty list - if not data or not any(data): - return elements - - # Get column headers or generate them - headers = [] - if data and len(data) > 0: - headers = [str(h).strip() for h in data[0]] - - # If no headers or empty headers, generate them - if not any(headers): - headers = [f"Column {i+1}" for i in range(len(data[0]) if data and len(data) > 0 else 0)] - - # Process each data row (skip header if it exists) - start_row = 1 if len(data) > 1 and any(data[0]) else 0 - - for row_idx in range(start_row, len(data)): - row = data[row_idx] if row_idx < len(data) else [] - if not row: - continue - - # Create a container for the row - row_elements = [] - - # Process each cell in the row - for col_idx in range(len(headers)): - if col_idx >= len(headers): + var_value = self._canvas.get_variable_value(match) + if var_value is None: continue - - # Get cell content - cell_text = str(row[col_idx]).strip() if col_idx < len(row) and row[col_idx] is not None else "" - - # Skip empty cells - if not cell_text or cell_text.isspace(): - continue - - # Clean up markdown artifacts for regular text content - cell_text = str(cell_text) # Ensure it's a string - - # Remove markdown table formatting - cell_text = re.sub(r'^[|\-\s:]+$', '', cell_text, flags=re.MULTILINE) # Remove separator lines - cell_text = re.sub(r'^\s*\|\s*|\s*\|\s*$', '', cell_text) # Remove leading/trailing pipes - cell_text = re.sub(r'\s*\|\s*', ' | ', cell_text) # Normalize pipes - cell_text = re.sub(r'\s+', ' ', cell_text).strip() # Normalize whitespace - - # Remove any remaining markdown formatting - cell_text = re.sub(r'`(.*?)`', r'\1', cell_text) # Remove code ticks - cell_text = re.sub(r'\*\*(.*?)\*\*', r'\1', cell_text) # Remove bold - cell_text = re.sub(r'\*(.*?)\*', r'\1', cell_text) # Remove italic - - # Clean up any HTML entities or special characters - cell_text = self._escape_html(cell_text) - - # If content still looks like a table, convert it to plain text - if '|' in cell_text and ('--' in cell_text or any(cell_text.count('|') > 2 for line in cell_text.split('\n') if line.strip())): - # Convert to a simple text format - lines = [line.strip() for line in cell_text.split('\n') if line.strip()] - cell_text = ' | '.join(lines[:5]) # Join first 5 lines with pipe - if len(lines) > 5: - cell_text += '...' - - # Process long content with better wrapping - max_chars_per_line = 100 # Reduced for better readability - max_paragraphs = 3 # Maximum number of paragraphs to show initially - - # Split into paragraphs - paragraphs = [p for p in cell_text.split('\n\n') if p.strip()] - - # If content is too long, truncate with "show more" indicator - if len(paragraphs) > max_paragraphs or any(len(p) > max_chars_per_line * 3 for p in paragraphs): - wrapped_paragraphs = [] - - for i, para in enumerate(paragraphs[:max_paragraphs]): - if len(para) > max_chars_per_line * 3: - # Split long paragraphs - words = para.split() - current_line = [] - current_length = 0 - - for word in words: - if current_line and current_length + len(word) + 1 > max_chars_per_line: - wrapped_paragraphs.append(' '.join(current_line)) - current_line = [word] - current_length = len(word) - else: - current_line.append(word) - current_length += len(word) + (1 if current_line else 0) - - if current_line: - wrapped_paragraphs.append(' '.join(current_line)) - else: - wrapped_paragraphs.append(para) - - # Add "show more" indicator if there are more paragraphs - if len(paragraphs) > max_paragraphs: - wrapped_paragraphs.append(f"... and {len(paragraphs) - max_paragraphs} more paragraphs") - - cell_text = '\n\n'.join(wrapped_paragraphs) - - # Add label and content with clean formatting (no borders) - label_para = Paragraph(f"{self._escape_html(headers[col_idx])}:", label_style) - value_para = Paragraph(self._escape_html(cell_text), value_style) - - # Add elements with proper spacing - row_elements.append(label_para) - row_elements.append(Spacer(1, 0.03 * 72)) # Tiny space between label and value - row_elements.append(value_para) - - # Add spacing between rows - if row_elements and row_idx < len(data) - 1: - # Add a subtle horizontal line as separator - row_elements.append(Spacer(1, 0.1 * 72)) - row_elements.append(self._create_horizontal_line(width=0.5, color='#e0e0e0')) - row_elements.append(Spacer(1, 0.15 * 72)) - - elements.extend(row_elements) - - # Add some space after the table - if elements: - elements.append(Spacer(1, 0.3 * 72)) # 0.3 inches in points - - except Exception as e: - # Fallback to simple text representation if something goes wrong - error_style = ParagraphStyle( - 'ErrorStyle', - parent=styles['Normal'], - fontSize=base_font_size - 1, - textColor=colors.red, - backColor=colors.HexColor('#fff0f0'), - borderWidth=1, - borderColor=colors.red, - borderPadding=5 - ) - - error_msg = [ - Paragraph("Error processing table:", error_style), - Paragraph(str(e), error_style), - Spacer(1, 0.2 * 72) - ] - - # Add a simplified version of the table - try: - for row in data[:10]: # Limit to first 10 rows to avoid huge error output - error_msg.append(Paragraph(" | ".join(str(cell) for cell in row), body_style)) - if len(data) > 10: - error_msg.append(Paragraph(f"... and {len(data) - 10} more rows", body_style)) - except Exception: - pass - - elements.extend(error_msg) - - return elements - - def _create_table(self, table_lines: list[str]) -> Optional[list]: - """Create a table from markdown table syntax with robust error handling. - - This method handles simple tables and falls back to a list format for complex cases. - - Returns: - A list of flowables (could be a table or alternative representation) - Returns None if the table cannot be created. - """ - if not table_lines or len(table_lines) < 2: - return None - - try: - # Parse table data - data = [] - max_columns = 0 - - for line in table_lines: - # Skip separator lines (e.g., |---|---|) - if re.match(r'^\|[\s\-:]+\|$', line): - continue - - # Handle empty lines within tables - if not line.strip(): - continue - - # Split by | and clean up cells - cells = [] - in_quotes = False - current_cell = "" - - # Custom split to handle escaped pipes and quoted content - for char in line[1:]: # Skip initial | - if char == '|' and not in_quotes: - cells.append(current_cell.strip()) - current_cell = "" - elif char == '"': - in_quotes = not in_quotes - current_cell += char - elif char == '\\' and not in_quotes: - # Handle escaped characters - pass + if isinstance(var_value, partial): + resolved_content = "" + for chunk in var_value(): + resolved_content += chunk + content = content.replace("{" + match + "}", resolved_content) else: - current_cell += char - - # Add the last cell - if current_cell.strip() or len(cells) > 0: - cells.append(current_cell.strip()) - - # Remove empty first/last elements if they're empty (from leading/trailing |) - if cells and not cells[0]: - cells = cells[1:] - if cells and not cells[-1]: - cells = cells[:-1] - - if cells: - data.append(cells) - max_columns = max(max_columns, len(cells)) - - if not data or max_columns == 0: - return None - - # Ensure all rows have the same number of columns - for row in data: - while len(row) < max_columns: - row.append('') - - # Calculate available width for table - from reportlab.lib.pagesizes import A4 - page_width = A4[0] if self._param.orientation == 'portrait' else A4[1] - available_width = page_width - (self._param.margin_left + self._param.margin_right) * inch - - # Check if we should use definition list format - max_cell_length = max((len(str(cell)) for row in data for cell in row), default=0) - total_rows = len(data) - - # Use definition list format if: - # - Any cell is too large (> 300 chars), OR - # - More than 6 columns, OR - # - More than 20 rows, OR - # - Contains nested tables or complex structures - has_nested_tables = any('|' in cell and '---' in cell for row in data for cell in row) - has_complex_cells = any(len(str(cell)) > 150 for row in data for cell in row) - - should_use_list_format = ( - max_cell_length > 300 or - max_columns > 6 or - total_rows > 20 or - has_nested_tables or - has_complex_cells - ) - - if should_use_list_format: - return self._convert_table_to_definition_list(data) - - # Process cells for normal table - processed_data = [] - for row_idx, row in enumerate(data): - processed_row = [] - for cell_idx, cell in enumerate(row): - cell_text = str(cell).strip() if cell is not None else "" - - # Handle empty cells - if not cell_text: - processed_row.append("") - continue - - # Clean up markdown table artifacts - cell_text = re.sub(r'\\\|', '|', cell_text) # Unescape pipes - cell_text = re.sub(r'\\n', '\n', cell_text) # Handle explicit newlines - - # Check for nested tables - if '|' in cell_text and '---' in cell_text: - # This cell contains a nested table - nested_lines = [line.strip() for line in cell_text.split('\n') if line.strip()] - nested_table = self._create_table(nested_lines) - if nested_table: - processed_row.append(nested_table[0]) # Add the nested table - continue - - # Process as regular text - font_size = self._param.font_size - 1 if row_idx > 0 else self._param.font_size - try: - style = self._get_cell_style(row_idx, is_header=(row_idx == 0), font_size=font_size) - escaped_text = self._escape_html(cell_text) - processed_row.append(Paragraph(escaped_text, style)) - except Exception: - processed_row.append(self._escape_html(cell_text)) - - processed_data.append(processed_row) - - # Calculate column widths - min_col_width = 0.5 * inch - max_cols = int(available_width / min_col_width) - - if max_columns > max_cols: - return self._convert_table_to_definition_list(data) - - col_width = max(min_col_width, available_width / max_columns) - col_widths = [col_width] * max_columns - - # Create the table - try: - table = LongTable(processed_data, colWidths=col_widths, repeatRows=1) - - # Define table style - table_style = [ - ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2c3e50')), # Darker header - ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), - ('ALIGN', (0, 0), (-1, 0), 'CENTER'), - ('FONTNAME', (0, 0), (-1, 0), self._get_active_bold_font()), - ('FONTSIZE', (0, 0), (-1, -1), self._param.font_size - 1), - ('BOTTOMPADDING', (0, 0), (-1, 0), 12), - ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#f8f9fa')), # Lighter background - ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#dee2e6')), # Lighter grid - ('VALIGN', (0, 0), (-1, -1), 'TOP'), - ('TOPPADDING', (0, 0), (-1, -1), 8), - ('BOTTOMPADDING', (0, 0), (-1, -1), 8), - ('LEFTPADDING', (0, 0), (-1, -1), 8), - ('RIGHTPADDING', (0, 0), (-1, -1), 8), - ] - - # Add zebra striping for better readability - for i in range(1, len(processed_data)): - if i % 2 == 0: - table_style.append(('BACKGROUND', (0, i), (-1, i), colors.HexColor('#f1f3f5'))) - - table.setStyle(TableStyle(table_style)) - - # Add a small spacer after the table - return [table, Spacer(1, 0.2 * inch)] - - except Exception as table_error: - print(f"Error creating table: {table_error}") - return self._convert_table_to_definition_list(data) - - except Exception as e: - print(f"Error processing table: {e}") - # Return a simple text representation of the table - try: - text_content = [] - for row in data: - text_content.append(" | ".join(str(cell) for cell in row)) - return [Paragraph("
".join(text_content), self._get_cell_style(0))] - except Exception: - return None + content = content.replace("{" + match + "}", str(var_value)) + except Exception as e: + logging.warning("Error resolving variable %s: %s", match, str(e)) + content = content.replace("{" + match + "}", f"[ERROR: {str(e)}]") - def _create_horizontal_line(self, width: float = 1, color: str = None): - """Create a horizontal line with customizable width and color - - Args: - width: Line thickness in points (default: 1) - color: Hex color string (default: grey) - - Returns: - HRFlowable: Horizontal line element - """ - from reportlab.platypus import HRFlowable - line_color = colors.HexColor(color) if color else colors.grey - return HRFlowable(width="100%", thickness=width, color=line_color, spaceBefore=0, spaceAfter=0) + if content: + try: + content, _ = self.get_kwargs(content, kwargs) + except Exception as e: + logging.warning("Error processing content with get_kwargs: %s", str(e)) + + if not content: + content = kwargs.get("content", "") + + return content + + def _get_output_directory(self) -> str: + os.makedirs(self._default_output_directory, exist_ok=True) + return self._default_output_directory + + def _build_output_filename(self, output_format: str) -> str: + import uuid + + if self._param.filename: + return sanitize_filename(self._param.filename, output_format.lower()) + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + return f"document_{timestamp}_{uuid.uuid4().hex[:8]}.{output_format}" + + def _get_timestamp_text(self) -> str: + return f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + + def _write_bytes_output(self, content: bytes, extension: str) -> tuple[str, bytes]: + output_directory = self._get_output_directory() + filename = self._build_output_filename(extension) + file_path = os.path.join(output_directory, filename) + with open(file_path, "wb") as f: + f.write(content) + return file_path, content + + def _build_markdown_source(self, content: str, include_timestamp_in_body: bool = False) -> str: + if not (include_timestamp_in_body and self._param.add_timestamp): + return content + return f"{self._get_timestamp_text()}\n\n{content}" + + def _get_heading_sizes(self) -> tuple[int, int, int]: + base = int(self._param.font_size) + return base + 6, base + 4, base + 2 + + def _generate_pandoc_binary_output( + self, + content: str, + target_format: str, + extension: str, + include_timestamp_in_body: bool = False, + extra_args: list[str] | None = None, + ) -> tuple[str, bytes]: + import pypandoc + + output_directory = self._get_output_directory() + filename = self._build_output_filename(extension) + file_path = os.path.join(output_directory, filename) + markdown_content = self._build_markdown_source( + content, + include_timestamp_in_body=include_timestamp_in_body, + ) + + pypandoc.convert_text( + markdown_content, + to=target_format, + format="markdown", + outputfile=file_path, + extra_args=extra_args or [], + ) + + with open(file_path, "rb") as f: + file_bytes = f.read() + + return file_path, file_bytes + + def _generate_pandoc_text_output( + self, + content: str, + target_format: str, + extension: str, + include_timestamp_in_body: bool = True, + ) -> tuple[str, bytes]: + import pypandoc + + markdown_content = self._build_markdown_source( + content, + include_timestamp_in_body=include_timestamp_in_body, + ) + converted_content = pypandoc.convert_text( + markdown_content, + to=target_format, + format="markdown", + ) + return self._write_bytes_output(converted_content.encode("utf-8"), extension) + + def _select_pdf_engine(self) -> str: + if shutil.which("xelatex"): + return "xelatex" + raise Exception("No PDF engine found. Install xelatex.") + + def _get_pdf_font_args(self) -> list[str]: + return [ + "-V", + f"mainfont={self._pdf_main_font}", + "-V", + f"CJKmainfont={self._pdf_cjk_font}", + ] + + def _get_pdf_overlay_font_name(self) -> str: + from reportlab.pdfbase import pdfmetrics + from reportlab.pdfbase.cidfonts import UnicodeCIDFont - def _add_logo(self) -> Optional[Image]: - """Add logo image to PDF""" try: - # Check if it's base64 or file path - if self._param.logo_image.startswith('data:image'): - # Extract base64 data - base64_data = self._param.logo_image.split(',')[1] - image_data = base64.b64decode(base64_data) - img = Image(BytesIO(image_data)) - elif os.path.exists(self._param.logo_image): - img = Image(self._param.logo_image) - else: - return None - - # Set size - img.drawWidth = self._param.logo_width * inch - img.drawHeight = self._param.logo_height * inch - - # Set alignment - if self._param.logo_position == 'center': - img.hAlign = 'CENTER' - elif self._param.logo_position == 'right': - img.hAlign = 'RIGHT' - else: - img.hAlign = 'LEFT' - - return img - except Exception as e: - print(f"Error adding logo: {e}") + pdfmetrics.getFont(self._pdf_overlay_font) + except KeyError: + pdfmetrics.registerFont(UnicodeCIDFont(self._pdf_overlay_font)) + + return self._pdf_overlay_font + + def _build_pdf_heading_overrides(self) -> str: + font_size = int(self._param.font_size) + leading = round(font_size * 1.2, 1) + h1_size, h2_size, h3_size = self._get_heading_sizes() + h1_leading = round(h1_size * 1.2, 1) + h2_leading = round(h2_size * 1.2, 1) + h3_leading = round(h3_size * 1.2, 1) + + return rf""" +\makeatletter +\renewcommand\normalsize{{ + \@setfontsize\normalsize{{{font_size}pt}}{{{leading}pt}} + \abovedisplayskip 12pt plus 3pt minus 7pt + \abovedisplayshortskip \z@ plus 3pt + \belowdisplayshortskip 6.5pt plus 3.5pt minus 3pt + \belowdisplayskip \abovedisplayskip + \let\@listi\@listI +}} +\normalsize +\renewcommand\section{{\@startsection{{section}}{{1}}{{\z@}}{{-3.5ex \@plus -1ex \@minus -.2ex}}{{2.3ex \@plus .2ex}}{{\normalfont\fontsize{{{h1_size}pt}}{{{h1_leading}pt}}\selectfont\bfseries}}}} +\renewcommand\subsection{{\@startsection{{subsection}}{{2}}{{\z@}}{{-3.25ex\@plus -1ex \@minus -.2ex}}{{1.5ex \@plus .2ex}}{{\normalfont\fontsize{{{h2_size}pt}}{{{h2_leading}pt}}\selectfont\bfseries}}}} +\renewcommand\subsubsection{{\@startsection{{subsubsection}}{{3}}{{\z@}}{{-3.25ex\@plus -1ex \@minus -.2ex}}{{1.5ex \@plus .2ex}}{{\normalfont\fontsize{{{h3_size}pt}}{{{h3_leading}pt}}\selectfont\bfseries}}}} +\makeatother +""".strip() + + def _write_temp_tex(self, content: str) -> str: + output_directory = self._get_output_directory() + with tempfile.NamedTemporaryFile( + mode="w", + encoding="utf-8", + suffix=".tex", + dir=output_directory, + delete=False, + ) as f: + f.write(content) + return f.name + + def _should_apply_pdf_overlay(self) -> bool: + return any( + [ + self._param.header_text, + self._param.footer_text, + self._param.watermark_text, + self._param.add_page_numbers, + self._param.add_timestamp, + ] + ) + + def _build_pdf_overlay_page(self, width: float, height: float, page_number: int): + if not self._should_apply_pdf_overlay(): return None - def _add_page_decorations(self, canvas, doc): - """Add header, footer, page numbers, watermark""" - canvas.saveState() - - # Get active font for decorations - active_font = self._get_active_font() - - # Add watermark + from pypdf import PdfReader + from reportlab.lib.colors import Color + from reportlab.pdfgen import canvas as pdf_canvas + + buffer = BytesIO() + overlay = pdf_canvas.Canvas(buffer, pagesize=(width, height)) + overlay_font = self._get_pdf_overlay_font_name() + if self._param.watermark_text: - canvas.setFont(active_font, 60) - canvas.setFillColorRGB(0.9, 0.9, 0.9, alpha=0.3) - canvas.saveState() - canvas.translate(doc.pagesize[0] / 2, doc.pagesize[1] / 2) - canvas.rotate(45) - canvas.drawCentredString(0, 0, self._param.watermark_text) - canvas.restoreState() - - # Add header + overlay.saveState() + if hasattr(overlay, "setFillAlpha"): + overlay.setFillAlpha(0.15) + overlay.setFillColor(Color(0.6, 0.6, 0.6)) + overlay.setFont(overlay_font, 48) + overlay.translate(width / 2, height / 2) + overlay.rotate(45) + overlay.drawCentredString(0, 0, self._param.watermark_text) + overlay.restoreState() + + overlay.setFont(overlay_font, self._overlay_font_size) + overlay.setFillColor(Color(0.35, 0.35, 0.35)) + if self._param.header_text: - canvas.setFont(active_font, 9) - canvas.setFillColorRGB(0.5, 0.5, 0.5) - canvas.drawString(doc.leftMargin, doc.pagesize[1] - 0.5 * inch, self._param.header_text) - - # Add footer + overlay.drawString( + self._overlay_margin, + height - self._overlay_margin + 8, + self._param.header_text, + ) + if self._param.footer_text: - canvas.setFont(active_font, 9) - canvas.setFillColorRGB(0.5, 0.5, 0.5) - canvas.drawString(doc.leftMargin, 0.5 * inch, self._param.footer_text) - - # Add page numbers + overlay.drawString( + self._overlay_margin, + self._overlay_margin - 8, + self._param.footer_text, + ) + + if self._param.add_timestamp: + overlay.drawCentredString( + width / 2, + self._overlay_margin - 8, + self._get_timestamp_text(), + ) + if self._param.add_page_numbers: - page_num = canvas.getPageNumber() - text = f"Page {page_num}" - canvas.setFont(active_font, 9) - canvas.setFillColorRGB(0.5, 0.5, 0.5) - canvas.drawRightString(doc.pagesize[0] - doc.rightMargin, 0.5 * inch, text) - - canvas.restoreState() + overlay.drawRightString( + width - self._overlay_margin, + self._overlay_margin - 8, + f"Page {page_number}", + ) + + overlay.save() + buffer.seek(0) + return PdfReader(buffer).pages[0] + + def _apply_pdf_overlay(self, file_path: str) -> tuple[str, bytes]: + from pypdf import PdfReader, PdfWriter + + if not self._should_apply_pdf_overlay(): + with open(file_path, "rb") as f: + file_bytes = f.read() + return file_path, file_bytes + + reader = PdfReader(file_path) + writer = PdfWriter() + + for page_number, page in enumerate(reader.pages, start=1): + overlay_page = self._build_pdf_overlay_page( + float(page.mediabox.width), + float(page.mediabox.height), + page_number, + ) + if overlay_page is not None: + page.merge_page(overlay_page) + writer.add_page(page) + + temp_file = f"{file_path}.overlay" + with open(temp_file, "wb") as f: + writer.write(f) + + os.replace(temp_file, file_path) + with open(file_path, "rb") as f: + file_bytes = f.read() + return file_path, file_bytes + + def _clear_docx_container(self, container): + element = container._element + for child in list(element): + element.remove(child) + + def _append_docx_field(self, run, instruction: str): + from docx.oxml import OxmlElement + + begin = OxmlElement("w:fldChar") + begin.set(run.part.element.nsmap["w"] and "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}fldCharType", "begin") + + instr = OxmlElement("w:instrText") + instr.set("{http://www.w3.org/XML/1998/namespace}space", "preserve") + instr.text = instruction + + end = OxmlElement("w:fldChar") + end.set(run.part.element.nsmap["w"] and "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}fldCharType", "end") + + run._r.append(begin) + run._r.append(instr) + run._r.append(end) + + def _add_docx_watermark(self, section): + if not self._param.watermark_text: + return + + from docx.enum.text import WD_ALIGN_PARAGRAPH + from docx.oxml import parse_xml + + header = section.header + paragraph = header.add_paragraph() + paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER + run = paragraph.add_run() + watermark_xml = parse_xml( + rf""" + + + + + + + """ + ) + run._r.append(watermark_xml) + + def _normalize_docx_section_geometry(self, section, default_section): + for attr in ("page_width", "left_margin", "right_margin"): + if getattr(section, attr) is None: + setattr(section, attr, getattr(default_section, attr)) + + def _get_docx_available_width(self, section): + page_width = section.page_width + left_margin = section.left_margin + right_margin = section.right_margin + + if page_width is None or left_margin is None or right_margin is None: + raise ValueError("DOCX section geometry is incomplete after normalization.") + + return page_width - left_margin - right_margin + + def _decorate_docx(self, file_path: str) -> tuple[str, bytes]: + from docx import Document + from docx.enum.text import WD_TAB_ALIGNMENT + from docx.shared import Pt + + document = Document(file_path) + default_section = Document().sections[0] + h1_size, h2_size, h3_size = self._get_heading_sizes() + + style_map = { + "Normal": int(self._param.font_size), + "Heading 1": h1_size, + "Heading 2": h2_size, + "Heading 3": h3_size, + } + for style_name, size in style_map.items(): + try: + document.styles[style_name].font.size = Pt(size) + except Exception: + continue + + for section in document.sections: + self._normalize_docx_section_geometry(section, default_section) + available_width = self._get_docx_available_width(section) + + header = section.header + header.is_linked_to_previous = False + self._clear_docx_container(header) + if self._param.header_text: + paragraph = header.add_paragraph() + paragraph.add_run(self._param.header_text) + + self._add_docx_watermark(section) + + footer = section.footer + footer.is_linked_to_previous = False + self._clear_docx_container(footer) + if any( + [ + self._param.footer_text, + self._param.add_timestamp, + self._param.add_page_numbers, + ] + ): + paragraph = footer.add_paragraph() + paragraph.paragraph_format.tab_stops.add_tab_stop( + int(available_width // 2), + WD_TAB_ALIGNMENT.CENTER, + ) + paragraph.paragraph_format.tab_stops.add_tab_stop( + int(available_width), + WD_TAB_ALIGNMENT.RIGHT, + ) + + if self._param.footer_text: + paragraph.add_run(self._param.footer_text) + + if self._param.add_timestamp or self._param.add_page_numbers: + paragraph.add_run("\t") + + if self._param.add_timestamp: + paragraph.add_run(self._get_timestamp_text()) + + if self._param.add_page_numbers: + paragraph.add_run("\t") + self._append_docx_field(paragraph.add_run(), " PAGE ") + + document.save(file_path) + with open(file_path, "rb") as f: + file_bytes = f.read() + return file_path, file_bytes def thoughts(self) -> str: - return "Generating PDF document with formatted content..." + return f"Generating {self._param.output_format.upper()} document with markdown conversion..." - def _generate_docx(self, content: str, title: str = "", subtitle: str = "") -> tuple[str, str]: - """Generate DOCX from markdown-style content""" - import uuid - from docx import Document - from docx.shared import Pt - from docx.enum.text import WD_ALIGN_PARAGRAPH - - # Create output directory if it doesn't exist - os.makedirs(self._param.output_directory, exist_ok=True) - + def _generate_pdf(self, content: str) -> tuple[str, bytes]: try: - # Generate filename - if self._param.filename: - base_name = os.path.splitext(self._param.filename)[0] - filename = f"{base_name}_{uuid.uuid4().hex[:8]}.docx" - else: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"document_{timestamp}_{uuid.uuid4().hex[:8]}.docx" - - file_path = os.path.join(self._param.output_directory, filename) - - # Create document - doc = Document() - - # Add title - if title: - title_para = doc.add_heading(title, level=0) - title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER - - # Add subtitle - if subtitle: - subtitle_para = doc.add_heading(subtitle, level=1) - subtitle_para.alignment = WD_ALIGN_PARAGRAPH.CENTER - - # Add timestamp if enabled - if self._param.add_timestamp: - timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - ts_para = doc.add_paragraph(timestamp_text) - ts_para.runs[0].italic = True - ts_para.runs[0].font.size = Pt(9) - - # Parse and add content - lines = content.split('\n') - i = 0 - while i < len(lines): - line = lines[i].strip() - - if not line: - i += 1 - continue - - # Headings - if line.startswith('# ') and not line.startswith('## '): - doc.add_heading(line[2:].strip(), level=1) - elif line.startswith('## ') and not line.startswith('### '): - doc.add_heading(line[3:].strip(), level=2) - elif line.startswith('### '): - doc.add_heading(line[4:].strip(), level=3) - # Bullet list - elif line.startswith('- ') or line.startswith('* '): - doc.add_paragraph(line[2:].strip(), style='List Bullet') - # Numbered list - elif re.match(r'^\d+\.\s', line): - text = re.sub(r'^\d+\.\s', '', line) - doc.add_paragraph(text, style='List Number') - # Regular paragraph - else: - para = doc.add_paragraph(line) - para.runs[0].font.size = Pt(self._param.font_size) - - i += 1 - - # Save document - doc.save(file_path) - - # Read and encode to base64 - with open(file_path, 'rb') as f: - doc_bytes = f.read() - doc_base64 = base64.b64encode(doc_bytes).decode('utf-8') - - return file_path, doc_base64 - + engine = self._select_pdf_engine() + header_path = self._write_temp_tex(self._build_pdf_heading_overrides()) + try: + file_path, _ = self._generate_pandoc_binary_output( + content, + "pdf", + "pdf", + include_timestamp_in_body=False, + extra_args=[ + "--standalone", + f"--pdf-engine={engine}", + f"--include-in-header={header_path}", + *self._get_pdf_font_args(), + ], + ) + finally: + if os.path.exists(header_path): + os.remove(header_path) + return self._apply_pdf_overlay(file_path) + except Exception as e: + raise Exception(f"PDF generation failed: {str(e)}") + + def _generate_docx(self, content: str) -> tuple[str, bytes]: + try: + file_path, _ = self._generate_pandoc_binary_output( + content, + "docx", + "docx", + include_timestamp_in_body=False, + extra_args=["--standalone"], + ) + return self._decorate_docx(file_path) except Exception as e: raise Exception(f"DOCX generation failed: {str(e)}") - def _generate_txt(self, content: str, title: str = "", subtitle: str = "") -> tuple[str, str]: - """Generate TXT from markdown-style content""" - import uuid - - # Create output directory if it doesn't exist - os.makedirs(self._param.output_directory, exist_ok=True) - + def _generate_txt(self, content: str) -> tuple[str, bytes]: try: - # Generate filename - if self._param.filename: - base_name = os.path.splitext(self._param.filename)[0] - filename = f"{base_name}_{uuid.uuid4().hex[:8]}.txt" - else: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"document_{timestamp}_{uuid.uuid4().hex[:8]}.txt" - - file_path = os.path.join(self._param.output_directory, filename) - - # Build text content - text_content = [] - - if title: - text_content.append(title.upper()) - text_content.append("=" * len(title)) - text_content.append("") - - if subtitle: - text_content.append(subtitle) - text_content.append("-" * len(subtitle)) - text_content.append("") - - if self._param.add_timestamp: - timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - text_content.append(timestamp_text) - text_content.append("") - - # Add content (keep markdown formatting for readability) - text_content.append(content) - - # Join and save - final_text = '\n'.join(text_content) - - with open(file_path, 'w', encoding='utf-8') as f: - f.write(final_text) - - # Encode to base64 - txt_base64 = base64.b64encode(final_text.encode('utf-8')).decode('utf-8') - - return file_path, txt_base64 - + return self._generate_pandoc_text_output(content, "plain", "txt") except Exception as e: raise Exception(f"TXT generation failed: {str(e)}") + + def _generate_markdown(self, content: str) -> tuple[str, bytes]: + try: + return self._generate_pandoc_text_output(content, "markdown", "md") + except Exception as e: + raise Exception(f"Markdown generation failed: {str(e)}") + + def _generate_html(self, content: str) -> tuple[str, bytes]: + try: + return self._generate_pandoc_text_output(content, "html", "html") + except Exception as e: + raise Exception(f"HTML generation failed: {str(e)}") diff --git a/agent/component/message.py b/agent/component/message.py index cc26ca52ba..8db4eedbd1 100644 --- a/agent/component/message.py +++ b/agent/component/message.py @@ -54,6 +54,9 @@ class MessageParam(ComponentParamBase): self.outputs = { "content": { "type": "str" + }, + "downloads": { + "type": "list" } } @@ -66,10 +69,66 @@ class MessageParam(ComponentParamBase): class Message(ComponentBase): component_name = "Message" + @staticmethod + def _is_download_info(value: Any) -> bool: + return isinstance(value, dict) and all( + key in value for key in ("doc_id", "filename", "mime_type") + ) + + def _extract_downloads(self, value: Any) -> list[dict[str, Any]]: + if isinstance(value, str): + try: + value = json.loads(value) + except Exception: + return [] + + if self._is_download_info(value): + return [value] + + if isinstance(value, list) and all(self._is_download_info(item) for item in value): + return value + + return [] + + def _stringify_message_value( + self, + value: Any, + delimiter: str = None, + downloads: list[dict[str, Any]] | None = None, + fallback_to_str: bool = False, + ) -> str: + extracted_downloads = self._extract_downloads(value) + if extracted_downloads: + if downloads is not None: + downloads.extend(extracted_downloads) + return "" + + if value is None: + return "" + + if isinstance(value, list) and delimiter: + return delimiter.join([str(vv) for vv in value]) + + if isinstance(value, str): + return value + + try: + return json.dumps(value, ensure_ascii=False) + except Exception: + if fallback_to_str: + return str(value) + return "" + def get_input_elements(self) -> dict[str, Any]: return self.get_input_elements_from_text("".join(self._param.content)) - def get_kwargs(self, script:str, kwargs:dict = {}, delimiter:str=None) -> tuple[str, dict[str, str | list | Any]]: + def get_kwargs( + self, + script: str, + kwargs: dict = {}, + delimiter: str = None, + downloads: list[dict[str, Any]] | None = None, + ) -> tuple[str, dict[str, str | list | Any]]: for k,v in self.get_input_elements_from_text(script).items(): if k in kwargs: continue @@ -84,15 +143,8 @@ class Message(ComponentBase): else: for t in iter_obj: ans += t - elif isinstance(v, list) and delimiter: - ans = delimiter.join([str(vv) for vv in v]) - elif not isinstance(v, str): - try: - ans = json.dumps(v, ensure_ascii=False) - except Exception: - pass else: - ans = v + ans = self._stringify_message_value(v, delimiter, downloads) if not ans: ans = "" kwargs[k] = ans @@ -115,6 +167,7 @@ class Message(ComponentBase): s = 0 all_content = "" cache = {} + downloads = [] for r in re.finditer(self.variable_ref_patt, rand_cnt, flags=re.DOTALL): if self.check_if_canceled("Message streaming"): return @@ -154,11 +207,9 @@ class Message(ComponentBase): continue elif inspect.isawaitable(v): v = await v - elif not isinstance(v, str): - try: - v = json.dumps(v, ensure_ascii=False) - except Exception: - v = str(v) + v = self._stringify_message_value( + v, downloads=downloads, fallback_to_str=True + ) yield v self.set_input_value(exp, v) all_content += v @@ -171,6 +222,7 @@ class Message(ComponentBase): all_content += rand_cnt[s: ] yield rand_cnt[s: ] + self.set_output("downloads", downloads) self.set_output("content", all_content) self._convert_content(all_content) await self._save_to_memory(all_content) @@ -191,12 +243,14 @@ class Message(ComponentBase): self.set_output("content", partial(self._stream, rand_cnt)) return - rand_cnt, kwargs = self.get_kwargs(rand_cnt, kwargs) + downloads = [] + rand_cnt, kwargs = self.get_kwargs(rand_cnt, kwargs, downloads=downloads) template = _jinja2_sandbox.from_string(rand_cnt) try: content = template.render(kwargs) - except Exception: - pass + except Exception as e: + logging.warning(f"Jinja2 template rendering failed: {e}") + content = rand_cnt # fallback to unrendered content if self.check_if_canceled("Message processing"): return @@ -204,6 +258,7 @@ class Message(ComponentBase): for n, v in kwargs.items(): content = re.sub(n, v, content) + self.set_output("downloads", downloads) self.set_output("content", content) self._convert_content(content) self._save_to_memory(content) diff --git a/agent/dsl_migration.py b/agent/dsl_migration.py index 6fef629376..ca4ee894c3 100644 --- a/agent/dsl_migration.py +++ b/agent/dsl_migration.py @@ -22,6 +22,7 @@ import re COMPONENT_RENAMES = { "Splitter": "TokenChunker", "HierarchicalMerger": "TitleChunker", + "PDFGenerator": "DocGenerator", } NODE_TYPE_RENAMES = { diff --git a/api/apps/user_app.py b/api/apps/user_app.py index 702e1bd855..7424899269 100644 --- a/api/apps/user_app.py +++ b/api/apps/user_app.py @@ -1029,7 +1029,6 @@ async def forget_reset_password(): new_pwd_string = base64.b64decode(new_pwd_base64).decode('utf-8') new_pwd2_string = base64.b64decode(decrypt(new_pwd2)).decode('utf-8') - REDIS_CONN.get(_verified_key(email)) if not REDIS_CONN.get(_verified_key(email)): return get_json_result(data=False, code=RetCode.AUTHENTICATION_ERROR, message="email not verified") diff --git a/docs/guides/agent/agent_component_reference/docs_generator.md b/docs/guides/agent/agent_component_reference/docs_generator.md deleted file mode 100644 index 3ed8e342af..0000000000 --- a/docs/guides/agent/agent_component_reference/docs_generator.md +++ /dev/null @@ -1,241 +0,0 @@ ---- -sidebar_position: 35 -slug: /docs_generator ---- - -# Docs Generator component - -A component that generates downloadable PDF, DOCX, or TXT documents from markdown-style content with full Unicode support. - ---- - -The **Docs Generator** component enables you to create professional documents directly within your agent workflow. It accepts markdown-formatted text and converts it into downloadable files, making it ideal for generating reports, summaries, or any structured document output. - -## Key features - -- **Multiple output formats**: PDF, DOCX, and TXT -- **Full Unicode support**: Automatic font switching for CJK (Chinese, Japanese, Korean), Arabic, Hebrew, and other non-Latin scripts -- **Rich formatting**: Headers, lists, tables, code blocks, and more -- **Customizable styling**: Fonts, margins, page size, and orientation -- **Document extras**: Logo, watermark, page numbers, and timestamps -- **Direct download**: Generates a download button for the chat interface - -## Prerequisites - -- Content to be converted into a document (typically from an **Agent** or other text-generating component). - -## Examples - -You can pair an **Agent** component with the **Docs Generator** to create dynamic documents based on user queries. The **Agent** generates the content, and the **Docs Generator** converts it into a downloadable file. Connect the output to a **Message** component to display the download button in the chat. - -A typical workflow looks like: - -``` -Begin → Agent → Docs Generator → Message -``` - -In the **Message** component, reference the `download` output variable from the **Docs Generator** to display a download button in the chat interface. - -## Configurations - -### Content - -The main text content to include in the document. Supports Markdown formatting: - -- **Bold**: `**text**` or `__text__` -- **Italic**: `*text*` or `_text_` -- **Inline code**: `` `code` `` -- **Headings**: `# Heading 1`, `## Heading 2`, `### Heading 3` -- **Bullet lists**: `- item` or `* item` -- **Numbered lists**: `1. item` -- **Tables**: `| Column 1 | Column 2 |` -- **Horizontal lines**: `---` -- **Code blocks**: ` ``` code ``` ` - -:::tip NOTE -Click **(x)** or type `/` to insert variables from upstream components. -::: - -### Title - -Optional. The document title displayed at the top of the generated file. - -### Subtitle - -Optional. A subtitle displayed below the title. - -### Output format - -The file format for the generated document: - -- **PDF** (default): Portable Document Format with full styling support. -- **DOCX**: Microsoft Word format. -- **TXT**: Plain text format. - -### Logo image - -Optional. A logo image to display at the top of the document. You can either: - -- Upload an image file using the file picker -- Paste an image path, URL, or base64-encoded data - -### Logo position - -The horizontal position of the logo: - -- **left** (default) -- **center** -- **right** - -### Logo dimensions - -- **Logo width**: Width in inches (default: `2.0`) -- **Logo height**: Height in inches (default: `1.0`) - -### Font family - -The font used throughout the document: - -- **Helvetica** (default) -- **Times-Roman** -- **Courier** -- **Helvetica-Bold** -- **Times-Bold** - -### Font size - -The base font size in points. Defaults to `12`. - -### Title font size - -The font size for the document title. Defaults to `24`. - -### Page size - -The paper size for the document: - -- **A4** (default) -- **Letter** - -### Orientation - -The page orientation: - -- **Portrait** (default) -- **Landscape** - -### Margins - -Page margins in inches: - -- **Margin top**: Defaults to `1.0` -- **Margin bottom**: Defaults to `1.0` -- **Margin left**: Defaults to `1.0` -- **Margin right**: Defaults to `1.0` - -### Filename - -Optional. Custom filename for the generated document. If left empty, a filename is auto-generated with a timestamp. - -### Output directory - -The server directory where generated documents are saved. Defaults to `/tmp/pdf_outputs`. - -### Add page numbers - -When enabled, page numbers are added to the footer of each page. Defaults to `true`. - -### Add timestamp - -When enabled, a generation timestamp is added to the document footer. Defaults to `true`. - -### Watermark text - -Optional. Text to display as a diagonal watermark across each page. Useful for marking documents as "Draft", "Confidential", etc. - -## Output - -The **Docs Generator** component provides the following output variables: - -| Variable name | Type | Description | -|---------------|-----------|--------------------------------------------------------------| -| `file_path` | `string` | The server path where the generated document is saved. | -| `pdf_base64` | `string` | The document content encoded in base64 format. | -| `download` | `string` | JSON containing download information for the chat interface. | -| `success` | `boolean` | Indicates whether the document was generated successfully. | - -### Displaying the download button - -To display a download button in the chat, add a **Message** component after the **Docs Generator** and reference the `download` variable: - -1. Connect the **Docs Generator** output to a **Message** component. -2. In the **Message** component's content field, type `/` and select `{Docs Generator_0@download}`. -3. When the agent runs, a download button will appear in the chat, allowing users to download the generated document. - -The download button automatically handles: -- File type detection (PDF, DOCX, TXT) -- Proper MIME type for browser downloads -- Base64 decoding for direct file delivery - -## Unicode and multi-language support - -The **Docs Generator** includes intelligent font handling for international content: - -### How it works - -1. **Content analysis**: The component scans the text for non-Latin characters. -2. **Automatic font switching**: When CJK or other complex scripts are detected, the system automatically switches to a compatible CID font (STSong-Light for Chinese, HeiseiMin-W3 for Japanese, HYSMyeongJo-Medium for Korean). -3. **Latin content**: For documents containing only Latin characters (including extended Latin, Cyrillic, and Greek), the user-selected font family is used. - -### Supported scripts - -| Script | Unicode Range | Font Used | -|------------------------------|---------------|--------------------| -| Chinese (CJK) | U+4E00–U+9FFF | STSong-Light | -| Japanese (Hiragana/Katakana) | U+3040–U+30FF | HeiseiMin-W3 | -| Korean (Hangul) | U+AC00–U+D7AF | HYSMyeongJo-Medium | -| Arabic | U+0600–U+06FF | CID font fallback | -| Hebrew | U+0590–U+05FF | CID font fallback | -| Devanagari (Hindi) | U+0900–U+097F | CID font fallback | -| Thai | U+0E00–U+0E7F | CID font fallback | - -### Font installation - -For full multi-language support in self-hosted deployments, ensure Unicode fonts are installed: - -**Linux (Debian/Ubuntu):** -```bash -apt-get install fonts-freefont-ttf fonts-noto-cjk -``` - -**Docker:** The official RAGFlow Docker image includes these fonts. For custom images, add the font packages to your Dockerfile: -```dockerfile -RUN apt-get update && apt-get install -y fonts-freefont-ttf fonts-noto-cjk -``` - -:::tip NOTE -CID fonts (STSong-Light, HeiseiMin-W3, etc.) are built into ReportLab and do not require additional installation. They are used automatically when CJK content is detected. -::: - -## Troubleshooting - -### Characters appear as boxes or question marks - -This indicates missing font support. Ensure: -1. The content contains supported Unicode characters. -2. For self-hosted deployments, Unicode fonts are installed on the server. -3. The document is being viewed in a PDF reader that supports embedded fonts. - -### Download button not appearing - -Ensure: -1. The **Message** component is connected after the **Docs Generator**. -2. The `download` variable is correctly referenced using `/` (which appears as `{Docs Generator_0@download}` when copied). -3. The document generation completed successfully (check `success` output). - -### Large tables not rendering correctly - -For tables with many columns or large cell content: -- The component automatically converts wide tables to a definition list format for better readability. -- Consider splitting large tables into multiple smaller tables. -- Use landscape orientation for wide tables. diff --git a/web/src/components/document-download-button/index.tsx b/web/src/components/document-download-button/index.tsx new file mode 100644 index 0000000000..02eefdd461 --- /dev/null +++ b/web/src/components/document-download-button/index.tsx @@ -0,0 +1,88 @@ +import { Button } from '@/components/ui/button'; +import { IDocumentDownloadInfo } from '@/interfaces/database/chat'; +import { downloadFile } from '@/services/file-manager-service'; +import { downloadFileFromBlob } from '@/utils/file-util'; +import { Download, FileText } from 'lucide-react'; +import { useCallback } from 'react'; + +export type DocumentDownloadInfo = IDocumentDownloadInfo; + +interface DocumentDownloadButtonProps { + downloadInfo: DocumentDownloadInfo; + className?: string; +} + +export function DocumentDownloadButton({ + downloadInfo, + className, +}: DocumentDownloadButtonProps) { + const handleDownload = useCallback(async () => { + try { + const ext = + downloadInfo.filename.split('.').pop()?.toLowerCase() || 'bin'; + const response = await downloadFile({ + docId: downloadInfo.doc_id, + ext, + }); + const blob = new Blob([response.data], { + type: downloadInfo.mime_type || response.data.type, + }); + downloadFileFromBlob(blob, downloadInfo.filename); + } catch (error) { + console.error('Error downloading document:', error); + } + }, [downloadInfo]); + + const getDocumentType = () => { + if (downloadInfo.mime_type === 'application/pdf') return 'PDF Document'; + if ( + downloadInfo.mime_type === + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' + ) + return 'Word Document'; + if ( + downloadInfo.mime_type === + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + ) + return 'Excel Document'; + if (downloadInfo.mime_type === 'text/plain') return 'Text Document'; + if (downloadInfo.mime_type === 'text/markdown') return 'Markdown Document'; + if (downloadInfo.mime_type === 'text/html') return 'HTML Document'; + + const ext = downloadInfo.filename.split('.').pop()?.toUpperCase(); + if (ext === 'PDF') return 'PDF Document'; + if (ext === 'DOCX') return 'Word Document'; + if (ext === 'XLSX') return 'Excel Document'; + if (ext === 'TXT') return 'Text Document'; + if (ext === 'MD') return 'Markdown Document'; + if (ext === 'HTML' || ext === 'HTM') return 'HTML Document'; + + return 'Document'; + }; + + return ( +
+
+
+ +
+
+
+
+ {downloadInfo.filename} +
+
{getDocumentType()}
+
+ +
+ ); +} diff --git a/web/src/components/message-item/index.tsx b/web/src/components/message-item/index.tsx index 4f8814a2d1..25a68e22fe 100644 --- a/web/src/components/message-item/index.tsx +++ b/web/src/components/message-item/index.tsx @@ -10,15 +10,11 @@ import { memo, useCallback, useMemo } from 'react'; import { IRegenerateMessage, IRemoveMessageById } from '@/hooks/logic-hooks'; import { cn } from '@/lib/utils'; +import { DocumentDownloadButton } from '../document-download-button'; import MarkdownContent from '../markdown-content'; import { ReferenceDocumentList } from '../next-message-item/reference-document-list'; import { ReferenceImageList } from '../next-message-item/reference-image-list'; import { UploadedMessageFiles } from '../next-message-item/uploaded-message-files'; -import { - PDFDownloadButton, - extractPDFDownloadInfo, - removePDFDownloadInfo, -} from '../pdf-download-button'; import { RAGFlowAvatar } from '../ragflow-avatar'; import SvgIcon from '../svg-icon'; import { useTheme } from '../theme-provider'; @@ -67,19 +63,11 @@ const MessageItem = ({ return reference?.doc_aggs ?? []; }, [reference?.doc_aggs]); - // Extract PDF download info from message content - const pdfDownloadInfo = useMemo( - () => extractPDFDownloadInfo(item.content), - [item.content], + const documentDownloadInfos = useMemo( + () => item.downloads ?? [], + [item.downloads], ); - - // If we have PDF download info, extract the remaining text - const messageContent = useMemo(() => { - if (!pdfDownloadInfo) return item.content; - - // Remove the JSON part from the content to avoid showing it - return removePDFDownloadInfo(item.content, pdfDownloadInfo); - }, [item.content, pdfDownloadInfo]); + const messageContent = item.content; const handleRegenerateMessage = useCallback(() => { regenerateMessage?.(item); @@ -129,7 +117,7 @@ const MessageItem = ({ index !== 0 && ( )} - {/* Show PDF download button if download info is present */} - {pdfDownloadInfo && ( - - )} {/* Show message content if there's any text besides the download */} {messageContent && (
)} + {documentDownloadInfos.length > 0 && ( +
+ {documentDownloadInfos.map((downloadInfo, index) => ( +
+ {index > 0 &&
} + +
+ ))} +
+ )}
diff --git a/web/src/components/next-message-item/index.tsx b/web/src/components/next-message-item/index.tsx index b9e1b32324..ac63a6661a 100644 --- a/web/src/components/next-message-item/index.tsx +++ b/web/src/components/next-message-item/index.tsx @@ -25,12 +25,8 @@ import { citationMarkerReg } from '@/utils/citation-utils'; import { getDirAttribute } from '@/utils/text-direction'; import { isEmpty } from 'lodash'; import { Atom, ChevronDown, ChevronUp } from 'lucide-react'; +import { DocumentDownloadButton } from '../document-download-button'; import MarkdownContent from '../next-markdown-content'; -import { - PDFDownloadButton, - extractPDFDownloadInfo, - removePDFDownloadInfo, -} from '../pdf-download-button'; import { RAGFlowAvatar } from '../ragflow-avatar'; import SvgIcon from '../svg-icon'; import { useTheme } from '../theme-provider'; @@ -102,19 +98,11 @@ function MessageItem({ return Object.values(docs); }, [reference?.doc_aggs]); - // Extract PDF download info from message content - const pdfDownloadInfo = useMemo( - () => extractPDFDownloadInfo(item.content), - [item.content], + const documentDownloadInfos = useMemo( + () => item.downloads ?? [], + [item.downloads], ); - - // If we have PDF download info, extract the remaining text - const messageContent = useMemo(() => { - if (!pdfDownloadInfo) return item.content; - - // Remove the JSON part from the content to avoid showing it - return removePDFDownloadInfo(item.content, pdfDownloadInfo); - }, [item.content, pdfDownloadInfo]); + const messageContent = item.content; const handleRegenerateMessage = useCallback(() => { regenerateMessage?.(item); @@ -137,9 +125,7 @@ function MessageItem({ ); const renderContent = useCallback(() => { - /* Show message content if there's any text besides the download */ - - if (pdfDownloadInfo) { + if (!messageContent && !(item.data || (sendLoading && !isShare))) { return null; } @@ -175,7 +161,6 @@ function MessageItem({ item.data, loading, messageContent, - pdfDownloadInfo, reference, sendLoading, theme, @@ -239,7 +224,7 @@ function MessageItem({ {isShare && !sendLoading && !isEmpty(item.content) && ( ) : ( )} - {/* Show PDF download button if download info is present */} - {pdfDownloadInfo && ( - - )} - {renderContent()} {isAssistant && ( @@ -320,6 +297,16 @@ function MessageItem({ files={item.files as File[] | UploadResponseDataType[]} > )} + {documentDownloadInfos.length > 0 && ( +
+ {documentDownloadInfos.map((downloadInfo, index) => ( +
+ {index > 0 &&
} + +
+ ))} +
+ )} {/* {isAssistant && item.attachment && item.attachment.doc_id && (
-
- ); -} - -// Helper function to detect if content contains document download info -export function extractPDFDownloadInfo( - content: string, -): DocumentDownloadInfo | null { - try { - // Try to parse as JSON first (for pure JSON content) - const parsed = JSON.parse(content); - if (parsed && parsed.filename && parsed.base64 && parsed.mime_type) { - // Accept PDF, DOCX, and TXT formats - const validMimeTypes = [ - 'application/pdf', - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - 'text/plain', - ]; - if (validMimeTypes.includes(parsed.mime_type)) { - return parsed as DocumentDownloadInfo; - } - } - } catch { - // If direct parsing fails, try to extract JSON object from mixed content - // Look for a JSON object that contains the required fields - // This regex finds a balanced JSON object by counting braces - const startPattern = /\{[^{}]*"filename"[^{}]*:/g; - let match; - - while ((match = startPattern.exec(content)) !== null) { - const startIndex = match.index; - let braceCount = 0; - let endIndex = startIndex; - - // Find the matching closing brace - for (let i = startIndex; i < content.length; i++) { - if (content[i] === '{') braceCount++; - if (content[i] === '}') braceCount--; - - if (braceCount === 0) { - endIndex = i + 1; - break; - } - } - - if (endIndex > startIndex) { - try { - const jsonStr = content.substring(startIndex, endIndex); - const parsed = JSON.parse(jsonStr); - if (parsed && parsed.filename && parsed.base64 && parsed.mime_type) { - // Accept PDF, DOCX, and TXT formats - const validMimeTypes = [ - 'application/pdf', - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - 'text/plain', - ]; - if (validMimeTypes.includes(parsed.mime_type)) { - return parsed as DocumentDownloadInfo; - } - } - } catch { - // This wasn't valid JSON, continue searching - } - } - } - } - return null; -} - -// Helper function to remove document download info from content -export function removePDFDownloadInfo( - content: string, - downloadInfo: DocumentDownloadInfo, -): string { - try { - // First, check if the entire content is just the JSON (most common case) - try { - const parsed = JSON.parse(content); - if ( - parsed && - parsed.filename === downloadInfo.filename && - parsed.base64 === downloadInfo.base64 - ) { - // The entire content is just the download JSON, return empty - return ''; - } - } catch { - // Content is not pure JSON, continue with removal - } - - // Try to remove the JSON string from content - const jsonStr = JSON.stringify(downloadInfo); - let cleaned = content.replace(jsonStr, '').trim(); - - // Also try with pretty-printed JSON (with indentation) - const prettyJsonStr = JSON.stringify(downloadInfo, null, 2); - cleaned = cleaned.replace(prettyJsonStr, '').trim(); - - // Also try to find and remove JSON object pattern from mixed content - // This handles cases where the JSON might have different formatting - const startPattern = /\{[^{}]*"filename"[^{}]*"base64"[^{}]*\}/g; - cleaned = cleaned.replace(startPattern, '').trim(); - - return cleaned; - } catch { - return content; - } -} diff --git a/web/src/constants/agent.tsx b/web/src/constants/agent.tsx index d5755e2e03..f52caffccc 100644 --- a/web/src/constants/agent.tsx +++ b/web/src/constants/agent.tsx @@ -108,7 +108,7 @@ export enum Operator { UserFillUp = 'UserFillUp', StringTransform = 'StringTransform', SearXNG = 'SearXNG', - PDFGenerator = 'PDFGenerator', + DocGenerator = 'DocGenerator', Placeholder = 'Placeholder', DataOperations = 'DataOperations', ListOperations = 'ListOperations', diff --git a/web/src/interfaces/database/chat.ts b/web/src/interfaces/database/chat.ts index eeb298fc1d..5cce383f59 100644 --- a/web/src/interfaces/database/chat.ts +++ b/web/src/interfaces/database/chat.ts @@ -1,6 +1,13 @@ import { MessageType } from '@/constants/chat'; import { IAttachment } from '@/hooks/use-send-message'; +export interface IDocumentDownloadInfo { + doc_id: string; + filename: string; + mime_type: string; + size?: number; +} + export interface PromptConfig { empty_response: string; parameters: Parameter[]; @@ -104,6 +111,7 @@ export interface Message { files?: (File | UploadResponseDataType)[]; chatBoxId?: string; attachment?: IAttachment; + downloads?: IDocumentDownloadInfo[]; } export interface IReferenceChunk { @@ -134,6 +142,7 @@ export interface IReferenceObject { export interface IAnswer { answer: string; attachment?: IAttachment; + downloads?: IDocumentDownloadInfo[]; reference?: IReference; conversationId?: string; prompt?: string; diff --git a/web/src/locales/ar.ts b/web/src/locales/ar.ts index 9680863aba..711dd010ce 100644 --- a/web/src/locales/ar.ts +++ b/web/src/locales/ar.ts @@ -1517,12 +1517,8 @@ export default { searXNG: 'احرق XNG', searXNGDescription: 'مكون يبحث عبر عنوان URL لمثيل SearXNG المقدم. حدد TopN وعنوان URL للمثيل.', - pdfGenerator: 'مولد المستندات', - pDFGenerator: 'مولد المستندات', - pdfGeneratorDescription: - 'مكون يقوم بإنشاء المستندات (PDF، DOCX، TXT) من محتوى بتنسيق تخفيض السعر مع تصميم وصور وجداول قابلة للتخصيص. يدعم: **غامق**، *مائل*، # عناوين، - قوائم، جداول مع | بناء الجملة.', - pDFGeneratorDescription: - 'مكون يقوم بإنشاء المستندات (PDF، DOCX، TXT) من محتوى بتنسيق تخفيض السعر مع تصميم وصور وجداول قابلة للتخصيص. يدعم: **غامق**، *مائل*، # عناوين، - قوائم، جداول مع | بناء الجملة.', + docGenerator: 'مولد المستندات', + docGeneratorDescription: 'ينشئ ملفًا من محتوى Markdown.', subtitle: 'الترجمة', logoImage: 'صورة الشعار', logoPosition: 'موقف الشعار', diff --git a/web/src/locales/bg.ts b/web/src/locales/bg.ts index c3a9f86982..1a22ad26fb 100644 --- a/web/src/locales/bg.ts +++ b/web/src/locales/bg.ts @@ -1569,10 +1569,8 @@ The above is the content you need to summarize.`, searXNG: 'SearXNG', searXNGDescription: 'Компонент, който търси чрез вашия SearXNG инстанция URL. Укажете TopN и URL на инстанцията.', - pdfGenerator: 'Генератор на документи', - pDFGenerator: 'Генератор на документи', - pdfGeneratorDescription: `Компонент, който генерира документи (PDF, DOCX, TXT) от markdown-форматирано съдържание с персонализирано стилизиране, изображения и таблици.`, - pDFGeneratorDescription: `Компонент, който генерира документи (PDF, DOCX, TXT) от markdown-форматирано съдържание с персонализирано стилизиране, изображения и таблици.`, + docGenerator: 'Генератор на документи', + docGeneratorDescription: `Генерира файл от Markdown съдържание.`, subtitle: 'Подзаглавие', logoImage: 'Лого изображение', logoPosition: 'Позиция на логото', diff --git a/web/src/locales/de.ts b/web/src/locales/de.ts index 256d21869a..74ff077ec5 100644 --- a/web/src/locales/de.ts +++ b/web/src/locales/de.ts @@ -1624,10 +1624,8 @@ Beispiel: Virtual Hosted Style`, searXNG: 'SearXNG', searXNGDescription: 'Eine Komponente, die auf https://searxng.org/ sucht und Ihnen ermöglicht, die Anzahl der Suchergebnisse mit TopN anzugeben. Sie ergänzt die vorhandenen Wissensdatenbanken.', - pdfGenerator: 'Dokumentengenerator', - pDFGenerator: 'Dokumentengenerator', - pdfGeneratorDescription: `Eine Komponente, die Dokumente (PDF, DOCX, TXT) aus markdown-formatierten Inhalten mit anpassbarem Stil, Bildern und Tabellen generiert. Unterstützt: **fett**, *kursiv*, # Überschriften, - Listen, Tabellen mit | Syntax.`, - pDFGeneratorDescription: `Eine Komponente, die Dokumente (PDF, DOCX, TXT) aus markdown-formatierten Inhalten mit anpassbarem Stil, Bildern und Tabellen generiert. Unterstützt: **fett**, *kursiv*, # Überschriften, - Listen, Tabellen mit | Syntax.`, + docGenerator: 'Dokumentengenerator', + docGeneratorDescription: `Erzeugt eine Datei aus Markdown-Inhalten.`, subtitle: 'Untertitel', logoImage: 'Logo-Bild', logoPosition: 'Logo-Position', diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index f3fc463727..289eee6754 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1692,10 +1692,8 @@ Best for: Documents with flowing, contextually connected content — such as boo searXNG: 'SearXNG', searXNGDescription: 'A component that searches via your provided SearXNG instance URL. Specify TopN and the instance URL.', - pdfGenerator: 'Docs Generator', - pDFGenerator: 'Docs Generator', - pdfGeneratorDescription: `A component that generates documents (PDF, DOCX, TXT) from markdown-formatted content with customizable styling, images, and tables. Supports: **bold**, *italic*, # headings, - lists, tables with | syntax.`, - pDFGeneratorDescription: `A component that generates documents (PDF, DOCX, TXT) from markdown-formatted content with customizable styling, images, and tables. Supports: **bold**, *italic*, # headings, - lists, tables with | syntax.`, + docGenerator: 'Doc Generator', + docGeneratorDescription: `Generate a file from Markdown content.`, subtitle: 'Subtitle', logoImage: 'Logo Image', logoPosition: 'Logo Position', diff --git a/web/src/locales/es.ts b/web/src/locales/es.ts index 3b36b1da13..02bdcef84f 100644 --- a/web/src/locales/es.ts +++ b/web/src/locales/es.ts @@ -611,10 +611,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Un componente que busca a través de la URL de la instancia SearXNG que proporcionas. Especifica TopN y la URL de la instancia.', - pdfGenerator: 'Generador de Documentos', - pDFGenerator: 'Generador de Documentos', - pdfGeneratorDescription: `Un componente que genera documentos (PDF, DOCX, TXT) desde contenido formateado en markdown con estilo personalizable, imágenes y tablas. Soporta: **negrita**, *cursiva*, # encabezados, - listas, tablas con sintaxis |.`, - pDFGeneratorDescription: `Un componente que genera documentos (PDF, DOCX, TXT) desde contenido formateado en markdown con estilo personalizable, imágenes y tablas. Soporta: **negrita**, *cursiva*, # encabezados, - listas, tablas con sintaxis |.`, + docGenerator: 'Generador de Documentos', + docGeneratorDescription: `Genera un archivo a partir de contenido Markdown.`, subtitle: 'Subtítulo', logoImage: 'Imagen Logo', logoPosition: 'Posición Logo', diff --git a/web/src/locales/fr.ts b/web/src/locales/fr.ts index b1fb01476d..2a80fc54b9 100644 --- a/web/src/locales/fr.ts +++ b/web/src/locales/fr.ts @@ -831,10 +831,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: "Un composant qui effectue des recherches via la URL de l'instance de SearXNG que vous fournissez. Spécifiez TopN et l'URL de l'instance.", - pdfGenerator: 'Générateur de Documents', - pDFGenerator: 'Générateur de Documents', - pdfGeneratorDescription: `Un composant qui génère des documents (PDF, DOCX, TXT) à partir de contenu formaté en markdown avec un style personnalisable, des images et des tableaux. Prend en charge : **gras**, *italique*, # titres, - listes, tableaux avec syntaxe |.`, - pDFGeneratorDescription: `Un composant qui génère des documents (PDF, DOCX, TXT) à partir de contenu formaté en markdown avec un style personnalisable, des images et des tableaux. Prend en charge : **gras**, *italique*, # titres, - listes, tableaux avec syntaxe |.`, + docGenerator: 'Générateur de Documents', + docGeneratorDescription: `Génère un fichier à partir de contenu Markdown.`, subtitle: 'Sous-titre', logoImage: 'Image Logo', logoPosition: 'Position Logo', diff --git a/web/src/locales/id.ts b/web/src/locales/id.ts index 95c479de92..6dbb2cce60 100644 --- a/web/src/locales/id.ts +++ b/web/src/locales/id.ts @@ -810,10 +810,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Komponen yang melakukan pencarian menggunakan URL instance SearXNG yang Anda berikan. Spesifikasikan TopN dan URL instance.', - pdfGenerator: 'Pembuat Dokumen', - pDFGenerator: 'Pembuat Dokumen', - pdfGeneratorDescription: `Komponen yang menghasilkan dokumen (PDF, DOCX, TXT) dari konten berformat markdown dengan gaya yang dapat disesuaikan, gambar, dan tabel. Mendukung: **tebal**, *miring*, # judul, - daftar, tabel dengan sintaks |.`, - pDFGeneratorDescription: `Komponen yang menghasilkan dokumen (PDF, DOCX, TXT) dari konten berformat markdown dengan gaya yang dapat disesuaikan, gambar, dan tabel. Mendukung: **tebal**, *miring*, # judul, - daftar, tabel dengan sintaks |.`, + docGenerator: 'Pembuat Dokumen', + docGeneratorDescription: `Menghasilkan file dari konten Markdown.`, subtitle: 'Subjudul', logoImage: 'Gambar Logo', logoPosition: 'Posisi Logo', diff --git a/web/src/locales/it.ts b/web/src/locales/it.ts index 70e59edc29..c72e4e179a 100644 --- a/web/src/locales/it.ts +++ b/web/src/locales/it.ts @@ -969,10 +969,8 @@ Quanto sopra è il contenuto che devi riassumere.`, searXNG: 'SearXNG', searXNGDescription: 'Un componente che cerca tramite lURL dellistanza SearXNG fornita. Specifica TopN e lURL dellistanza.', - pdfGenerator: 'Generatore Documenti', - pDFGenerator: 'Generatore Documenti', - pdfGeneratorDescription: `Un componente che genera documenti (PDF, DOCX, TXT) da contenuti formattati in markdown con stile personalizzabile, immagini e tabelle. Supporta: **grassetto**, *corsivo*, # titoli, - elenchi, tabelle con sintassi |.`, - pDFGeneratorDescription: `Un componente che genera documenti (PDF, DOCX, TXT) da contenuti formattati in markdown con stile personalizzabile, immagini e tabelle. Supporta: **grassetto**, *corsivo*, # titoli, - elenchi, tabelle con sintassi |.`, + docGenerator: 'Generatore Documenti', + docGeneratorDescription: `Genera file da contenuto Markdown.`, subtitle: 'Sottotitolo', logoImage: 'Immagine Logo', logoPosition: 'Posizione Logo', diff --git a/web/src/locales/ja.ts b/web/src/locales/ja.ts index 014635b47d..b9335508f5 100644 --- a/web/src/locales/ja.ts +++ b/web/src/locales/ja.ts @@ -820,10 +820,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'SearXNGのインスタンスURLを提供して検索を行うコンポーネント。TopNとインスタンスURLを指定してください。', - pdfGenerator: 'ドキュメント生成', - pDFGenerator: 'ドキュメント生成', - pdfGeneratorDescription: `マークダウン形式のコンテンツからドキュメント(PDF、DOCX、TXT)を生成するコンポーネント。カスタムスタイル、画像、テーブルをサポート。サポート:**太字**、*斜体*、# 見出し、- リスト、| 構文のテーブル。`, - pDFGeneratorDescription: `マークダウン形式のコンテンツからドキュメント(PDF、DOCX、TXT)を生成するコンポーネント。カスタムスタイル、画像、テーブルをサポート。サポート:**太字**、*斜体*、# 見出し、- リスト、| 構文のテーブル。`, + docGenerator: 'ドキュメント生成', + docGeneratorDescription: `Markdown コンテンツからファイルを生成します。`, subtitle: 'サブタイトル', logoImage: 'ロゴ画像', logoPosition: 'ロゴ位置', diff --git a/web/src/locales/pt-br.ts b/web/src/locales/pt-br.ts index f8a5a7430e..d2675040b5 100644 --- a/web/src/locales/pt-br.ts +++ b/web/src/locales/pt-br.ts @@ -768,10 +768,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Um componente que realiza buscas via URL da instância SearXNG que você fornece. Especifique TopN e URL da instância.', - pdfGenerator: 'Gerador de Documentos', - pDFGenerator: 'Gerador de Documentos', - pdfGeneratorDescription: `Um componente que gera documentos (PDF, DOCX, TXT) de conteúdo formatado em markdown com estilo personalizável, imagens e tabelas. Suporta: **negrito**, *itálico*, # títulos, - listas, tabelas com sintaxe |.`, - pDFGeneratorDescription: `Um componente que gera documentos (PDF, DOCX, TXT) de conteúdo formatado em markdown com estilo personalizável, imagens e tabelas. Suporta: **negrito**, *itálico*, # títulos, - listas, tabelas com sintaxe |.`, + docGenerator: 'Gerador de Documentos', + docGeneratorDescription: `Gera um arquivo a partir de conteúdo Markdown.`, subtitle: 'Subtítulo', logoImage: 'Imagem Logo', logoPosition: 'Posição Logo', diff --git a/web/src/locales/ru.ts b/web/src/locales/ru.ts index 9ed1af9f9a..c5f9c3794d 100644 --- a/web/src/locales/ru.ts +++ b/web/src/locales/ru.ts @@ -1687,10 +1687,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Компонент, который выполняет поиск через ваш предоставленный URL экземпляра SearXNG. Укажите TopN и URL экземпляра.', - pdfGenerator: 'Генератор документов', - pDFGenerator: 'Генератор документов', - pdfGeneratorDescription: `Компонент, который генерирует документы (PDF, DOCX, TXT) из содержимого в формате markdown с настраиваемым стилем, изображениями и таблицами. Поддерживает: **жирный**, *курсив*, # заголовки, - списки, таблицы с синтаксисом |.`, - pDFGeneratorDescription: `Компонент, который генерирует документы (PDF, DOCX, TXT) из содержимого в формате markdown с настраиваемым стилем, изображениями и таблицами. Поддерживает: **жирный**, *курсив*, # заголовки, - списки, таблицы с синтаксисом |.`, + docGenerator: 'Генератор документов', + docGeneratorDescription: `Создает файл из содержимого Markdown.`, subtitle: 'Подзаголовок', logoImage: 'Изображение логотипа', logoPosition: 'Позиция логотипа', diff --git a/web/src/locales/tr.ts b/web/src/locales/tr.ts index fa3eb96a78..8aac442de5 100644 --- a/web/src/locales/tr.ts +++ b/web/src/locales/tr.ts @@ -1626,10 +1626,8 @@ Bu otomatik etiketleme özelliği, mevcut datasete alanına özgü bilgi katman searXNG: 'SearXNG', searXNGDescription: "Sağlanan SearXNG örnek URL'si üzerinden arama yapan bir bileşen.", - pdfGenerator: 'Belge Oluşturucu', - pDFGenerator: 'Belge Oluşturucu', - pdfGeneratorDescription: `Markdown biçimli içerikten belgeler (PDF, DOCX, TXT) oluşturan bir bileşen.`, - pDFGeneratorDescription: `Markdown biçimli içerikten belgeler (PDF, DOCX, TXT) oluşturan bir bileşen.`, + docGenerator: 'Belge Oluşturucu', + docGeneratorDescription: `Markdown içeriğinden bir dosya oluşturur.`, subtitle: 'Alt başlık', logoImage: 'Logo Görüntüsü', logoPosition: 'Logo Konumu', diff --git a/web/src/locales/vi.ts b/web/src/locales/vi.ts index befed677eb..d9225f0851 100644 --- a/web/src/locales/vi.ts +++ b/web/src/locales/vi.ts @@ -852,10 +852,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Một thành phần tìm kiếm thông qua URL phiên bản SearXNG bạn cung cấp. Chỉ định TopN và URL phiên bản.', - pdfGenerator: 'Trình tạo Tài liệu', - pDFGenerator: 'Trình tạo Tài liệu', - pdfGeneratorDescription: `Một thành phần tạo tài liệu (PDF, DOCX, TXT) từ nội dung định dạng markdown với kiểu tùy chỉnh, hình ảnh và bảng. Hỗ trợ: **in đậm**, *in nghiêng*, # tiêu đề, - danh sách, bảng với cú pháp |.`, - pDFGeneratorDescription: `Một thành phần tạo tài liệu (PDF, DOCX, TXT) từ nội dung định dạng markdown với kiểu tùy chỉnh, hình ảnh và bảng. Hỗ trợ: **in đậm**, *in nghiêng*, # tiêu đề, - danh sách, bảng với cú pháp |.`, + docGenerator: 'Trình tạo Tài liệu', + docGeneratorDescription: `Tạo tệp từ nội dung Markdown.`, subtitle: 'Phụ đề', logoImage: 'Hình ảnh Logo', logoPosition: 'Vị trí Logo', diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index f65599cff5..5153677371 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -906,10 +906,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: '此組件透過您提供的 SearXNG 實例 URL 進行搜尋。請設定 Top N 和實例 URL。', - pdfGenerator: '文檔生成器', - pPDFGenerator: '文檔生成器', - pdfGeneratorDescription: `該組件從 markdown 格式的內容生成文檔(PDF、DOCX、TXT),支援自定義樣式、圖片和表格。支援:**粗體**、*斜體*、# 標題、- 列表、使用 | 語法的表格。`, - pPDFGeneratorDescription: `該組件從 markdown 格式的內容生成文檔(PDF、DOCX、TXT),支援自定義樣式、圖片和表格。支援:**粗體**、*斜體*、# 標題、- 列表、使用 | 語法的表格。`, + docGenerator: '文檔生成器', + docGeneratorDescription: `從 Markdown 內容產生檔案。`, subtitle: '副標題', logoImage: '標誌圖片', logoPosition: '標誌位置', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index bb4918fcf1..a47ebddabe 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -1455,10 +1455,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 searXNG: 'SearXNG', searXNGDescription: '该组件通过您提供的 SearXNG 实例地址进行搜索。请设置 Top N 和实例 URL。', - pdfGenerator: '文档生成器', - pDFGenerator: '文档生成器', - pdfGeneratorDescription: `该组件从 markdown 格式的内容生成文档(PDF、DOCX、TXT),支持自定义样式、图片和表格。支持:**粗体**、*斜体*、# 标题、- 列表、使用 | 语法的表格。`, - pDFGeneratorDescription: `该组件从 markdown 格式的内容生成文档(PDF、DOCX、TXT),支持自定义样式、图片和表格。支持:**粗体**、*斜体*、# 标题、- 列表、使用 | 语法的表格。`, + docGenerator: '文档生成器', + docGeneratorDescription: `从 Markdown 内容生成文件。`, subtitle: '副标题', logoImage: '标志图片', logoPosition: '标志位置', diff --git a/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx b/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx index b860dbc9ef..e8bee6f593 100644 --- a/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx +++ b/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx @@ -122,7 +122,7 @@ export function AccordionOperators({ Operator.Invoke, Operator.WenCai, Operator.SearXNG, - Operator.PDFGenerator, + Operator.DocGenerator, ]} isCustomDropdown={isCustomDropdown} mousePosition={mousePosition} diff --git a/web/src/pages/agent/chat/use-send-agent-message.ts b/web/src/pages/agent/chat/use-send-agent-message.ts index 35bb078e4f..8208ffb754 100644 --- a/web/src/pages/agent/chat/use-send-agent-message.ts +++ b/web/src/pages/agent/chat/use-send-agent-message.ts @@ -86,6 +86,7 @@ export function findMessageFromList(eventList: IEventList) { content: nextContent, audio_binary: audioBinary, attachment: workflowFinished?.data?.outputs?.attachment || {}, + downloads: workflowFinished?.data?.outputs?.downloads || [], }; } @@ -441,7 +442,7 @@ export const useSendAgentMessage = ({ }, [sendMessageInTaskMode]); useEffect(() => { - const { content, id, attachment, audio_binary } = + const { content, id, attachment, audio_binary, downloads } = findMessageFromList(answerList); const inputAnswer = findInputFromList(answerList); const answer = content || getLatestError(answerList); @@ -451,6 +452,7 @@ export const useSendAgentMessage = ({ answer: answer ?? '', audio_binary: audio_binary, attachment: attachment as IAttachment, + downloads, id: id, ...inputAnswer, }); diff --git a/web/src/pages/agent/constant/index.tsx b/web/src/pages/agent/constant/index.tsx index 490c1c42c3..d4fd25335b 100644 --- a/web/src/pages/agent/constant/index.tsx +++ b/web/src/pages/agent/constant/index.tsx @@ -696,7 +696,7 @@ export const RestrictedUpstreamMap = { [Operator.Loop]: [Operator.Begin], [Operator.LoopStart]: [Operator.Begin], [Operator.ExitLoop]: [Operator.Begin], - [Operator.PDFGenerator]: [Operator.Begin], + [Operator.DocGenerator]: [Operator.Begin], }; export const NodeMap = { @@ -747,7 +747,7 @@ export const NodeMap = { [Operator.LoopStart]: 'loopStartNode', [Operator.ExitLoop]: 'exitLoopNode', [Operator.ExcelProcessor]: 'ragNode', - [Operator.PDFGenerator]: 'ragNode', + [Operator.DocGenerator]: 'ragNode', }; export enum BeginQueryType { @@ -963,68 +963,18 @@ export enum AgentVariableType { Conversation = 'conversation', } -// PDF Generator enums -export enum PDFGeneratorFontFamily { - Helvetica = 'Helvetica', - TimesRoman = 'Times-Roman', - Courier = 'Courier', - HelveticaBold = 'Helvetica-Bold', - TimesBold = 'Times-Bold', -} - -export enum PDFGeneratorLogoPosition { - Left = 'left', - Center = 'center', - Right = 'right', -} - -export enum PDFGeneratorPageSize { - A4 = 'A4', - Letter = 'Letter', -} - -export enum PDFGeneratorOrientation { - Portrait = 'portrait', - Landscape = 'landscape', -} - -export const initialPDFGeneratorValues = { +export const initialDocGeneratorValues = { output_format: 'pdf', content: '', - title: '', - subtitle: '', + filename: '', header_text: '', footer_text: '', - logo_image: '', - logo_position: PDFGeneratorLogoPosition.Left, - logo_width: 2.0, - logo_height: 1.0, - font_family: PDFGeneratorFontFamily.Helvetica, - font_size: 12, - title_font_size: 24, - heading1_font_size: 18, - heading2_font_size: 16, - heading3_font_size: 14, - text_color: '#000000', - title_color: '#000000', - page_size: PDFGeneratorPageSize.A4, - orientation: PDFGeneratorOrientation.Portrait, - margin_top: 1.0, - margin_bottom: 1.0, - margin_left: 1.0, - margin_right: 1.0, - line_spacing: 1.2, - filename: '', - output_directory: '/tmp/pdf_outputs', + watermark_text: '', add_page_numbers: true, add_timestamp: true, - watermark_text: '', - enable_toc: false, + font_size: 12, outputs: { - file_path: { type: 'string' }, - pdf_base64: { type: 'string' }, download: { type: 'string' }, - success: { type: 'boolean' }, }, }; diff --git a/web/src/pages/agent/form-sheet/form-config-map.tsx b/web/src/pages/agent/form-sheet/form-config-map.tsx index 7fe720885f..2baafbf16a 100644 --- a/web/src/pages/agent/form-sheet/form-config-map.tsx +++ b/web/src/pages/agent/form-sheet/form-config-map.tsx @@ -7,6 +7,7 @@ import CategorizeForm from '../form/categorize-form'; import CodeForm from '../form/code-form'; import CrawlerForm from '../form/crawler-form'; import DataOperationsForm from '../form/data-operations-form'; +import DocGeneratorForm from '../form/doc-generator-form'; import DuckDuckGoForm from '../form/duckduckgo-form'; import EmailForm from '../form/email-form'; import ExeSQLForm from '../form/exesql-form'; @@ -21,7 +22,6 @@ import ListOperationsForm from '../form/list-operations-form'; import LoopForm from '../form/loop-form'; import MessageForm from '../form/message-form'; import ParserForm from '../form/parser-form'; -import PDFGeneratorForm from '../form/pdf-generator-form'; import PubMedForm from '../form/pubmed-form'; import RetrievalForm from '../form/retrieval-form/next'; import RewriteQuestionForm from '../form/rewrite-question-form'; @@ -111,8 +111,8 @@ export const FormConfigMap = { [Operator.SearXNG]: { component: SearXNGForm, }, - [Operator.PDFGenerator]: { - component: PDFGeneratorForm, + [Operator.DocGenerator]: { + component: DocGeneratorForm, }, [Operator.Note]: { component: () => <>, diff --git a/web/src/pages/agent/form/doc-generator-form/index.tsx b/web/src/pages/agent/form/doc-generator-form/index.tsx new file mode 100644 index 0000000000..e9d0e82dcb --- /dev/null +++ b/web/src/pages/agent/form/doc-generator-form/index.tsx @@ -0,0 +1,254 @@ +import { FormContainer } from '@/components/form-container'; +import { + Form, + FormControl, + FormField, + FormItem, + FormLabel, + FormMessage, +} from '@/components/ui/form'; +import { Input } from '@/components/ui/input'; +import { RAGFlowSelect } from '@/components/ui/select'; +import { Switch } from '@/components/ui/switch'; +import { zodResolver } from '@hookform/resolvers/zod'; +import { t } from 'i18next'; +import { memo, useEffect, useMemo } from 'react'; +import { useForm } from 'react-hook-form'; +import { z } from 'zod'; +import { INextOperatorForm } from '../../interface'; +import { FormWrapper } from '../components/form-wrapper'; +import { Output, transferOutputs } from '../components/output'; +import { PromptEditor } from '../components/prompt-editor'; +import { useValues } from './use-values'; +import { useWatchFormChange } from './use-watch-form-change'; + +function DocGeneratorForm({ node }: INextOperatorForm) { + const values = useValues(node); + + const FormSchema = z.object({ + output_format: z.string().default('pdf'), + content: z.string().min(1, 'Content is required'), + filename: z.string().optional(), + header: z.string().optional(), + footer: z.string().optional(), + watermark: z.string().optional(), + add_page_numbers: z.boolean(), + add_timestamp: z.boolean(), + font_size: z.coerce.number().min(12, 'Font size must be at least 12'), + outputs: z.object({ + download: z.object({ type: z.string() }), + }), + }); + + const form = useForm>({ + defaultValues: values, + resolver: zodResolver(FormSchema), + }); + + const outputFormat = form.watch('output_format'); + const formOutputs = form.watch('outputs'); + + const supportsDocumentDecorations = + outputFormat === 'pdf' || outputFormat === 'docx'; + + const supportsTimestamp = + outputFormat === 'pdf' || + outputFormat === 'docx' || + outputFormat === 'txt' || + outputFormat === 'markdown' || + outputFormat === 'html'; + + const outputList = useMemo(() => { + return transferOutputs(formOutputs ?? values.outputs); + }, [formOutputs, values.outputs]); + + useEffect(() => { + form.setValue('outputs', values.outputs); + }, [form, values.outputs]); + + useWatchFormChange(node?.id, form); + + return ( +
+ + + ( + + Output Format + + + + + + )} + /> + + ( + + {t('flow.content')} + + + + + + )} + /> + + ( + + {t('flow.filename')} + + + + + + )} + /> + + {supportsDocumentDecorations && ( + <> + ( + + {t('flow.fontSize')} + + field.onChange(e.target.value)} + onBlur={(e) => { + field.onBlur(); + const value = Number(e.target.value); + field.onChange( + Number.isFinite(value) && value >= 12 ? value : 12, + ); + }} + /> + + + + )} + /> + + ( + + Header Text + + + + + + )} + /> + + ( + + Footer Text + + + + + + )} + /> + {outputFormat === 'pdf' && ( + ( + + {t('flow.watermarkText')} + + + + + + )} + /> + )} + + ( + + {t('flow.addPageNumbers')} + + + + + )} + /> + + )} + + {supportsTimestamp && ( + ( + + {t('flow.addTimestamp')} + + + + + )} + /> + )} + +
} + /> +
+
+
+ +
+
+ ); +} + +export default memo(DocGeneratorForm); diff --git a/web/src/pages/agent/form/doc-generator-form/use-values.ts b/web/src/pages/agent/form/doc-generator-form/use-values.ts new file mode 100644 index 0000000000..e4426ae8a5 --- /dev/null +++ b/web/src/pages/agent/form/doc-generator-form/use-values.ts @@ -0,0 +1,30 @@ +import { useMemo } from 'react'; +import { Node } from 'reactflow'; +import { initialDocGeneratorValues } from '../../constant'; + +export const useValues = (node?: Node) => { + const values = useMemo(() => { + const supportedOutputFormats = ['pdf', 'docx', 'txt', 'markdown', 'html']; + const nextValues = { + ...initialDocGeneratorValues, + ...(node?.data.form ?? {}), + }; + + return { + output_format: supportedOutputFormats.includes(nextValues.output_format) + ? nextValues.output_format + : initialDocGeneratorValues.output_format, + content: nextValues.content, + filename: nextValues.filename, + header_text: nextValues.header_text, + footer_text: nextValues.footer_text, + watermark_text: nextValues.watermark_text, + add_page_numbers: nextValues.add_page_numbers, + add_timestamp: nextValues.add_timestamp, + font_size: Math.max(12, Number(nextValues.font_size) || 12), + outputs: initialDocGeneratorValues.outputs, + }; + }, [node?.data.form]); + + return values; +}; diff --git a/web/src/pages/agent/form/pdf-generator-form/use-watch-form-change.ts b/web/src/pages/agent/form/doc-generator-form/use-watch-form-change.ts similarity index 100% rename from web/src/pages/agent/form/pdf-generator-form/use-watch-form-change.ts rename to web/src/pages/agent/form/doc-generator-form/use-watch-form-change.ts diff --git a/web/src/pages/agent/form/pdf-generator-form/index.tsx b/web/src/pages/agent/form/pdf-generator-form/index.tsx deleted file mode 100644 index 3c3ce7f16d..0000000000 --- a/web/src/pages/agent/form/pdf-generator-form/index.tsx +++ /dev/null @@ -1,536 +0,0 @@ -import { FormContainer } from '@/components/form-container'; -import { - Form, - FormControl, - FormDescription, - FormField, - FormItem, - FormLabel, - FormMessage, -} from '@/components/ui/form'; -import { Input } from '@/components/ui/input'; -import { RAGFlowSelect } from '@/components/ui/select'; -import { Switch } from '@/components/ui/switch'; -import { zodResolver } from '@hookform/resolvers/zod'; -import { t } from 'i18next'; -import { memo, useMemo } from 'react'; -import { useForm } from 'react-hook-form'; -import { z } from 'zod'; -import { - PDFGeneratorFontFamily, - PDFGeneratorLogoPosition, - PDFGeneratorOrientation, - PDFGeneratorPageSize, -} from '../../constant'; -import { INextOperatorForm } from '../../interface'; -import { FormWrapper } from '../components/form-wrapper'; -import { Output, transferOutputs } from '../components/output'; -import { PromptEditor } from '../components/prompt-editor'; -import { useValues } from './use-values'; -import { useWatchFormChange } from './use-watch-form-change'; - -function PDFGeneratorForm({ node }: INextOperatorForm) { - const values = useValues(node); - - const FormSchema = z.object({ - output_format: z.string().default('pdf'), - content: z.string().min(1, 'Content is required'), - title: z.string().optional(), - subtitle: z.string().optional(), - header_text: z.string().optional(), - footer_text: z.string().optional(), - logo_image: z.string().optional(), - logo_position: z.string(), - logo_width: z.number(), - logo_height: z.number(), - font_family: z.string(), - font_size: z.number(), - title_font_size: z.number(), - heading1_font_size: z.number(), - heading2_font_size: z.number(), - heading3_font_size: z.number(), - text_color: z.string(), - title_color: z.string(), - page_size: z.string(), - orientation: z.string(), - margin_top: z.number(), - margin_bottom: z.number(), - margin_left: z.number(), - margin_right: z.number(), - line_spacing: z.number(), - filename: z.string().optional(), - output_directory: z.string(), - add_page_numbers: z.boolean(), - add_timestamp: z.boolean(), - watermark_text: z.string().optional(), - enable_toc: z.boolean(), - outputs: z.object({ - file_path: z.object({ type: z.string() }), - pdf_base64: z.object({ type: z.string() }), - download: z.object({ type: z.string() }), - success: z.object({ type: z.string() }), - }), - }); - - const form = useForm>({ - defaultValues: values, - resolver: zodResolver(FormSchema), - }); - - const formOutputs = form.watch('outputs'); - - const outputList = useMemo(() => { - return transferOutputs(formOutputs ?? values.outputs); - }, [formOutputs, values.outputs]); - - useWatchFormChange(node?.id, form); - - return ( -
- - - {/* Output Format Selection */} - ( - - Output Format - - - - - Choose the output document format - - - - )} - /> - - {/* Content Section */} - ( - - {t('flow.content')} - - - - -
-
- Markdown support: **bold**, *italic*, - `code`, # Heading 1, ## Heading 2 -
-
- Lists: - bullet or 1. numbered -
-
- Tables: | Column 1 | Column 2 | (use | to - separate columns, <br> or \n for line breaks in - cells) -
-
- Other: --- for horizontal line, ``` for - code blocks -
-
-
- -
- )} - /> - - {/* Title & Subtitle */} - ( - - {t('flow.title')} - - - - - - )} - /> - - ( - - {t('flow.subtitle')} - - - - - - )} - /> - - {/* Logo Settings */} - ( - - {t('flow.logoImage')} - -
- { - const file = e.target.files?.[0]; - if (file) { - const reader = new FileReader(); - reader.onloadend = () => { - field.onChange(reader.result as string); - }; - reader.readAsDataURL(file); - } - }} - className="cursor-pointer" - /> - -
-
- - Upload an image file or paste a file path/URL/base64 - - -
- )} - /> - - ( - - {t('flow.logoPosition')} - - ({ label: val, value: val }), - )} - > - - - - )} - /> - -
- ( - - {t('flow.logoWidth')} (inches) - - - field.onChange(parseFloat(e.target.value)) - } - /> - - - - )} - /> - - ( - - {t('flow.logoHeight')} (inches) - - - field.onChange(parseFloat(e.target.value)) - } - /> - - - - )} - /> -
- - {/* Font Settings */} - ( - - {t('flow.fontFamily')} - - ({ label: val, value: val }), - )} - > - - - - )} - /> - -
- ( - - {t('flow.fontSize')} - - field.onChange(parseInt(e.target.value))} - /> - - - - )} - /> - - ( - - {t('flow.titleFontSize')} - - field.onChange(parseInt(e.target.value))} - /> - - - - )} - /> -
- - {/* Page Settings */} - ( - - {t('flow.pageSize')} - - ({ - label: val, - value: val, - }))} - > - - - - )} - /> - - ( - - {t('flow.orientation')} - - ({ label: val, value: val }), - )} - > - - - - )} - /> - - {/* Margins */} -
- ( - - {t('flow.marginTop')} (inches) - - - field.onChange(parseFloat(e.target.value)) - } - /> - - - - )} - /> - - ( - - {t('flow.marginBottom')} (inches) - - - field.onChange(parseFloat(e.target.value)) - } - /> - - - - )} - /> -
- - {/* Output Settings */} - ( - - {t('flow.filename')} - - - - - - )} - /> - - ( - - {t('flow.outputDirectory')} - - - - - - )} - /> - - {/* Additional Options */} - ( - -
- {t('flow.addPageNumbers')} - - Add page numbers to the document - -
- - - -
- )} - /> - - ( - -
- {t('flow.addTimestamp')} - - Add generation timestamp to the document - -
- - - -
- )} - /> - - ( - - {t('flow.watermarkText')} - - - - - - )} - /> - -
} - /> -
-
-
- -
-
- ); -} - -export default memo(PDFGeneratorForm); diff --git a/web/src/pages/agent/form/pdf-generator-form/use-values.ts b/web/src/pages/agent/form/pdf-generator-form/use-values.ts deleted file mode 100644 index 1ecd829089..0000000000 --- a/web/src/pages/agent/form/pdf-generator-form/use-values.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { useMemo } from 'react'; -import { Node } from 'reactflow'; -import { initialPDFGeneratorValues } from '../../constant'; - -export const useValues = (node?: Node) => { - const values = useMemo(() => { - return node?.data.form ?? initialPDFGeneratorValues; - }, [node?.data.form]); - - return values; -}; diff --git a/web/src/pages/agent/hooks/use-add-node.ts b/web/src/pages/agent/hooks/use-add-node.ts index 3930ab8514..45f9179493 100644 --- a/web/src/pages/agent/hooks/use-add-node.ts +++ b/web/src/pages/agent/hooks/use-add-node.ts @@ -17,6 +17,7 @@ import { initialCodeValues, initialCrawlerValues, initialDataOperationsValues, + initialDocGeneratorValues, initialDuckValues, initialEmailValues, initialExeSqlValues, @@ -31,7 +32,6 @@ import { initialLoopValues, initialMessageValues, initialNoteValues, - initialPDFGeneratorValues, initialParserValues, initialPubMedValues, initialRetrievalValues, @@ -180,7 +180,7 @@ export const useInitializeOperatorParams = () => { [Operator.Loop]: initialLoopValues, [Operator.LoopStart]: {}, [Operator.ExitLoop]: {}, - [Operator.PDFGenerator]: initialPDFGeneratorValues, + [Operator.DocGenerator]: initialDocGeneratorValues, [Operator.ExcelProcessor]: {}, }; }, [llmId]); diff --git a/web/src/pages/agent/hooks/use-get-begin-query.tsx b/web/src/pages/agent/hooks/use-get-begin-query.tsx index 82265a3025..1c6e2aa03c 100644 --- a/web/src/pages/agent/hooks/use-get-begin-query.tsx +++ b/web/src/pages/agent/hooks/use-get-begin-query.tsx @@ -173,6 +173,38 @@ export function useBuildBeginDynamicVariableOptions() { const Env = 'env.'; +function splitOperatorOutputValue(value?: string) { + if (!value) { + return {}; + } + + const [nodeId, output] = value.split('@'); + return { nodeId, output }; +} + +function filterDocGeneratorDownloadOutputOptions( + groups: Array<{ + options: Array<{ value?: string } & Record>; + }>, + allowDocGeneratorDownloadOutput: boolean, + getOperatorTypeFromId: (nodeId?: string) => string | undefined, +) { + return groups.map((group) => ({ + ...group, + options: group.options.filter((option) => { + const { nodeId, output } = splitOperatorOutputValue(option.value); + if ( + output === 'download' && + getOperatorTypeFromId(nodeId) === Operator.DocGenerator + ) { + return allowDocGeneratorDownloadOutput; + } + + return true; + }), + })); +} + export function useBuildGlobalWithBeginVariableOptions() { const { data } = useFetchAgent(); const dynamicBeginOptions = useBuildBeginDynamicVariableOptions(); @@ -270,6 +302,9 @@ export function useBuildQueryVariableOptions({ } & BuildQueryVariableOptions = {}) { const node = useContext(AgentFormContext) || n; const nodes = useGraphStore((state) => state.nodes); + const getOperatorTypeFromId = useGraphStore( + (state) => state.getOperatorTypeFromId, + ); const options = useBuildVariableOptions(node?.id, node?.parentId); @@ -282,14 +317,22 @@ export function useBuildQueryVariableOptions({ [AgentVariableType.Begin]: globalWithBeginVariableOptions, [AgentVariableType.Conversation]: conversationOptions, }; + const allowDocGeneratorDownloadOutput = + node?.data?.label === Operator.Message; const nextOptions = useMemo(() => { - return [ - ...globalWithBeginVariableOptions, - ...conversationOptions, - ...options, - ]; - }, [conversationOptions, globalWithBeginVariableOptions, options]); + return filterDocGeneratorDownloadOutputOptions( + [...globalWithBeginVariableOptions, ...conversationOptions, ...options], + allowDocGeneratorDownloadOutput, + getOperatorTypeFromId, + ); + }, [ + allowDocGeneratorDownloadOutput, + conversationOptions, + getOperatorTypeFromId, + globalWithBeginVariableOptions, + options, + ]); // Which options are entirely under external control? if (!isEmpty(nodeIds) || !isEmpty(variablesExceptOperatorOutputs)) { @@ -299,10 +342,11 @@ export function useBuildQueryVariableOptions({ variablesExceptOperatorOutputs?.map((x) => AgentVariableOptionsMap[x]) ?? []; - return [ - ...flatten(variablesExceptOperatorOutputsOptions), - ...nodeOutputOptions, - ]; + return filterDocGeneratorDownloadOutputOptions( + [...flatten(variablesExceptOperatorOutputsOptions), ...nodeOutputOptions], + allowDocGeneratorDownloadOutput, + getOperatorTypeFromId, + ); } return nextOptions; } diff --git a/web/src/pages/agent/operator-icon.tsx b/web/src/pages/agent/operator-icon.tsx index 60c4028482..30a888257d 100644 --- a/web/src/pages/agent/operator-icon.tsx +++ b/web/src/pages/agent/operator-icon.tsx @@ -56,7 +56,7 @@ export const LucideIconMap = { [Operator.DataOperations]: FileCode, [Operator.Loop]: InfinityIcon, [Operator.ExitLoop]: LogOut, - [Operator.PDFGenerator]: FileText, + [Operator.DocGenerator]: FileText, }; const Empty = () => {