diff --git a/Dockerfile b/Dockerfile index b89cb266a0..fdc5f4c4bb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,7 +42,8 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \ chmod 1777 /tmp && \ apt update && \ - apt install -y build-essential libglib2.0-0 libglx-mesa0 libgl1 pkg-config libicu-dev libgdiplus default-jdk libatk-bridge2.0-0 libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev libjemalloc-dev gnupg unzip curl wget git vim less ghostscript pandoc texlive fonts-freefont-ttf fonts-noto-cjk postgresql-client + apt install -y \ + build-essential libglib2.0-0 libglx-mesa0 libgl1 pkg-config libicu-dev libgdiplus default-jdk libatk-bridge2.0-0 libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev libjemalloc-dev gnupg unzip curl wget git vim less ghostscript pandoc texlive texlive-latex-extra texlive-xetex texlive-lang-chinese fonts-freefont-ttf fonts-noto-cjk postgresql-client # Download resource from GitHub to /usr/share/infinity RUN mkdir -p /usr/share/infinity/resource && \ diff --git a/agent/component/docs_generator.py b/agent/component/docs_generator.py index a3f165a552..3ab02c4cda 100644 --- a/agent/component/docs_generator.py +++ b/agent/component/docs_generator.py @@ -1,1570 +1,632 @@ +import logging import json import os import re -import base64 -from datetime import datetime +import shutil +import tempfile from abc import ABC -from io import BytesIO -from typing import Optional +from datetime import datetime from functools import partial -from reportlab.lib.pagesizes import A4 -from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle -from reportlab.lib.units import inch -from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY -from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, TableStyle, LongTable -from reportlab.lib import colors -from reportlab.pdfbase import pdfmetrics -from reportlab.pdfbase.ttfonts import TTFont -from reportlab.pdfbase.cidfonts import UnicodeCIDFont +from io import BytesIO +from xml.sax.saxutils import escape from agent.component.base import ComponentParamBase from api.utils.api_utils import timeout +from common import settings +from common.misc_utils import get_uuid from .message import Message -class PDFGeneratorParam(ComponentParamBase): +def sanitize_filename(name: str, extension: str) -> str: + if not name: + return f"file.{extension}" + + name = str(name).strip() + name = re.sub(r'[\\/\x00-\x1f\?\#\%\*\:\|\<\>"]', " ", name) + name = re.sub(r"\s+", " ", name).strip(" .") + + if not name: + return f"file.{extension}" + + base, _ = os.path.splitext(name) + base = base[:180].rstrip() or "file" + return f"{base}.{extension}" + + +class DocGeneratorParam(ComponentParamBase): """ - Define the PDF Generator component parameters. + Define the Docs Generator component parameters. """ def __init__(self): super().__init__() - # Output format - self.output_format = "pdf" # pdf, docx, txt - - # Content inputs + self.output_format = "pdf" # pdf, docx, txt, markdown, html self.content = "" - self.title = "" - self.subtitle = "" + self.filename = "" self.header_text = "" self.footer_text = "" - - # Images - self.logo_image = "" # base64 or file path - self.logo_position = "left" # left, center, right - self.logo_width = 2.0 # inches - self.logo_height = 1.0 # inches - - # Styling - self.font_family = "Helvetica" # Helvetica, Times-Roman, Courier - self.font_size = 12 - self.title_font_size = 24 - self.heading1_font_size = 18 - self.heading2_font_size = 16 - self.heading3_font_size = 14 - self.text_color = "#000000" - self.title_color = "#000000" - - # Page settings - self.page_size = "A4" - self.orientation = "portrait" # portrait, landscape - self.margin_top = 1.0 # inches - self.margin_bottom = 1.0 - self.margin_left = 1.0 - self.margin_right = 1.0 - self.line_spacing = 1.2 - - # Output settings - self.filename = "" - self.output_directory = "/tmp/pdf_outputs" + self.watermark_text = "" self.add_page_numbers = True self.add_timestamp = True - - # Advanced features - self.watermark_text = "" - self.enable_toc = False - + self.font_size = 12 self.outputs = { - "file_path": {"value": "", "type": "string"}, - "pdf_base64": {"value": "", "type": "string"}, "download": {"value": "", "type": "string"}, - "success": {"value": False, "type": "boolean"} } def check(self): - self.check_empty(self.content, "[PDFGenerator] Content") - self.check_valid_value(self.output_format, "[PDFGenerator] Output format", ["pdf", "docx", "txt"]) - self.check_valid_value(self.logo_position, "[PDFGenerator] Logo position", ["left", "center", "right"]) - self.check_valid_value(self.font_family, "[PDFGenerator] Font family", - ["Helvetica", "Times-Roman", "Courier", "Helvetica-Bold", "Times-Bold"]) - self.check_valid_value(self.page_size, "[PDFGenerator] Page size", ["A4", "Letter"]) - self.check_valid_value(self.orientation, "[PDFGenerator] Orientation", ["portrait", "landscape"]) - self.check_positive_number(self.font_size, "[PDFGenerator] Font size") - self.check_positive_number(self.margin_top, "[PDFGenerator] Margin top") + self.check_empty(self.content, "[DocGenerator] Content") + self.check_valid_value( + self.output_format, + "[DocGenerator] Output format", + ["pdf", "docx", "txt", "markdown", "html"], + ) + self.check_positive_number(self.font_size, "[DocGenerator] Font size") + if self.font_size < 12: + raise ValueError("[DocGenerator] Font size must be greater than or equal to 12") -class PDFGenerator(Message, ABC): - component_name = "PDFGenerator" - - # Track if Unicode fonts have been registered - _unicode_fonts_registered = False - _unicode_font_name = None - _unicode_font_bold_name = None - - @classmethod - def _reset_font_cache(cls): - """Reset font registration cache - useful for testing""" - cls._unicode_fonts_registered = False - cls._unicode_font_name = None - cls._unicode_font_bold_name = None - - @classmethod - def _register_unicode_fonts(cls): - """Register Unicode-compatible fonts for multi-language support. - - Uses CID fonts (STSong-Light) for reliable CJK rendering as TTF fonts - have issues with glyph mapping in some ReportLab versions. - """ - # If already registered successfully, return True - if cls._unicode_fonts_registered and cls._unicode_font_name is not None: - return True - - # Reset and try again if previous registration failed - cls._unicode_fonts_registered = True - cls._unicode_font_name = None - cls._unicode_font_bold_name = None - - # Use CID fonts for reliable CJK support - # These are built into ReportLab and work reliably across all platforms - cid_fonts = [ - 'STSong-Light', # Simplified Chinese - 'HeiseiMin-W3', # Japanese - 'HYSMyeongJo-Medium', # Korean - ] - - for cid_font in cid_fonts: - try: - pdfmetrics.registerFont(UnicodeCIDFont(cid_font)) - cls._unicode_font_name = cid_font - cls._unicode_font_bold_name = cid_font # CID fonts don't have bold variants - print(f"Registered CID font: {cid_font}") - break - except Exception as e: - print(f"Failed to register CID font {cid_font}: {e}") - continue - - # If CID fonts fail, try TTF fonts as fallback - if not cls._unicode_font_name: - font_paths = [ - '/usr/share/fonts/truetype/freefont/FreeSans.ttf', - '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', - ] - - for font_path in font_paths: - if os.path.exists(font_path): - try: - pdfmetrics.registerFont(TTFont('UnicodeFont', font_path)) - cls._unicode_font_name = 'UnicodeFont' - cls._unicode_font_bold_name = 'UnicodeFont' - print(f"Registered TTF font from: {font_path}") - - # Register font family - from reportlab.pdfbase.pdfmetrics import registerFontFamily - registerFontFamily('UnicodeFont', normal='UnicodeFont', bold='UnicodeFont') - break - except Exception as e: - print(f"Failed to register TTF font {font_path}: {e}") - continue - - return cls._unicode_font_name is not None - - @staticmethod - def _needs_unicode_font(text: str) -> bool: - """Check if text contains CJK or other complex scripts that need special fonts. - - Standard PDF fonts (Helvetica, Times, Courier) support: - - Basic Latin, Extended Latin, Cyrillic, Greek - - CID fonts are needed for: - - CJK (Chinese, Japanese, Korean) - - Arabic, Hebrew (RTL scripts) - - Thai, Hindi, and other Indic scripts - """ - if not text: - return False - - for char in text: - code = ord(char) - - # CJK Unified Ideographs and related ranges - if 0x4E00 <= code <= 0x9FFF: # CJK Unified Ideographs - return True - if 0x3400 <= code <= 0x4DBF: # CJK Extension A - return True - if 0x3000 <= code <= 0x303F: # CJK Symbols and Punctuation - return True - if 0x3040 <= code <= 0x309F: # Hiragana - return True - if 0x30A0 <= code <= 0x30FF: # Katakana - return True - if 0xAC00 <= code <= 0xD7AF: # Hangul Syllables - return True - if 0x1100 <= code <= 0x11FF: # Hangul Jamo - return True - - # Arabic and Hebrew (RTL scripts) - if 0x0600 <= code <= 0x06FF: # Arabic - return True - if 0x0590 <= code <= 0x05FF: # Hebrew - return True - - # Indic scripts - if 0x0900 <= code <= 0x097F: # Devanagari (Hindi) - return True - if 0x0E00 <= code <= 0x0E7F: # Thai - return True - - return False - - def _get_font_for_content(self, content: str) -> tuple: - """Get appropriate font based on content, returns (regular_font, bold_font)""" - if self._needs_unicode_font(content): - if self._register_unicode_fonts() and self._unicode_font_name: - return (self._unicode_font_name, self._unicode_font_bold_name or self._unicode_font_name) - else: - print("Warning: Content contains non-Latin characters but no Unicode font available") - - # Fall back to configured font - return (self._param.font_family, self._get_bold_font_name()) - - def _get_active_font(self) -> str: - """Get the currently active font (Unicode or configured)""" - return getattr(self, '_active_font', self._param.font_family) - - def _get_active_bold_font(self) -> str: - """Get the currently active bold font (Unicode or configured)""" - return getattr(self, '_active_bold_font', self._get_bold_font_name()) - - def _get_bold_font_name(self) -> str: - """Get the correct bold variant of the current font family""" - font_map = { - 'Helvetica': 'Helvetica-Bold', - 'Times-Roman': 'Times-Bold', - 'Courier': 'Courier-Bold', - } - font_family = getattr(self._param, 'font_family', 'Helvetica') - if 'Bold' in font_family: - return font_family - return font_map.get(font_family, 'Helvetica-Bold') +class DocGenerator(Message, ABC): + component_name = "DocGenerator" + _default_output_directory = os.path.join(tempfile.gettempdir(), "doc_outputs") + _overlay_margin = 36 + _overlay_font_size = 9 + _pdf_main_font = "Noto Sans CJK SC" + _pdf_cjk_font = "Noto Sans CJK SC" + _pdf_overlay_font = "STSong-Light" def get_input_form(self) -> dict[str, dict]: return { "content": { "name": "Content", - "type": "text" - }, - "title": { - "name": "Title", - "type": "line" - }, - "subtitle": { - "name": "Subtitle", - "type": "line" + "type": "text", } } - @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))) + @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60))) def _invoke(self, **kwargs): - import traceback - + file_path = None try: - # Get content from parameters (which may contain variable references) - content = self._param.content or "" - title = self._param.title or "" - subtitle = self._param.subtitle or "" - - # Log PDF generation start - print(f"Starting PDF generation for title: {title}, content length: {len(content)} chars") - - # Resolve variable references in content using canvas - if content and self._canvas.is_reff(content.strip()): - # Extract the variable reference and get its value - import re - matches = re.findall(self.variable_ref_patt, content, flags=re.DOTALL) - for match in matches: - try: - var_value = self._canvas.get_variable_value(match) - if var_value: - # Handle partial (streaming) content - if isinstance(var_value, partial): - resolved_content = "" - for chunk in var_value(): - resolved_content += chunk - content = content.replace("{" + match + "}", resolved_content) - else: - content = content.replace("{" + match + "}", str(var_value)) - except Exception as e: - print(f"Error resolving variable {match}: {str(e)}") - content = content.replace("{" + match + "}", f"[ERROR: {str(e)}]") - - # Also process with get_kwargs for any remaining variables - if content: - try: - content, _ = self.get_kwargs(content, kwargs) - except Exception as e: - print(f"Error processing content with get_kwargs: {str(e)}") - - # Process template variables in title - if title and self._canvas.is_reff(title): - try: - matches = re.findall(self.variable_ref_patt, title, flags=re.DOTALL) - for match in matches: - var_value = self._canvas.get_variable_value(match) - if var_value: - title = title.replace("{" + match + "}", str(var_value)) - except Exception as e: - print(f"Error processing title variables: {str(e)}") - - if title: - try: - title, _ = self.get_kwargs(title, kwargs) - except Exception: - pass - - # Process template variables in subtitle - if subtitle and self._canvas.is_reff(subtitle): - try: - matches = re.findall(self.variable_ref_patt, subtitle, flags=re.DOTALL) - for match in matches: - var_value = self._canvas.get_variable_value(match) - if var_value: - subtitle = subtitle.replace("{" + match + "}", str(var_value)) - except Exception as e: - print(f"Error processing subtitle variables: {str(e)}") - - if subtitle: - try: - subtitle, _ = self.get_kwargs(subtitle, kwargs) - except Exception: - pass - - # If content is still empty, check if it was passed directly - if not content: - content = kwargs.get("content", "") - - # Generate document based on format + content = self._resolve_content(kwargs) + output_format = self._param.output_format or "pdf" + try: - output_format = self._param.output_format or "pdf" - if output_format == "pdf": - file_path, doc_base64 = self._generate_pdf(content, title, subtitle) + file_path, file_bytes = self._generate_pdf(content) mime_type = "application/pdf" elif output_format == "docx": - file_path, doc_base64 = self._generate_docx(content, title, subtitle) + file_path, file_bytes = self._generate_docx(content) mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" elif output_format == "txt": - file_path, doc_base64 = self._generate_txt(content, title, subtitle) + file_path, file_bytes = self._generate_txt(content) mime_type = "text/plain" + elif output_format == "markdown": + file_path, file_bytes = self._generate_markdown(content) + mime_type = "text/markdown" + elif output_format == "html": + file_path, file_bytes = self._generate_html(content) + mime_type = "text/html" else: raise Exception(f"Unsupported output format: {output_format}") - + filename = os.path.basename(file_path) - - # Verify the file was created and has content - if not os.path.exists(file_path): - raise Exception(f"Document file was not created: {file_path}") - - file_size = os.path.getsize(file_path) - if file_size == 0: - raise Exception(f"Document file is empty: {file_path}") - - print(f"Successfully generated {output_format.upper()}: {file_path} (Size: {file_size} bytes)") - - # Set outputs - self.set_output("file_path", file_path) - self.set_output("pdf_base64", doc_base64) # Keep same output name for compatibility - self.set_output("success", True) - - # Create download info object + if not file_bytes: + raise Exception("Document file is empty") + + file_size = len(file_bytes) + doc_id = get_uuid() + settings.STORAGE_IMPL.put(self._canvas.get_tenant_id(), doc_id, file_bytes) + + logging.info( + "Successfully generated %s: %s (Size: %s bytes)", + output_format.upper(), + filename, + file_size, + ) + download_info = { + "doc_id": doc_id, "filename": filename, - "path": file_path, - "base64": doc_base64, "mime_type": mime_type, - "size": file_size + "size": file_size, } - # Output download info as JSON string so it can be used in Message block - download_json = json.dumps(download_info) - self.set_output("download", download_json) - + self.set_output("download", json.dumps(download_info)) return download_info - + except Exception as e: - error_msg = f"Error in _generate_pdf: {str(e)}\n{traceback.format_exc()}" - print(error_msg) - self.set_output("success", False) - self.set_output("_ERROR", f"PDF generation failed: {str(e)}") + logging.exception("Error generating %s document", output_format) + self.set_output("_ERROR", f"Document generation failed: {str(e)}") raise - + except Exception as e: - error_msg = f"Error in PDFGenerator._invoke: {str(e)}\n{traceback.format_exc()}" - print(error_msg) - self.set_output("success", False) - self.set_output("_ERROR", f"PDF generation failed: {str(e)}") + logging.exception("Error in DocGenerator._invoke") + self.set_output("_ERROR", f"Document generation failed: {str(e)}") raise - - def _generate_pdf(self, content: str, title: str = "", subtitle: str = "") -> tuple[str, str]: - """Generate PDF from markdown-style content with improved error handling and concurrency support""" - import uuid - import traceback - - # Create output directory if it doesn't exist - os.makedirs(self._param.output_directory, exist_ok=True) - - # Initialize variables that need cleanup - buffer = None - temp_file_path = None - file_path = None - - try: - # Generate a unique filename to prevent conflicts - if self._param.filename: - base_name = os.path.splitext(self._param.filename)[0] - filename = f"{base_name}_{uuid.uuid4().hex[:8]}.pdf" - else: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"document_{timestamp}_{uuid.uuid4().hex[:8]}.pdf" - - file_path = os.path.join(self._param.output_directory, filename) - temp_file_path = f"{file_path}.tmp" - - # Setup page size - page_size = A4 - if self._param.orientation == "landscape": - page_size = (A4[1], A4[0]) - - # Create PDF buffer and document - buffer = BytesIO() - doc = SimpleDocTemplate( - buffer, - pagesize=page_size, - topMargin=self._param.margin_top * inch, - bottomMargin=self._param.margin_bottom * inch, - leftMargin=self._param.margin_left * inch, - rightMargin=self._param.margin_right * inch - ) - - # Build story (content elements) - story = [] - # Combine all text content for Unicode font detection - all_text = f"{title} {subtitle} {content}" - - # IMPORTANT: Register Unicode fonts BEFORE creating any styles or Paragraphs - # This ensures the font family is available for ReportLab's HTML parser - if self._needs_unicode_font(all_text): - self._register_unicode_fonts() - - styles = self._create_styles(all_text) - - # Add logo if provided - if self._param.logo_image: - logo = self._add_logo() - if logo: - story.append(logo) - story.append(Spacer(1, 0.3 * inch)) - - # Add title - if title: - title_para = Paragraph(self._escape_html(title), styles['PDFTitle']) - story.append(title_para) - story.append(Spacer(1, 0.2 * inch)) - - # Add subtitle - if subtitle: - subtitle_para = Paragraph(self._escape_html(subtitle), styles['PDFSubtitle']) - story.append(subtitle_para) - story.append(Spacer(1, 0.3 * inch)) - - # Add timestamp if enabled - if self._param.add_timestamp: - timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - timestamp_para = Paragraph(timestamp_text, styles['Italic']) - story.append(timestamp_para) - story.append(Spacer(1, 0.2 * inch)) - - # Parse and add content - content_elements = self._parse_markdown_content(content, styles) - story.extend(content_elements) - - # Build PDF - doc.build(story, onFirstPage=self._add_page_decorations, onLaterPages=self._add_page_decorations) - - # Get PDF bytes - pdf_bytes = buffer.getvalue() - - # Write to temporary file first - with open(temp_file_path, 'wb') as f: - f.write(pdf_bytes) - - # Atomic rename to final filename (works across different filesystems) - if os.path.exists(file_path): - os.remove(file_path) - os.rename(temp_file_path, file_path) - - # Verify the file was created and has content - if not os.path.exists(file_path): - raise Exception(f"Failed to create output file: {file_path}") - - file_size = os.path.getsize(file_path) - if file_size == 0: - raise Exception(f"Generated PDF is empty: {file_path}") - - # Convert to base64 - pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8') - - return file_path, pdf_base64 - - except Exception as e: - # Clean up any temporary files on error - if temp_file_path and os.path.exists(temp_file_path): - try: - os.remove(temp_file_path) - except Exception as cleanup_error: - print(f"Error cleaning up temporary file: {cleanup_error}") - - error_msg = f"Error generating PDF: {str(e)}\n{traceback.format_exc()}" - print(error_msg) - raise Exception(f"PDF generation failed: {str(e)}") - finally: - # Ensure buffer is always closed - if buffer is not None: + if file_path and os.path.exists(file_path): + os.remove(file_path) + + def _resolve_content(self, kwargs: dict) -> str: + content = self._param.content or "" + logging.info("Starting document generation, content length: %s chars", len(content)) + + if content and self._canvas.is_reff(content.strip()): + matches = re.findall(self.variable_ref_patt, content, flags=re.DOTALL) + for match in matches: try: - buffer.close() - except Exception as close_error: - print(f"Error closing buffer: {close_error}") - - def _create_styles(self, content: str = ""): - """Create custom paragraph styles with Unicode font support if needed""" - # Check if content contains CJK characters that need special fonts - needs_cjk = self._needs_unicode_font(content) - - if needs_cjk: - # Use CID fonts for CJK content - if self._register_unicode_fonts() and self._unicode_font_name: - regular_font = self._unicode_font_name - bold_font = self._unicode_font_bold_name or self._unicode_font_name - print(f"Using CID font for CJK content: {regular_font}") - else: - # Fall back to configured font if CID fonts unavailable - regular_font = self._param.font_family - bold_font = self._get_bold_font_name() - print(f"Warning: CJK content detected but no CID font available, using {regular_font}") - else: - # Use user-selected font for Latin-only content - regular_font = self._param.font_family - bold_font = self._get_bold_font_name() - print(f"Using configured font: {regular_font}") - - # Store active fonts as instance variables for use in other methods - self._active_font = regular_font - self._active_bold_font = bold_font - - # Get fresh style sheet - styles = getSampleStyleSheet() - - # Helper function to get the correct bold font name - def get_bold_font(font_family): - """Get the correct bold variant of a font family""" - # If using Unicode font, return the Unicode bold - if font_family in ('UnicodeFont', self._unicode_font_name): - return bold_font - font_map = { - 'Helvetica': 'Helvetica-Bold', - 'Times-Roman': 'Times-Bold', - 'Courier': 'Courier-Bold', - } - if 'Bold' in font_family: - return font_family - return font_map.get(font_family, 'Helvetica-Bold') - - # Use detected font instead of configured font for non-Latin content - active_font = regular_font - active_bold_font = bold_font - - # Helper function to add or update style - def add_or_update_style(name, **kwargs): - if name in styles: - # Update existing style - style = styles[name] - for key, value in kwargs.items(): - setattr(style, key, value) - else: - # Add new style - styles.add(ParagraphStyle(name=name, **kwargs)) - - # IMPORTANT: Update base styles to use Unicode font for non-Latin content - # This ensures ALL text uses the correct font, not just our custom styles - add_or_update_style('Normal', fontName=active_font) - add_or_update_style('BodyText', fontName=active_font) - add_or_update_style('Bullet', fontName=active_font) - add_or_update_style('Heading1', fontName=active_bold_font) - add_or_update_style('Heading2', fontName=active_bold_font) - add_or_update_style('Heading3', fontName=active_bold_font) - add_or_update_style('Title', fontName=active_bold_font) - - # Title style - add_or_update_style( - 'PDFTitle', - parent=styles['Heading1'], - fontSize=self._param.title_font_size, - textColor=colors.HexColor(self._param.title_color), - fontName=active_bold_font, - alignment=TA_CENTER, - spaceAfter=12 - ) - - # Subtitle style - add_or_update_style( - 'PDFSubtitle', - parent=styles['Heading2'], - fontSize=self._param.heading2_font_size, - textColor=colors.HexColor(self._param.text_color), - fontName=active_font, - alignment=TA_CENTER, - spaceAfter=12 - ) - - # Custom heading styles - add_or_update_style( - 'CustomHeading1', - parent=styles['Heading1'], - fontSize=self._param.heading1_font_size, - fontName=active_bold_font, - textColor=colors.HexColor(self._param.text_color), - spaceAfter=12, - spaceBefore=12 - ) - - add_or_update_style( - 'CustomHeading2', - parent=styles['Heading2'], - fontSize=self._param.heading2_font_size, - fontName=active_bold_font, - textColor=colors.HexColor(self._param.text_color), - spaceAfter=10, - spaceBefore=10 - ) - - add_or_update_style( - 'CustomHeading3', - parent=styles['Heading3'], - fontSize=self._param.heading3_font_size, - fontName=active_bold_font, - textColor=colors.HexColor(self._param.text_color), - spaceAfter=8, - spaceBefore=8 - ) - - # Body text style - add_or_update_style( - 'CustomBody', - parent=styles['BodyText'], - fontSize=self._param.font_size, - fontName=active_font, - textColor=colors.HexColor(self._param.text_color), - leading=self._param.font_size * self._param.line_spacing, - alignment=TA_JUSTIFY - ) - - # Bullet style - add_or_update_style( - 'CustomBullet', - parent=styles['BodyText'], - fontSize=self._param.font_size, - fontName=active_font, - textColor=colors.HexColor(self._param.text_color), - leftIndent=20, - bulletIndent=10 - ) - - # Code style (keep Courier for code blocks) - add_or_update_style( - 'PDFCode', - parent=styles.get('Code', styles['Normal']), - fontSize=self._param.font_size - 1, - fontName='Courier', - textColor=colors.HexColor('#333333'), - backColor=colors.HexColor('#f5f5f5'), - leftIndent=20, - rightIndent=20 - ) - - # Italic style - add_or_update_style( - 'Italic', - parent=styles['Normal'], - fontSize=self._param.font_size, - fontName=active_font, - textColor=colors.HexColor(self._param.text_color) - ) - - return styles - - def _parse_markdown_content(self, content: str, styles): - """Parse markdown-style content and convert to PDF elements""" - elements = [] - lines = content.split('\n') - - i = 0 - while i < len(lines): - line = lines[i].strip() - - # Skip empty lines - if not line: - elements.append(Spacer(1, 0.1 * inch)) - i += 1 - continue - - # Horizontal rule - if line == '---' or line == '___': - elements.append(Spacer(1, 0.1 * inch)) - elements.append(self._create_horizontal_line()) - elements.append(Spacer(1, 0.1 * inch)) - i += 1 - continue - - # Heading 1 - if line.startswith('# ') and not line.startswith('## '): - text = line[2:].strip() - elements.append(Paragraph(self._format_inline(text), styles['CustomHeading1'])) - i += 1 - continue - - # Heading 2 - if line.startswith('## ') and not line.startswith('### '): - text = line[3:].strip() - elements.append(Paragraph(self._format_inline(text), styles['CustomHeading2'])) - i += 1 - continue - - # Heading 3 - if line.startswith('### '): - text = line[4:].strip() - elements.append(Paragraph(self._format_inline(text), styles['CustomHeading3'])) - i += 1 - continue - - # Bullet list - if line.startswith('- ') or line.startswith('* '): - bullet_items = [] - while i < len(lines) and (lines[i].strip().startswith('- ') or lines[i].strip().startswith('* ')): - item_text = lines[i].strip()[2:].strip() - formatted = self._format_inline(item_text) - bullet_items.append(f"• {formatted}") - i += 1 - for item in bullet_items: - elements.append(Paragraph(item, styles['CustomBullet'])) - continue - - # Numbered list - if re.match(r'^\d+\.\s', line): - numbered_items = [] - counter = 1 - while i < len(lines) and re.match(r'^\d+\.\s', lines[i].strip()): - item_text = re.sub(r'^\d+\.\s', '', lines[i].strip()) - numbered_items.append(f"{counter}. {self._format_inline(item_text)}") - counter += 1 - i += 1 - for item in numbered_items: - elements.append(Paragraph(item, styles['CustomBullet'])) - continue - - # Table detection (markdown table must start with |) - if line.startswith('|') and '|' in line: - table_lines = [] - # Collect all consecutive lines that look like table rows - while i < len(lines) and lines[i].strip() and '|' in lines[i]: - table_lines.append(lines[i].strip()) - i += 1 - - # Only process if we have at least 2 lines (header + separator or header + data) - if len(table_lines) >= 2: - table_elements = self._create_table(table_lines) - if table_elements: - # _create_table now returns a list of elements - elements.extend(table_elements) - elements.append(Spacer(1, 0.2 * inch)) - continue - else: - # Not a valid table, treat as regular text - i -= len(table_lines) # Reset position - - # Code block - if line.startswith('```'): - code_lines = [] - i += 1 - while i < len(lines) and not lines[i].strip().startswith('```'): - code_lines.append(lines[i]) - i += 1 - if i < len(lines): - i += 1 - code_text = '\n'.join(code_lines) - elements.append(Paragraph(self._escape_html(code_text), styles['PDFCode'])) - elements.append(Spacer(1, 0.1 * inch)) - continue - - # Regular paragraph - paragraph_lines = [line] - i += 1 - while i < len(lines) and lines[i].strip() and not self._is_special_line(lines[i]): - paragraph_lines.append(lines[i].strip()) - i += 1 - - paragraph_text = ' '.join(paragraph_lines) - formatted_text = self._format_inline(paragraph_text) - elements.append(Paragraph(formatted_text, styles['CustomBody'])) - elements.append(Spacer(1, 0.1 * inch)) - - return elements - - def _is_special_line(self, line: str) -> bool: - """Check if line is a special markdown element""" - line = line.strip() - return (line.startswith('#') or - line.startswith('- ') or - line.startswith('* ') or - re.match(r'^\d+\.\s', line) or - line in ['---', '___'] or - line.startswith('```') or - '|' in line) - - def _format_inline(self, text: str) -> str: - """Format inline markdown (bold, italic, code)""" - # First, escape the existing HTML to not conflict with our tags. - text = self._escape_html(text) - - # IMPORTANT: Process inline code FIRST to protect underscores inside code blocks - # Use a placeholder to protect code blocks from italic/bold processing - code_blocks = [] - def save_code(match): - code_blocks.append(match.group(1)) - return f"__CODE_BLOCK_{len(code_blocks)-1}__" - - text = re.sub(r'`(.+?)`', save_code, text) - - # Then, apply markdown formatting. - # The order is important: from most specific to least specific. - - # Bold and italic combined: ***text*** or ___text___ - text = re.sub(r'\*\*\*(.+?)\*\*\*', r'\1', text) - text = re.sub(r'___(.+?)___', r'\1', text) - - # Bold: **text** or __text__ - text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) - text = re.sub(r'__([^_]+?)__', r'\1', text) # More restrictive to avoid matching placeholders - - # Italic: *text* or _text_ (but not underscores in words like variable_name) - text = re.sub(r'\*([^*]+?)\*', r'\1', text) - # Only match _text_ when surrounded by spaces or at start/end, not mid-word underscores - text = re.sub(r'(?\1', text) - - # Restore code blocks with proper formatting - for i, code in enumerate(code_blocks): - text = text.replace(f"__CODE_BLOCK_{i}__", f'{code}') - - return text - - def _escape_html(self, text: str) -> str: - """Escape HTML special characters and clean up markdown. - - Args: - text: Input text that may contain HTML or markdown - - Returns: - str: Cleaned and escaped text - """ - if not text: - return "" - - # Ensure we're working with a string - text = str(text) - - # Remove HTML form elements and tags - text = re.sub(r']*>', '', text, flags=re.IGNORECASE) # Remove input tags - text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove textarea - text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove select - text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove buttons - text = re.sub(r']*>.*?', '', text, flags=re.IGNORECASE | re.DOTALL) # Remove forms - - # Remove other common HTML tags (but preserve content) - text = re.sub(r']*>', '', text, flags=re.IGNORECASE) - text = re.sub(r'', '', text, flags=re.IGNORECASE) - text = re.sub(r']*>', '', text, flags=re.IGNORECASE) - text = re.sub(r'', '', text, flags=re.IGNORECASE) - text = re.sub(r']*>', '', text, flags=re.IGNORECASE) - text = re.sub(r'

', '\n', text, flags=re.IGNORECASE) - - # First, handle common markdown table artifacts - text = re.sub(r'^[|\-\s:]+$', '', text, flags=re.MULTILINE) # Remove separator lines - text = re.sub(r'^\s*\|\s*|\s*\|\s*$', '', text) # Remove leading/trailing pipes - text = re.sub(r'\s*\|\s*', ' | ', text) # Normalize pipes - - # Remove markdown links, but keep other formatting characters for _format_inline - text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # Remove markdown links - - # Escape HTML special characters - text = text.replace('&', '&') - text = text.replace('<', '<') - text = text.replace('>', '>') - - # Clean up excessive whitespace - text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text) # Multiple blank lines to double - text = re.sub(r' +', ' ', text) # Multiple spaces to single - - return text.strip() - - def _get_cell_style(self, row_idx: int, is_header: bool = False, font_size: int = None) -> 'ParagraphStyle': - """Get the appropriate style for a table cell.""" - styles = getSampleStyleSheet() - - # Helper function to get the correct bold font name - def get_bold_font(font_family): - font_map = { - 'Helvetica': 'Helvetica-Bold', - 'Times-Roman': 'Times-Bold', - 'Courier': 'Courier-Bold', - } - if 'Bold' in font_family: - return font_family - return font_map.get(font_family, 'Helvetica-Bold') - - if is_header: - return ParagraphStyle( - 'TableHeader', - parent=styles['Normal'], - fontSize=self._param.font_size, - fontName=self._get_active_bold_font(), - textColor=colors.whitesmoke, - alignment=TA_CENTER, - leading=self._param.font_size * 1.2, - wordWrap='CJK' - ) - else: - font_size = font_size or (self._param.font_size - 1) - return ParagraphStyle( - 'TableCell', - parent=styles['Normal'], - fontSize=font_size, - fontName=self._get_active_font(), - textColor=colors.black, - alignment=TA_LEFT, - leading=font_size * 1.15, - wordWrap='CJK' - ) - - def _convert_table_to_definition_list(self, data: list[list[str]]) -> list: - """Convert a table to a definition list format for better handling of large content. - - This method handles both simple and complex tables, including those with nested content. - It ensures that large cell content is properly wrapped and paginated. - """ - elements = [] - styles = getSampleStyleSheet() - - # Base styles - base_font_size = getattr(self._param, 'font_size', 10) - - # Body style - body_style = ParagraphStyle( - 'TableBody', - parent=styles['Normal'], - fontSize=base_font_size, - fontName=self._get_active_font(), - textColor=colors.HexColor(getattr(self._param, 'text_color', '#000000')), - spaceAfter=6, - leading=base_font_size * 1.2 - ) - - # Label style (for field names) - label_style = ParagraphStyle( - 'LabelStyle', - parent=body_style, - fontName=self._get_active_bold_font(), - textColor=colors.HexColor('#2c3e50'), - fontSize=base_font_size, - spaceAfter=4, - leftIndent=0, - leading=base_font_size * 1.3 - ) - - # Value style (for cell content) - clean, no borders - value_style = ParagraphStyle( - 'ValueStyle', - parent=body_style, - leftIndent=15, - rightIndent=0, - spaceAfter=8, - spaceBefore=2, - fontSize=base_font_size, - textColor=colors.HexColor('#333333'), - alignment=TA_JUSTIFY, - leading=base_font_size * 1.4, - # No borders or background - clean text only - ) - - try: - # If we have no data, return empty list - if not data or not any(data): - return elements - - # Get column headers or generate them - headers = [] - if data and len(data) > 0: - headers = [str(h).strip() for h in data[0]] - - # If no headers or empty headers, generate them - if not any(headers): - headers = [f"Column {i+1}" for i in range(len(data[0]) if data and len(data) > 0 else 0)] - - # Process each data row (skip header if it exists) - start_row = 1 if len(data) > 1 and any(data[0]) else 0 - - for row_idx in range(start_row, len(data)): - row = data[row_idx] if row_idx < len(data) else [] - if not row: - continue - - # Create a container for the row - row_elements = [] - - # Process each cell in the row - for col_idx in range(len(headers)): - if col_idx >= len(headers): + var_value = self._canvas.get_variable_value(match) + if var_value is None: continue - - # Get cell content - cell_text = str(row[col_idx]).strip() if col_idx < len(row) and row[col_idx] is not None else "" - - # Skip empty cells - if not cell_text or cell_text.isspace(): - continue - - # Clean up markdown artifacts for regular text content - cell_text = str(cell_text) # Ensure it's a string - - # Remove markdown table formatting - cell_text = re.sub(r'^[|\-\s:]+$', '', cell_text, flags=re.MULTILINE) # Remove separator lines - cell_text = re.sub(r'^\s*\|\s*|\s*\|\s*$', '', cell_text) # Remove leading/trailing pipes - cell_text = re.sub(r'\s*\|\s*', ' | ', cell_text) # Normalize pipes - cell_text = re.sub(r'\s+', ' ', cell_text).strip() # Normalize whitespace - - # Remove any remaining markdown formatting - cell_text = re.sub(r'`(.*?)`', r'\1', cell_text) # Remove code ticks - cell_text = re.sub(r'\*\*(.*?)\*\*', r'\1', cell_text) # Remove bold - cell_text = re.sub(r'\*(.*?)\*', r'\1', cell_text) # Remove italic - - # Clean up any HTML entities or special characters - cell_text = self._escape_html(cell_text) - - # If content still looks like a table, convert it to plain text - if '|' in cell_text and ('--' in cell_text or any(cell_text.count('|') > 2 for line in cell_text.split('\n') if line.strip())): - # Convert to a simple text format - lines = [line.strip() for line in cell_text.split('\n') if line.strip()] - cell_text = ' | '.join(lines[:5]) # Join first 5 lines with pipe - if len(lines) > 5: - cell_text += '...' - - # Process long content with better wrapping - max_chars_per_line = 100 # Reduced for better readability - max_paragraphs = 3 # Maximum number of paragraphs to show initially - - # Split into paragraphs - paragraphs = [p for p in cell_text.split('\n\n') if p.strip()] - - # If content is too long, truncate with "show more" indicator - if len(paragraphs) > max_paragraphs or any(len(p) > max_chars_per_line * 3 for p in paragraphs): - wrapped_paragraphs = [] - - for i, para in enumerate(paragraphs[:max_paragraphs]): - if len(para) > max_chars_per_line * 3: - # Split long paragraphs - words = para.split() - current_line = [] - current_length = 0 - - for word in words: - if current_line and current_length + len(word) + 1 > max_chars_per_line: - wrapped_paragraphs.append(' '.join(current_line)) - current_line = [word] - current_length = len(word) - else: - current_line.append(word) - current_length += len(word) + (1 if current_line else 0) - - if current_line: - wrapped_paragraphs.append(' '.join(current_line)) - else: - wrapped_paragraphs.append(para) - - # Add "show more" indicator if there are more paragraphs - if len(paragraphs) > max_paragraphs: - wrapped_paragraphs.append(f"... and {len(paragraphs) - max_paragraphs} more paragraphs") - - cell_text = '\n\n'.join(wrapped_paragraphs) - - # Add label and content with clean formatting (no borders) - label_para = Paragraph(f"{self._escape_html(headers[col_idx])}:", label_style) - value_para = Paragraph(self._escape_html(cell_text), value_style) - - # Add elements with proper spacing - row_elements.append(label_para) - row_elements.append(Spacer(1, 0.03 * 72)) # Tiny space between label and value - row_elements.append(value_para) - - # Add spacing between rows - if row_elements and row_idx < len(data) - 1: - # Add a subtle horizontal line as separator - row_elements.append(Spacer(1, 0.1 * 72)) - row_elements.append(self._create_horizontal_line(width=0.5, color='#e0e0e0')) - row_elements.append(Spacer(1, 0.15 * 72)) - - elements.extend(row_elements) - - # Add some space after the table - if elements: - elements.append(Spacer(1, 0.3 * 72)) # 0.3 inches in points - - except Exception as e: - # Fallback to simple text representation if something goes wrong - error_style = ParagraphStyle( - 'ErrorStyle', - parent=styles['Normal'], - fontSize=base_font_size - 1, - textColor=colors.red, - backColor=colors.HexColor('#fff0f0'), - borderWidth=1, - borderColor=colors.red, - borderPadding=5 - ) - - error_msg = [ - Paragraph("Error processing table:", error_style), - Paragraph(str(e), error_style), - Spacer(1, 0.2 * 72) - ] - - # Add a simplified version of the table - try: - for row in data[:10]: # Limit to first 10 rows to avoid huge error output - error_msg.append(Paragraph(" | ".join(str(cell) for cell in row), body_style)) - if len(data) > 10: - error_msg.append(Paragraph(f"... and {len(data) - 10} more rows", body_style)) - except Exception: - pass - - elements.extend(error_msg) - - return elements - - def _create_table(self, table_lines: list[str]) -> Optional[list]: - """Create a table from markdown table syntax with robust error handling. - - This method handles simple tables and falls back to a list format for complex cases. - - Returns: - A list of flowables (could be a table or alternative representation) - Returns None if the table cannot be created. - """ - if not table_lines or len(table_lines) < 2: - return None - - try: - # Parse table data - data = [] - max_columns = 0 - - for line in table_lines: - # Skip separator lines (e.g., |---|---|) - if re.match(r'^\|[\s\-:]+\|$', line): - continue - - # Handle empty lines within tables - if not line.strip(): - continue - - # Split by | and clean up cells - cells = [] - in_quotes = False - current_cell = "" - - # Custom split to handle escaped pipes and quoted content - for char in line[1:]: # Skip initial | - if char == '|' and not in_quotes: - cells.append(current_cell.strip()) - current_cell = "" - elif char == '"': - in_quotes = not in_quotes - current_cell += char - elif char == '\\' and not in_quotes: - # Handle escaped characters - pass + if isinstance(var_value, partial): + resolved_content = "" + for chunk in var_value(): + resolved_content += chunk + content = content.replace("{" + match + "}", resolved_content) else: - current_cell += char - - # Add the last cell - if current_cell.strip() or len(cells) > 0: - cells.append(current_cell.strip()) - - # Remove empty first/last elements if they're empty (from leading/trailing |) - if cells and not cells[0]: - cells = cells[1:] - if cells and not cells[-1]: - cells = cells[:-1] - - if cells: - data.append(cells) - max_columns = max(max_columns, len(cells)) - - if not data or max_columns == 0: - return None - - # Ensure all rows have the same number of columns - for row in data: - while len(row) < max_columns: - row.append('') - - # Calculate available width for table - from reportlab.lib.pagesizes import A4 - page_width = A4[0] if self._param.orientation == 'portrait' else A4[1] - available_width = page_width - (self._param.margin_left + self._param.margin_right) * inch - - # Check if we should use definition list format - max_cell_length = max((len(str(cell)) for row in data for cell in row), default=0) - total_rows = len(data) - - # Use definition list format if: - # - Any cell is too large (> 300 chars), OR - # - More than 6 columns, OR - # - More than 20 rows, OR - # - Contains nested tables or complex structures - has_nested_tables = any('|' in cell and '---' in cell for row in data for cell in row) - has_complex_cells = any(len(str(cell)) > 150 for row in data for cell in row) - - should_use_list_format = ( - max_cell_length > 300 or - max_columns > 6 or - total_rows > 20 or - has_nested_tables or - has_complex_cells - ) - - if should_use_list_format: - return self._convert_table_to_definition_list(data) - - # Process cells for normal table - processed_data = [] - for row_idx, row in enumerate(data): - processed_row = [] - for cell_idx, cell in enumerate(row): - cell_text = str(cell).strip() if cell is not None else "" - - # Handle empty cells - if not cell_text: - processed_row.append("") - continue - - # Clean up markdown table artifacts - cell_text = re.sub(r'\\\|', '|', cell_text) # Unescape pipes - cell_text = re.sub(r'\\n', '\n', cell_text) # Handle explicit newlines - - # Check for nested tables - if '|' in cell_text and '---' in cell_text: - # This cell contains a nested table - nested_lines = [line.strip() for line in cell_text.split('\n') if line.strip()] - nested_table = self._create_table(nested_lines) - if nested_table: - processed_row.append(nested_table[0]) # Add the nested table - continue - - # Process as regular text - font_size = self._param.font_size - 1 if row_idx > 0 else self._param.font_size - try: - style = self._get_cell_style(row_idx, is_header=(row_idx == 0), font_size=font_size) - escaped_text = self._escape_html(cell_text) - processed_row.append(Paragraph(escaped_text, style)) - except Exception: - processed_row.append(self._escape_html(cell_text)) - - processed_data.append(processed_row) - - # Calculate column widths - min_col_width = 0.5 * inch - max_cols = int(available_width / min_col_width) - - if max_columns > max_cols: - return self._convert_table_to_definition_list(data) - - col_width = max(min_col_width, available_width / max_columns) - col_widths = [col_width] * max_columns - - # Create the table - try: - table = LongTable(processed_data, colWidths=col_widths, repeatRows=1) - - # Define table style - table_style = [ - ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2c3e50')), # Darker header - ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), - ('ALIGN', (0, 0), (-1, 0), 'CENTER'), - ('FONTNAME', (0, 0), (-1, 0), self._get_active_bold_font()), - ('FONTSIZE', (0, 0), (-1, -1), self._param.font_size - 1), - ('BOTTOMPADDING', (0, 0), (-1, 0), 12), - ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#f8f9fa')), # Lighter background - ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#dee2e6')), # Lighter grid - ('VALIGN', (0, 0), (-1, -1), 'TOP'), - ('TOPPADDING', (0, 0), (-1, -1), 8), - ('BOTTOMPADDING', (0, 0), (-1, -1), 8), - ('LEFTPADDING', (0, 0), (-1, -1), 8), - ('RIGHTPADDING', (0, 0), (-1, -1), 8), - ] - - # Add zebra striping for better readability - for i in range(1, len(processed_data)): - if i % 2 == 0: - table_style.append(('BACKGROUND', (0, i), (-1, i), colors.HexColor('#f1f3f5'))) - - table.setStyle(TableStyle(table_style)) - - # Add a small spacer after the table - return [table, Spacer(1, 0.2 * inch)] - - except Exception as table_error: - print(f"Error creating table: {table_error}") - return self._convert_table_to_definition_list(data) - - except Exception as e: - print(f"Error processing table: {e}") - # Return a simple text representation of the table - try: - text_content = [] - for row in data: - text_content.append(" | ".join(str(cell) for cell in row)) - return [Paragraph("
".join(text_content), self._get_cell_style(0))] - except Exception: - return None + content = content.replace("{" + match + "}", str(var_value)) + except Exception as e: + logging.warning("Error resolving variable %s: %s", match, str(e)) + content = content.replace("{" + match + "}", f"[ERROR: {str(e)}]") - def _create_horizontal_line(self, width: float = 1, color: str = None): - """Create a horizontal line with customizable width and color - - Args: - width: Line thickness in points (default: 1) - color: Hex color string (default: grey) - - Returns: - HRFlowable: Horizontal line element - """ - from reportlab.platypus import HRFlowable - line_color = colors.HexColor(color) if color else colors.grey - return HRFlowable(width="100%", thickness=width, color=line_color, spaceBefore=0, spaceAfter=0) + if content: + try: + content, _ = self.get_kwargs(content, kwargs) + except Exception as e: + logging.warning("Error processing content with get_kwargs: %s", str(e)) + + if not content: + content = kwargs.get("content", "") + + return content + + def _get_output_directory(self) -> str: + os.makedirs(self._default_output_directory, exist_ok=True) + return self._default_output_directory + + def _build_output_filename(self, output_format: str) -> str: + import uuid + + if self._param.filename: + return sanitize_filename(self._param.filename, output_format.lower()) + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + return f"document_{timestamp}_{uuid.uuid4().hex[:8]}.{output_format}" + + def _get_timestamp_text(self) -> str: + return f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + + def _write_bytes_output(self, content: bytes, extension: str) -> tuple[str, bytes]: + output_directory = self._get_output_directory() + filename = self._build_output_filename(extension) + file_path = os.path.join(output_directory, filename) + with open(file_path, "wb") as f: + f.write(content) + return file_path, content + + def _build_markdown_source(self, content: str, include_timestamp_in_body: bool = False) -> str: + if not (include_timestamp_in_body and self._param.add_timestamp): + return content + return f"{self._get_timestamp_text()}\n\n{content}" + + def _get_heading_sizes(self) -> tuple[int, int, int]: + base = int(self._param.font_size) + return base + 6, base + 4, base + 2 + + def _generate_pandoc_binary_output( + self, + content: str, + target_format: str, + extension: str, + include_timestamp_in_body: bool = False, + extra_args: list[str] | None = None, + ) -> tuple[str, bytes]: + import pypandoc + + output_directory = self._get_output_directory() + filename = self._build_output_filename(extension) + file_path = os.path.join(output_directory, filename) + markdown_content = self._build_markdown_source( + content, + include_timestamp_in_body=include_timestamp_in_body, + ) + + pypandoc.convert_text( + markdown_content, + to=target_format, + format="markdown", + outputfile=file_path, + extra_args=extra_args or [], + ) + + with open(file_path, "rb") as f: + file_bytes = f.read() + + return file_path, file_bytes + + def _generate_pandoc_text_output( + self, + content: str, + target_format: str, + extension: str, + include_timestamp_in_body: bool = True, + ) -> tuple[str, bytes]: + import pypandoc + + markdown_content = self._build_markdown_source( + content, + include_timestamp_in_body=include_timestamp_in_body, + ) + converted_content = pypandoc.convert_text( + markdown_content, + to=target_format, + format="markdown", + ) + return self._write_bytes_output(converted_content.encode("utf-8"), extension) + + def _select_pdf_engine(self) -> str: + if shutil.which("xelatex"): + return "xelatex" + raise Exception("No PDF engine found. Install xelatex.") + + def _get_pdf_font_args(self) -> list[str]: + return [ + "-V", + f"mainfont={self._pdf_main_font}", + "-V", + f"CJKmainfont={self._pdf_cjk_font}", + ] + + def _get_pdf_overlay_font_name(self) -> str: + from reportlab.pdfbase import pdfmetrics + from reportlab.pdfbase.cidfonts import UnicodeCIDFont - def _add_logo(self) -> Optional[Image]: - """Add logo image to PDF""" try: - # Check if it's base64 or file path - if self._param.logo_image.startswith('data:image'): - # Extract base64 data - base64_data = self._param.logo_image.split(',')[1] - image_data = base64.b64decode(base64_data) - img = Image(BytesIO(image_data)) - elif os.path.exists(self._param.logo_image): - img = Image(self._param.logo_image) - else: - return None - - # Set size - img.drawWidth = self._param.logo_width * inch - img.drawHeight = self._param.logo_height * inch - - # Set alignment - if self._param.logo_position == 'center': - img.hAlign = 'CENTER' - elif self._param.logo_position == 'right': - img.hAlign = 'RIGHT' - else: - img.hAlign = 'LEFT' - - return img - except Exception as e: - print(f"Error adding logo: {e}") + pdfmetrics.getFont(self._pdf_overlay_font) + except KeyError: + pdfmetrics.registerFont(UnicodeCIDFont(self._pdf_overlay_font)) + + return self._pdf_overlay_font + + def _build_pdf_heading_overrides(self) -> str: + font_size = int(self._param.font_size) + leading = round(font_size * 1.2, 1) + h1_size, h2_size, h3_size = self._get_heading_sizes() + h1_leading = round(h1_size * 1.2, 1) + h2_leading = round(h2_size * 1.2, 1) + h3_leading = round(h3_size * 1.2, 1) + + return rf""" +\makeatletter +\renewcommand\normalsize{{ + \@setfontsize\normalsize{{{font_size}pt}}{{{leading}pt}} + \abovedisplayskip 12pt plus 3pt minus 7pt + \abovedisplayshortskip \z@ plus 3pt + \belowdisplayshortskip 6.5pt plus 3.5pt minus 3pt + \belowdisplayskip \abovedisplayskip + \let\@listi\@listI +}} +\normalsize +\renewcommand\section{{\@startsection{{section}}{{1}}{{\z@}}{{-3.5ex \@plus -1ex \@minus -.2ex}}{{2.3ex \@plus .2ex}}{{\normalfont\fontsize{{{h1_size}pt}}{{{h1_leading}pt}}\selectfont\bfseries}}}} +\renewcommand\subsection{{\@startsection{{subsection}}{{2}}{{\z@}}{{-3.25ex\@plus -1ex \@minus -.2ex}}{{1.5ex \@plus .2ex}}{{\normalfont\fontsize{{{h2_size}pt}}{{{h2_leading}pt}}\selectfont\bfseries}}}} +\renewcommand\subsubsection{{\@startsection{{subsubsection}}{{3}}{{\z@}}{{-3.25ex\@plus -1ex \@minus -.2ex}}{{1.5ex \@plus .2ex}}{{\normalfont\fontsize{{{h3_size}pt}}{{{h3_leading}pt}}\selectfont\bfseries}}}} +\makeatother +""".strip() + + def _write_temp_tex(self, content: str) -> str: + output_directory = self._get_output_directory() + with tempfile.NamedTemporaryFile( + mode="w", + encoding="utf-8", + suffix=".tex", + dir=output_directory, + delete=False, + ) as f: + f.write(content) + return f.name + + def _should_apply_pdf_overlay(self) -> bool: + return any( + [ + self._param.header_text, + self._param.footer_text, + self._param.watermark_text, + self._param.add_page_numbers, + self._param.add_timestamp, + ] + ) + + def _build_pdf_overlay_page(self, width: float, height: float, page_number: int): + if not self._should_apply_pdf_overlay(): return None - def _add_page_decorations(self, canvas, doc): - """Add header, footer, page numbers, watermark""" - canvas.saveState() - - # Get active font for decorations - active_font = self._get_active_font() - - # Add watermark + from pypdf import PdfReader + from reportlab.lib.colors import Color + from reportlab.pdfgen import canvas as pdf_canvas + + buffer = BytesIO() + overlay = pdf_canvas.Canvas(buffer, pagesize=(width, height)) + overlay_font = self._get_pdf_overlay_font_name() + if self._param.watermark_text: - canvas.setFont(active_font, 60) - canvas.setFillColorRGB(0.9, 0.9, 0.9, alpha=0.3) - canvas.saveState() - canvas.translate(doc.pagesize[0] / 2, doc.pagesize[1] / 2) - canvas.rotate(45) - canvas.drawCentredString(0, 0, self._param.watermark_text) - canvas.restoreState() - - # Add header + overlay.saveState() + if hasattr(overlay, "setFillAlpha"): + overlay.setFillAlpha(0.15) + overlay.setFillColor(Color(0.6, 0.6, 0.6)) + overlay.setFont(overlay_font, 48) + overlay.translate(width / 2, height / 2) + overlay.rotate(45) + overlay.drawCentredString(0, 0, self._param.watermark_text) + overlay.restoreState() + + overlay.setFont(overlay_font, self._overlay_font_size) + overlay.setFillColor(Color(0.35, 0.35, 0.35)) + if self._param.header_text: - canvas.setFont(active_font, 9) - canvas.setFillColorRGB(0.5, 0.5, 0.5) - canvas.drawString(doc.leftMargin, doc.pagesize[1] - 0.5 * inch, self._param.header_text) - - # Add footer + overlay.drawString( + self._overlay_margin, + height - self._overlay_margin + 8, + self._param.header_text, + ) + if self._param.footer_text: - canvas.setFont(active_font, 9) - canvas.setFillColorRGB(0.5, 0.5, 0.5) - canvas.drawString(doc.leftMargin, 0.5 * inch, self._param.footer_text) - - # Add page numbers + overlay.drawString( + self._overlay_margin, + self._overlay_margin - 8, + self._param.footer_text, + ) + + if self._param.add_timestamp: + overlay.drawCentredString( + width / 2, + self._overlay_margin - 8, + self._get_timestamp_text(), + ) + if self._param.add_page_numbers: - page_num = canvas.getPageNumber() - text = f"Page {page_num}" - canvas.setFont(active_font, 9) - canvas.setFillColorRGB(0.5, 0.5, 0.5) - canvas.drawRightString(doc.pagesize[0] - doc.rightMargin, 0.5 * inch, text) - - canvas.restoreState() + overlay.drawRightString( + width - self._overlay_margin, + self._overlay_margin - 8, + f"Page {page_number}", + ) + + overlay.save() + buffer.seek(0) + return PdfReader(buffer).pages[0] + + def _apply_pdf_overlay(self, file_path: str) -> tuple[str, bytes]: + from pypdf import PdfReader, PdfWriter + + if not self._should_apply_pdf_overlay(): + with open(file_path, "rb") as f: + file_bytes = f.read() + return file_path, file_bytes + + reader = PdfReader(file_path) + writer = PdfWriter() + + for page_number, page in enumerate(reader.pages, start=1): + overlay_page = self._build_pdf_overlay_page( + float(page.mediabox.width), + float(page.mediabox.height), + page_number, + ) + if overlay_page is not None: + page.merge_page(overlay_page) + writer.add_page(page) + + temp_file = f"{file_path}.overlay" + with open(temp_file, "wb") as f: + writer.write(f) + + os.replace(temp_file, file_path) + with open(file_path, "rb") as f: + file_bytes = f.read() + return file_path, file_bytes + + def _clear_docx_container(self, container): + element = container._element + for child in list(element): + element.remove(child) + + def _append_docx_field(self, run, instruction: str): + from docx.oxml import OxmlElement + + begin = OxmlElement("w:fldChar") + begin.set(run.part.element.nsmap["w"] and "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}fldCharType", "begin") + + instr = OxmlElement("w:instrText") + instr.set("{http://www.w3.org/XML/1998/namespace}space", "preserve") + instr.text = instruction + + end = OxmlElement("w:fldChar") + end.set(run.part.element.nsmap["w"] and "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}fldCharType", "end") + + run._r.append(begin) + run._r.append(instr) + run._r.append(end) + + def _add_docx_watermark(self, section): + if not self._param.watermark_text: + return + + from docx.enum.text import WD_ALIGN_PARAGRAPH + from docx.oxml import parse_xml + + header = section.header + paragraph = header.add_paragraph() + paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER + run = paragraph.add_run() + watermark_xml = parse_xml( + rf""" + + + + + + + """ + ) + run._r.append(watermark_xml) + + def _normalize_docx_section_geometry(self, section, default_section): + for attr in ("page_width", "left_margin", "right_margin"): + if getattr(section, attr) is None: + setattr(section, attr, getattr(default_section, attr)) + + def _get_docx_available_width(self, section): + page_width = section.page_width + left_margin = section.left_margin + right_margin = section.right_margin + + if page_width is None or left_margin is None or right_margin is None: + raise ValueError("DOCX section geometry is incomplete after normalization.") + + return page_width - left_margin - right_margin + + def _decorate_docx(self, file_path: str) -> tuple[str, bytes]: + from docx import Document + from docx.enum.text import WD_TAB_ALIGNMENT + from docx.shared import Pt + + document = Document(file_path) + default_section = Document().sections[0] + h1_size, h2_size, h3_size = self._get_heading_sizes() + + style_map = { + "Normal": int(self._param.font_size), + "Heading 1": h1_size, + "Heading 2": h2_size, + "Heading 3": h3_size, + } + for style_name, size in style_map.items(): + try: + document.styles[style_name].font.size = Pt(size) + except Exception: + continue + + for section in document.sections: + self._normalize_docx_section_geometry(section, default_section) + available_width = self._get_docx_available_width(section) + + header = section.header + header.is_linked_to_previous = False + self._clear_docx_container(header) + if self._param.header_text: + paragraph = header.add_paragraph() + paragraph.add_run(self._param.header_text) + + self._add_docx_watermark(section) + + footer = section.footer + footer.is_linked_to_previous = False + self._clear_docx_container(footer) + if any( + [ + self._param.footer_text, + self._param.add_timestamp, + self._param.add_page_numbers, + ] + ): + paragraph = footer.add_paragraph() + paragraph.paragraph_format.tab_stops.add_tab_stop( + int(available_width // 2), + WD_TAB_ALIGNMENT.CENTER, + ) + paragraph.paragraph_format.tab_stops.add_tab_stop( + int(available_width), + WD_TAB_ALIGNMENT.RIGHT, + ) + + if self._param.footer_text: + paragraph.add_run(self._param.footer_text) + + if self._param.add_timestamp or self._param.add_page_numbers: + paragraph.add_run("\t") + + if self._param.add_timestamp: + paragraph.add_run(self._get_timestamp_text()) + + if self._param.add_page_numbers: + paragraph.add_run("\t") + self._append_docx_field(paragraph.add_run(), " PAGE ") + + document.save(file_path) + with open(file_path, "rb") as f: + file_bytes = f.read() + return file_path, file_bytes def thoughts(self) -> str: - return "Generating PDF document with formatted content..." + return f"Generating {self._param.output_format.upper()} document with markdown conversion..." - def _generate_docx(self, content: str, title: str = "", subtitle: str = "") -> tuple[str, str]: - """Generate DOCX from markdown-style content""" - import uuid - from docx import Document - from docx.shared import Pt - from docx.enum.text import WD_ALIGN_PARAGRAPH - - # Create output directory if it doesn't exist - os.makedirs(self._param.output_directory, exist_ok=True) - + def _generate_pdf(self, content: str) -> tuple[str, bytes]: try: - # Generate filename - if self._param.filename: - base_name = os.path.splitext(self._param.filename)[0] - filename = f"{base_name}_{uuid.uuid4().hex[:8]}.docx" - else: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"document_{timestamp}_{uuid.uuid4().hex[:8]}.docx" - - file_path = os.path.join(self._param.output_directory, filename) - - # Create document - doc = Document() - - # Add title - if title: - title_para = doc.add_heading(title, level=0) - title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER - - # Add subtitle - if subtitle: - subtitle_para = doc.add_heading(subtitle, level=1) - subtitle_para.alignment = WD_ALIGN_PARAGRAPH.CENTER - - # Add timestamp if enabled - if self._param.add_timestamp: - timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - ts_para = doc.add_paragraph(timestamp_text) - ts_para.runs[0].italic = True - ts_para.runs[0].font.size = Pt(9) - - # Parse and add content - lines = content.split('\n') - i = 0 - while i < len(lines): - line = lines[i].strip() - - if not line: - i += 1 - continue - - # Headings - if line.startswith('# ') and not line.startswith('## '): - doc.add_heading(line[2:].strip(), level=1) - elif line.startswith('## ') and not line.startswith('### '): - doc.add_heading(line[3:].strip(), level=2) - elif line.startswith('### '): - doc.add_heading(line[4:].strip(), level=3) - # Bullet list - elif line.startswith('- ') or line.startswith('* '): - doc.add_paragraph(line[2:].strip(), style='List Bullet') - # Numbered list - elif re.match(r'^\d+\.\s', line): - text = re.sub(r'^\d+\.\s', '', line) - doc.add_paragraph(text, style='List Number') - # Regular paragraph - else: - para = doc.add_paragraph(line) - para.runs[0].font.size = Pt(self._param.font_size) - - i += 1 - - # Save document - doc.save(file_path) - - # Read and encode to base64 - with open(file_path, 'rb') as f: - doc_bytes = f.read() - doc_base64 = base64.b64encode(doc_bytes).decode('utf-8') - - return file_path, doc_base64 - + engine = self._select_pdf_engine() + header_path = self._write_temp_tex(self._build_pdf_heading_overrides()) + try: + file_path, _ = self._generate_pandoc_binary_output( + content, + "pdf", + "pdf", + include_timestamp_in_body=False, + extra_args=[ + "--standalone", + f"--pdf-engine={engine}", + f"--include-in-header={header_path}", + *self._get_pdf_font_args(), + ], + ) + finally: + if os.path.exists(header_path): + os.remove(header_path) + return self._apply_pdf_overlay(file_path) + except Exception as e: + raise Exception(f"PDF generation failed: {str(e)}") + + def _generate_docx(self, content: str) -> tuple[str, bytes]: + try: + file_path, _ = self._generate_pandoc_binary_output( + content, + "docx", + "docx", + include_timestamp_in_body=False, + extra_args=["--standalone"], + ) + return self._decorate_docx(file_path) except Exception as e: raise Exception(f"DOCX generation failed: {str(e)}") - def _generate_txt(self, content: str, title: str = "", subtitle: str = "") -> tuple[str, str]: - """Generate TXT from markdown-style content""" - import uuid - - # Create output directory if it doesn't exist - os.makedirs(self._param.output_directory, exist_ok=True) - + def _generate_txt(self, content: str) -> tuple[str, bytes]: try: - # Generate filename - if self._param.filename: - base_name = os.path.splitext(self._param.filename)[0] - filename = f"{base_name}_{uuid.uuid4().hex[:8]}.txt" - else: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"document_{timestamp}_{uuid.uuid4().hex[:8]}.txt" - - file_path = os.path.join(self._param.output_directory, filename) - - # Build text content - text_content = [] - - if title: - text_content.append(title.upper()) - text_content.append("=" * len(title)) - text_content.append("") - - if subtitle: - text_content.append(subtitle) - text_content.append("-" * len(subtitle)) - text_content.append("") - - if self._param.add_timestamp: - timestamp_text = f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" - text_content.append(timestamp_text) - text_content.append("") - - # Add content (keep markdown formatting for readability) - text_content.append(content) - - # Join and save - final_text = '\n'.join(text_content) - - with open(file_path, 'w', encoding='utf-8') as f: - f.write(final_text) - - # Encode to base64 - txt_base64 = base64.b64encode(final_text.encode('utf-8')).decode('utf-8') - - return file_path, txt_base64 - + return self._generate_pandoc_text_output(content, "plain", "txt") except Exception as e: raise Exception(f"TXT generation failed: {str(e)}") + + def _generate_markdown(self, content: str) -> tuple[str, bytes]: + try: + return self._generate_pandoc_text_output(content, "markdown", "md") + except Exception as e: + raise Exception(f"Markdown generation failed: {str(e)}") + + def _generate_html(self, content: str) -> tuple[str, bytes]: + try: + return self._generate_pandoc_text_output(content, "html", "html") + except Exception as e: + raise Exception(f"HTML generation failed: {str(e)}") diff --git a/agent/component/message.py b/agent/component/message.py index cc26ca52ba..8db4eedbd1 100644 --- a/agent/component/message.py +++ b/agent/component/message.py @@ -54,6 +54,9 @@ class MessageParam(ComponentParamBase): self.outputs = { "content": { "type": "str" + }, + "downloads": { + "type": "list" } } @@ -66,10 +69,66 @@ class MessageParam(ComponentParamBase): class Message(ComponentBase): component_name = "Message" + @staticmethod + def _is_download_info(value: Any) -> bool: + return isinstance(value, dict) and all( + key in value for key in ("doc_id", "filename", "mime_type") + ) + + def _extract_downloads(self, value: Any) -> list[dict[str, Any]]: + if isinstance(value, str): + try: + value = json.loads(value) + except Exception: + return [] + + if self._is_download_info(value): + return [value] + + if isinstance(value, list) and all(self._is_download_info(item) for item in value): + return value + + return [] + + def _stringify_message_value( + self, + value: Any, + delimiter: str = None, + downloads: list[dict[str, Any]] | None = None, + fallback_to_str: bool = False, + ) -> str: + extracted_downloads = self._extract_downloads(value) + if extracted_downloads: + if downloads is not None: + downloads.extend(extracted_downloads) + return "" + + if value is None: + return "" + + if isinstance(value, list) and delimiter: + return delimiter.join([str(vv) for vv in value]) + + if isinstance(value, str): + return value + + try: + return json.dumps(value, ensure_ascii=False) + except Exception: + if fallback_to_str: + return str(value) + return "" + def get_input_elements(self) -> dict[str, Any]: return self.get_input_elements_from_text("".join(self._param.content)) - def get_kwargs(self, script:str, kwargs:dict = {}, delimiter:str=None) -> tuple[str, dict[str, str | list | Any]]: + def get_kwargs( + self, + script: str, + kwargs: dict = {}, + delimiter: str = None, + downloads: list[dict[str, Any]] | None = None, + ) -> tuple[str, dict[str, str | list | Any]]: for k,v in self.get_input_elements_from_text(script).items(): if k in kwargs: continue @@ -84,15 +143,8 @@ class Message(ComponentBase): else: for t in iter_obj: ans += t - elif isinstance(v, list) and delimiter: - ans = delimiter.join([str(vv) for vv in v]) - elif not isinstance(v, str): - try: - ans = json.dumps(v, ensure_ascii=False) - except Exception: - pass else: - ans = v + ans = self._stringify_message_value(v, delimiter, downloads) if not ans: ans = "" kwargs[k] = ans @@ -115,6 +167,7 @@ class Message(ComponentBase): s = 0 all_content = "" cache = {} + downloads = [] for r in re.finditer(self.variable_ref_patt, rand_cnt, flags=re.DOTALL): if self.check_if_canceled("Message streaming"): return @@ -154,11 +207,9 @@ class Message(ComponentBase): continue elif inspect.isawaitable(v): v = await v - elif not isinstance(v, str): - try: - v = json.dumps(v, ensure_ascii=False) - except Exception: - v = str(v) + v = self._stringify_message_value( + v, downloads=downloads, fallback_to_str=True + ) yield v self.set_input_value(exp, v) all_content += v @@ -171,6 +222,7 @@ class Message(ComponentBase): all_content += rand_cnt[s: ] yield rand_cnt[s: ] + self.set_output("downloads", downloads) self.set_output("content", all_content) self._convert_content(all_content) await self._save_to_memory(all_content) @@ -191,12 +243,14 @@ class Message(ComponentBase): self.set_output("content", partial(self._stream, rand_cnt)) return - rand_cnt, kwargs = self.get_kwargs(rand_cnt, kwargs) + downloads = [] + rand_cnt, kwargs = self.get_kwargs(rand_cnt, kwargs, downloads=downloads) template = _jinja2_sandbox.from_string(rand_cnt) try: content = template.render(kwargs) - except Exception: - pass + except Exception as e: + logging.warning(f"Jinja2 template rendering failed: {e}") + content = rand_cnt # fallback to unrendered content if self.check_if_canceled("Message processing"): return @@ -204,6 +258,7 @@ class Message(ComponentBase): for n, v in kwargs.items(): content = re.sub(n, v, content) + self.set_output("downloads", downloads) self.set_output("content", content) self._convert_content(content) self._save_to_memory(content) diff --git a/agent/dsl_migration.py b/agent/dsl_migration.py index 6fef629376..ca4ee894c3 100644 --- a/agent/dsl_migration.py +++ b/agent/dsl_migration.py @@ -22,6 +22,7 @@ import re COMPONENT_RENAMES = { "Splitter": "TokenChunker", "HierarchicalMerger": "TitleChunker", + "PDFGenerator": "DocGenerator", } NODE_TYPE_RENAMES = { diff --git a/api/apps/user_app.py b/api/apps/user_app.py index 702e1bd855..7424899269 100644 --- a/api/apps/user_app.py +++ b/api/apps/user_app.py @@ -1029,7 +1029,6 @@ async def forget_reset_password(): new_pwd_string = base64.b64decode(new_pwd_base64).decode('utf-8') new_pwd2_string = base64.b64decode(decrypt(new_pwd2)).decode('utf-8') - REDIS_CONN.get(_verified_key(email)) if not REDIS_CONN.get(_verified_key(email)): return get_json_result(data=False, code=RetCode.AUTHENTICATION_ERROR, message="email not verified") diff --git a/docs/guides/agent/agent_component_reference/docs_generator.md b/docs/guides/agent/agent_component_reference/docs_generator.md deleted file mode 100644 index 3ed8e342af..0000000000 --- a/docs/guides/agent/agent_component_reference/docs_generator.md +++ /dev/null @@ -1,241 +0,0 @@ ---- -sidebar_position: 35 -slug: /docs_generator ---- - -# Docs Generator component - -A component that generates downloadable PDF, DOCX, or TXT documents from markdown-style content with full Unicode support. - ---- - -The **Docs Generator** component enables you to create professional documents directly within your agent workflow. It accepts markdown-formatted text and converts it into downloadable files, making it ideal for generating reports, summaries, or any structured document output. - -## Key features - -- **Multiple output formats**: PDF, DOCX, and TXT -- **Full Unicode support**: Automatic font switching for CJK (Chinese, Japanese, Korean), Arabic, Hebrew, and other non-Latin scripts -- **Rich formatting**: Headers, lists, tables, code blocks, and more -- **Customizable styling**: Fonts, margins, page size, and orientation -- **Document extras**: Logo, watermark, page numbers, and timestamps -- **Direct download**: Generates a download button for the chat interface - -## Prerequisites - -- Content to be converted into a document (typically from an **Agent** or other text-generating component). - -## Examples - -You can pair an **Agent** component with the **Docs Generator** to create dynamic documents based on user queries. The **Agent** generates the content, and the **Docs Generator** converts it into a downloadable file. Connect the output to a **Message** component to display the download button in the chat. - -A typical workflow looks like: - -``` -Begin → Agent → Docs Generator → Message -``` - -In the **Message** component, reference the `download` output variable from the **Docs Generator** to display a download button in the chat interface. - -## Configurations - -### Content - -The main text content to include in the document. Supports Markdown formatting: - -- **Bold**: `**text**` or `__text__` -- **Italic**: `*text*` or `_text_` -- **Inline code**: `` `code` `` -- **Headings**: `# Heading 1`, `## Heading 2`, `### Heading 3` -- **Bullet lists**: `- item` or `* item` -- **Numbered lists**: `1. item` -- **Tables**: `| Column 1 | Column 2 |` -- **Horizontal lines**: `---` -- **Code blocks**: ` ``` code ``` ` - -:::tip NOTE -Click **(x)** or type `/` to insert variables from upstream components. -::: - -### Title - -Optional. The document title displayed at the top of the generated file. - -### Subtitle - -Optional. A subtitle displayed below the title. - -### Output format - -The file format for the generated document: - -- **PDF** (default): Portable Document Format with full styling support. -- **DOCX**: Microsoft Word format. -- **TXT**: Plain text format. - -### Logo image - -Optional. A logo image to display at the top of the document. You can either: - -- Upload an image file using the file picker -- Paste an image path, URL, or base64-encoded data - -### Logo position - -The horizontal position of the logo: - -- **left** (default) -- **center** -- **right** - -### Logo dimensions - -- **Logo width**: Width in inches (default: `2.0`) -- **Logo height**: Height in inches (default: `1.0`) - -### Font family - -The font used throughout the document: - -- **Helvetica** (default) -- **Times-Roman** -- **Courier** -- **Helvetica-Bold** -- **Times-Bold** - -### Font size - -The base font size in points. Defaults to `12`. - -### Title font size - -The font size for the document title. Defaults to `24`. - -### Page size - -The paper size for the document: - -- **A4** (default) -- **Letter** - -### Orientation - -The page orientation: - -- **Portrait** (default) -- **Landscape** - -### Margins - -Page margins in inches: - -- **Margin top**: Defaults to `1.0` -- **Margin bottom**: Defaults to `1.0` -- **Margin left**: Defaults to `1.0` -- **Margin right**: Defaults to `1.0` - -### Filename - -Optional. Custom filename for the generated document. If left empty, a filename is auto-generated with a timestamp. - -### Output directory - -The server directory where generated documents are saved. Defaults to `/tmp/pdf_outputs`. - -### Add page numbers - -When enabled, page numbers are added to the footer of each page. Defaults to `true`. - -### Add timestamp - -When enabled, a generation timestamp is added to the document footer. Defaults to `true`. - -### Watermark text - -Optional. Text to display as a diagonal watermark across each page. Useful for marking documents as "Draft", "Confidential", etc. - -## Output - -The **Docs Generator** component provides the following output variables: - -| Variable name | Type | Description | -|---------------|-----------|--------------------------------------------------------------| -| `file_path` | `string` | The server path where the generated document is saved. | -| `pdf_base64` | `string` | The document content encoded in base64 format. | -| `download` | `string` | JSON containing download information for the chat interface. | -| `success` | `boolean` | Indicates whether the document was generated successfully. | - -### Displaying the download button - -To display a download button in the chat, add a **Message** component after the **Docs Generator** and reference the `download` variable: - -1. Connect the **Docs Generator** output to a **Message** component. -2. In the **Message** component's content field, type `/` and select `{Docs Generator_0@download}`. -3. When the agent runs, a download button will appear in the chat, allowing users to download the generated document. - -The download button automatically handles: -- File type detection (PDF, DOCX, TXT) -- Proper MIME type for browser downloads -- Base64 decoding for direct file delivery - -## Unicode and multi-language support - -The **Docs Generator** includes intelligent font handling for international content: - -### How it works - -1. **Content analysis**: The component scans the text for non-Latin characters. -2. **Automatic font switching**: When CJK or other complex scripts are detected, the system automatically switches to a compatible CID font (STSong-Light for Chinese, HeiseiMin-W3 for Japanese, HYSMyeongJo-Medium for Korean). -3. **Latin content**: For documents containing only Latin characters (including extended Latin, Cyrillic, and Greek), the user-selected font family is used. - -### Supported scripts - -| Script | Unicode Range | Font Used | -|------------------------------|---------------|--------------------| -| Chinese (CJK) | U+4E00–U+9FFF | STSong-Light | -| Japanese (Hiragana/Katakana) | U+3040–U+30FF | HeiseiMin-W3 | -| Korean (Hangul) | U+AC00–U+D7AF | HYSMyeongJo-Medium | -| Arabic | U+0600–U+06FF | CID font fallback | -| Hebrew | U+0590–U+05FF | CID font fallback | -| Devanagari (Hindi) | U+0900–U+097F | CID font fallback | -| Thai | U+0E00–U+0E7F | CID font fallback | - -### Font installation - -For full multi-language support in self-hosted deployments, ensure Unicode fonts are installed: - -**Linux (Debian/Ubuntu):** -```bash -apt-get install fonts-freefont-ttf fonts-noto-cjk -``` - -**Docker:** The official RAGFlow Docker image includes these fonts. For custom images, add the font packages to your Dockerfile: -```dockerfile -RUN apt-get update && apt-get install -y fonts-freefont-ttf fonts-noto-cjk -``` - -:::tip NOTE -CID fonts (STSong-Light, HeiseiMin-W3, etc.) are built into ReportLab and do not require additional installation. They are used automatically when CJK content is detected. -::: - -## Troubleshooting - -### Characters appear as boxes or question marks - -This indicates missing font support. Ensure: -1. The content contains supported Unicode characters. -2. For self-hosted deployments, Unicode fonts are installed on the server. -3. The document is being viewed in a PDF reader that supports embedded fonts. - -### Download button not appearing - -Ensure: -1. The **Message** component is connected after the **Docs Generator**. -2. The `download` variable is correctly referenced using `/` (which appears as `{Docs Generator_0@download}` when copied). -3. The document generation completed successfully (check `success` output). - -### Large tables not rendering correctly - -For tables with many columns or large cell content: -- The component automatically converts wide tables to a definition list format for better readability. -- Consider splitting large tables into multiple smaller tables. -- Use landscape orientation for wide tables. diff --git a/web/src/components/document-download-button/index.tsx b/web/src/components/document-download-button/index.tsx new file mode 100644 index 0000000000..02eefdd461 --- /dev/null +++ b/web/src/components/document-download-button/index.tsx @@ -0,0 +1,88 @@ +import { Button } from '@/components/ui/button'; +import { IDocumentDownloadInfo } from '@/interfaces/database/chat'; +import { downloadFile } from '@/services/file-manager-service'; +import { downloadFileFromBlob } from '@/utils/file-util'; +import { Download, FileText } from 'lucide-react'; +import { useCallback } from 'react'; + +export type DocumentDownloadInfo = IDocumentDownloadInfo; + +interface DocumentDownloadButtonProps { + downloadInfo: DocumentDownloadInfo; + className?: string; +} + +export function DocumentDownloadButton({ + downloadInfo, + className, +}: DocumentDownloadButtonProps) { + const handleDownload = useCallback(async () => { + try { + const ext = + downloadInfo.filename.split('.').pop()?.toLowerCase() || 'bin'; + const response = await downloadFile({ + docId: downloadInfo.doc_id, + ext, + }); + const blob = new Blob([response.data], { + type: downloadInfo.mime_type || response.data.type, + }); + downloadFileFromBlob(blob, downloadInfo.filename); + } catch (error) { + console.error('Error downloading document:', error); + } + }, [downloadInfo]); + + const getDocumentType = () => { + if (downloadInfo.mime_type === 'application/pdf') return 'PDF Document'; + if ( + downloadInfo.mime_type === + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' + ) + return 'Word Document'; + if ( + downloadInfo.mime_type === + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + ) + return 'Excel Document'; + if (downloadInfo.mime_type === 'text/plain') return 'Text Document'; + if (downloadInfo.mime_type === 'text/markdown') return 'Markdown Document'; + if (downloadInfo.mime_type === 'text/html') return 'HTML Document'; + + const ext = downloadInfo.filename.split('.').pop()?.toUpperCase(); + if (ext === 'PDF') return 'PDF Document'; + if (ext === 'DOCX') return 'Word Document'; + if (ext === 'XLSX') return 'Excel Document'; + if (ext === 'TXT') return 'Text Document'; + if (ext === 'MD') return 'Markdown Document'; + if (ext === 'HTML' || ext === 'HTM') return 'HTML Document'; + + return 'Document'; + }; + + return ( +
+
+
+ +
+
+
+
+ {downloadInfo.filename} +
+
{getDocumentType()}
+
+ +
+ ); +} diff --git a/web/src/components/message-item/index.tsx b/web/src/components/message-item/index.tsx index 4f8814a2d1..25a68e22fe 100644 --- a/web/src/components/message-item/index.tsx +++ b/web/src/components/message-item/index.tsx @@ -10,15 +10,11 @@ import { memo, useCallback, useMemo } from 'react'; import { IRegenerateMessage, IRemoveMessageById } from '@/hooks/logic-hooks'; import { cn } from '@/lib/utils'; +import { DocumentDownloadButton } from '../document-download-button'; import MarkdownContent from '../markdown-content'; import { ReferenceDocumentList } from '../next-message-item/reference-document-list'; import { ReferenceImageList } from '../next-message-item/reference-image-list'; import { UploadedMessageFiles } from '../next-message-item/uploaded-message-files'; -import { - PDFDownloadButton, - extractPDFDownloadInfo, - removePDFDownloadInfo, -} from '../pdf-download-button'; import { RAGFlowAvatar } from '../ragflow-avatar'; import SvgIcon from '../svg-icon'; import { useTheme } from '../theme-provider'; @@ -67,19 +63,11 @@ const MessageItem = ({ return reference?.doc_aggs ?? []; }, [reference?.doc_aggs]); - // Extract PDF download info from message content - const pdfDownloadInfo = useMemo( - () => extractPDFDownloadInfo(item.content), - [item.content], + const documentDownloadInfos = useMemo( + () => item.downloads ?? [], + [item.downloads], ); - - // If we have PDF download info, extract the remaining text - const messageContent = useMemo(() => { - if (!pdfDownloadInfo) return item.content; - - // Remove the JSON part from the content to avoid showing it - return removePDFDownloadInfo(item.content, pdfDownloadInfo); - }, [item.content, pdfDownloadInfo]); + const messageContent = item.content; const handleRegenerateMessage = useCallback(() => { regenerateMessage?.(item); @@ -129,7 +117,7 @@ const MessageItem = ({ index !== 0 && ( )} - {/* Show PDF download button if download info is present */} - {pdfDownloadInfo && ( - - )} {/* Show message content if there's any text besides the download */} {messageContent && (
)} + {documentDownloadInfos.length > 0 && ( +
+ {documentDownloadInfos.map((downloadInfo, index) => ( +
+ {index > 0 &&
} + +
+ ))} +
+ )}
diff --git a/web/src/components/next-message-item/index.tsx b/web/src/components/next-message-item/index.tsx index b9e1b32324..ac63a6661a 100644 --- a/web/src/components/next-message-item/index.tsx +++ b/web/src/components/next-message-item/index.tsx @@ -25,12 +25,8 @@ import { citationMarkerReg } from '@/utils/citation-utils'; import { getDirAttribute } from '@/utils/text-direction'; import { isEmpty } from 'lodash'; import { Atom, ChevronDown, ChevronUp } from 'lucide-react'; +import { DocumentDownloadButton } from '../document-download-button'; import MarkdownContent from '../next-markdown-content'; -import { - PDFDownloadButton, - extractPDFDownloadInfo, - removePDFDownloadInfo, -} from '../pdf-download-button'; import { RAGFlowAvatar } from '../ragflow-avatar'; import SvgIcon from '../svg-icon'; import { useTheme } from '../theme-provider'; @@ -102,19 +98,11 @@ function MessageItem({ return Object.values(docs); }, [reference?.doc_aggs]); - // Extract PDF download info from message content - const pdfDownloadInfo = useMemo( - () => extractPDFDownloadInfo(item.content), - [item.content], + const documentDownloadInfos = useMemo( + () => item.downloads ?? [], + [item.downloads], ); - - // If we have PDF download info, extract the remaining text - const messageContent = useMemo(() => { - if (!pdfDownloadInfo) return item.content; - - // Remove the JSON part from the content to avoid showing it - return removePDFDownloadInfo(item.content, pdfDownloadInfo); - }, [item.content, pdfDownloadInfo]); + const messageContent = item.content; const handleRegenerateMessage = useCallback(() => { regenerateMessage?.(item); @@ -137,9 +125,7 @@ function MessageItem({ ); const renderContent = useCallback(() => { - /* Show message content if there's any text besides the download */ - - if (pdfDownloadInfo) { + if (!messageContent && !(item.data || (sendLoading && !isShare))) { return null; } @@ -175,7 +161,6 @@ function MessageItem({ item.data, loading, messageContent, - pdfDownloadInfo, reference, sendLoading, theme, @@ -239,7 +224,7 @@ function MessageItem({ {isShare && !sendLoading && !isEmpty(item.content) && ( ) : ( )} - {/* Show PDF download button if download info is present */} - {pdfDownloadInfo && ( - - )} - {renderContent()} {isAssistant && ( @@ -320,6 +297,16 @@ function MessageItem({ files={item.files as File[] | UploadResponseDataType[]} > )} + {documentDownloadInfos.length > 0 && ( +
+ {documentDownloadInfos.map((downloadInfo, index) => ( +
+ {index > 0 &&
} + +
+ ))} +
+ )} {/* {isAssistant && item.attachment && item.attachment.doc_id && (
-
- ); -} - -// Helper function to detect if content contains document download info -export function extractPDFDownloadInfo( - content: string, -): DocumentDownloadInfo | null { - try { - // Try to parse as JSON first (for pure JSON content) - const parsed = JSON.parse(content); - if (parsed && parsed.filename && parsed.base64 && parsed.mime_type) { - // Accept PDF, DOCX, and TXT formats - const validMimeTypes = [ - 'application/pdf', - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - 'text/plain', - ]; - if (validMimeTypes.includes(parsed.mime_type)) { - return parsed as DocumentDownloadInfo; - } - } - } catch { - // If direct parsing fails, try to extract JSON object from mixed content - // Look for a JSON object that contains the required fields - // This regex finds a balanced JSON object by counting braces - const startPattern = /\{[^{}]*"filename"[^{}]*:/g; - let match; - - while ((match = startPattern.exec(content)) !== null) { - const startIndex = match.index; - let braceCount = 0; - let endIndex = startIndex; - - // Find the matching closing brace - for (let i = startIndex; i < content.length; i++) { - if (content[i] === '{') braceCount++; - if (content[i] === '}') braceCount--; - - if (braceCount === 0) { - endIndex = i + 1; - break; - } - } - - if (endIndex > startIndex) { - try { - const jsonStr = content.substring(startIndex, endIndex); - const parsed = JSON.parse(jsonStr); - if (parsed && parsed.filename && parsed.base64 && parsed.mime_type) { - // Accept PDF, DOCX, and TXT formats - const validMimeTypes = [ - 'application/pdf', - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - 'text/plain', - ]; - if (validMimeTypes.includes(parsed.mime_type)) { - return parsed as DocumentDownloadInfo; - } - } - } catch { - // This wasn't valid JSON, continue searching - } - } - } - } - return null; -} - -// Helper function to remove document download info from content -export function removePDFDownloadInfo( - content: string, - downloadInfo: DocumentDownloadInfo, -): string { - try { - // First, check if the entire content is just the JSON (most common case) - try { - const parsed = JSON.parse(content); - if ( - parsed && - parsed.filename === downloadInfo.filename && - parsed.base64 === downloadInfo.base64 - ) { - // The entire content is just the download JSON, return empty - return ''; - } - } catch { - // Content is not pure JSON, continue with removal - } - - // Try to remove the JSON string from content - const jsonStr = JSON.stringify(downloadInfo); - let cleaned = content.replace(jsonStr, '').trim(); - - // Also try with pretty-printed JSON (with indentation) - const prettyJsonStr = JSON.stringify(downloadInfo, null, 2); - cleaned = cleaned.replace(prettyJsonStr, '').trim(); - - // Also try to find and remove JSON object pattern from mixed content - // This handles cases where the JSON might have different formatting - const startPattern = /\{[^{}]*"filename"[^{}]*"base64"[^{}]*\}/g; - cleaned = cleaned.replace(startPattern, '').trim(); - - return cleaned; - } catch { - return content; - } -} diff --git a/web/src/constants/agent.tsx b/web/src/constants/agent.tsx index d5755e2e03..f52caffccc 100644 --- a/web/src/constants/agent.tsx +++ b/web/src/constants/agent.tsx @@ -108,7 +108,7 @@ export enum Operator { UserFillUp = 'UserFillUp', StringTransform = 'StringTransform', SearXNG = 'SearXNG', - PDFGenerator = 'PDFGenerator', + DocGenerator = 'DocGenerator', Placeholder = 'Placeholder', DataOperations = 'DataOperations', ListOperations = 'ListOperations', diff --git a/web/src/interfaces/database/chat.ts b/web/src/interfaces/database/chat.ts index eeb298fc1d..5cce383f59 100644 --- a/web/src/interfaces/database/chat.ts +++ b/web/src/interfaces/database/chat.ts @@ -1,6 +1,13 @@ import { MessageType } from '@/constants/chat'; import { IAttachment } from '@/hooks/use-send-message'; +export interface IDocumentDownloadInfo { + doc_id: string; + filename: string; + mime_type: string; + size?: number; +} + export interface PromptConfig { empty_response: string; parameters: Parameter[]; @@ -104,6 +111,7 @@ export interface Message { files?: (File | UploadResponseDataType)[]; chatBoxId?: string; attachment?: IAttachment; + downloads?: IDocumentDownloadInfo[]; } export interface IReferenceChunk { @@ -134,6 +142,7 @@ export interface IReferenceObject { export interface IAnswer { answer: string; attachment?: IAttachment; + downloads?: IDocumentDownloadInfo[]; reference?: IReference; conversationId?: string; prompt?: string; diff --git a/web/src/locales/ar.ts b/web/src/locales/ar.ts index 9680863aba..711dd010ce 100644 --- a/web/src/locales/ar.ts +++ b/web/src/locales/ar.ts @@ -1517,12 +1517,8 @@ export default { searXNG: 'احرق XNG', searXNGDescription: 'مكون يبحث عبر عنوان URL لمثيل SearXNG المقدم. حدد TopN وعنوان URL للمثيل.', - pdfGenerator: 'مولد المستندات', - pDFGenerator: 'مولد المستندات', - pdfGeneratorDescription: - 'مكون يقوم بإنشاء المستندات (PDF، DOCX، TXT) من محتوى بتنسيق تخفيض السعر مع تصميم وصور وجداول قابلة للتخصيص. يدعم: **غامق**، *مائل*، # عناوين، - قوائم، جداول مع | بناء الجملة.', - pDFGeneratorDescription: - 'مكون يقوم بإنشاء المستندات (PDF، DOCX، TXT) من محتوى بتنسيق تخفيض السعر مع تصميم وصور وجداول قابلة للتخصيص. يدعم: **غامق**، *مائل*، # عناوين، - قوائم، جداول مع | بناء الجملة.', + docGenerator: 'مولد المستندات', + docGeneratorDescription: 'ينشئ ملفًا من محتوى Markdown.', subtitle: 'الترجمة', logoImage: 'صورة الشعار', logoPosition: 'موقف الشعار', diff --git a/web/src/locales/bg.ts b/web/src/locales/bg.ts index c3a9f86982..1a22ad26fb 100644 --- a/web/src/locales/bg.ts +++ b/web/src/locales/bg.ts @@ -1569,10 +1569,8 @@ The above is the content you need to summarize.`, searXNG: 'SearXNG', searXNGDescription: 'Компонент, който търси чрез вашия SearXNG инстанция URL. Укажете TopN и URL на инстанцията.', - pdfGenerator: 'Генератор на документи', - pDFGenerator: 'Генератор на документи', - pdfGeneratorDescription: `Компонент, който генерира документи (PDF, DOCX, TXT) от markdown-форматирано съдържание с персонализирано стилизиране, изображения и таблици.`, - pDFGeneratorDescription: `Компонент, който генерира документи (PDF, DOCX, TXT) от markdown-форматирано съдържание с персонализирано стилизиране, изображения и таблици.`, + docGenerator: 'Генератор на документи', + docGeneratorDescription: `Генерира файл от Markdown съдържание.`, subtitle: 'Подзаглавие', logoImage: 'Лого изображение', logoPosition: 'Позиция на логото', diff --git a/web/src/locales/de.ts b/web/src/locales/de.ts index 256d21869a..74ff077ec5 100644 --- a/web/src/locales/de.ts +++ b/web/src/locales/de.ts @@ -1624,10 +1624,8 @@ Beispiel: Virtual Hosted Style`, searXNG: 'SearXNG', searXNGDescription: 'Eine Komponente, die auf https://searxng.org/ sucht und Ihnen ermöglicht, die Anzahl der Suchergebnisse mit TopN anzugeben. Sie ergänzt die vorhandenen Wissensdatenbanken.', - pdfGenerator: 'Dokumentengenerator', - pDFGenerator: 'Dokumentengenerator', - pdfGeneratorDescription: `Eine Komponente, die Dokumente (PDF, DOCX, TXT) aus markdown-formatierten Inhalten mit anpassbarem Stil, Bildern und Tabellen generiert. Unterstützt: **fett**, *kursiv*, # Überschriften, - Listen, Tabellen mit | Syntax.`, - pDFGeneratorDescription: `Eine Komponente, die Dokumente (PDF, DOCX, TXT) aus markdown-formatierten Inhalten mit anpassbarem Stil, Bildern und Tabellen generiert. Unterstützt: **fett**, *kursiv*, # Überschriften, - Listen, Tabellen mit | Syntax.`, + docGenerator: 'Dokumentengenerator', + docGeneratorDescription: `Erzeugt eine Datei aus Markdown-Inhalten.`, subtitle: 'Untertitel', logoImage: 'Logo-Bild', logoPosition: 'Logo-Position', diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index f3fc463727..289eee6754 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1692,10 +1692,8 @@ Best for: Documents with flowing, contextually connected content — such as boo searXNG: 'SearXNG', searXNGDescription: 'A component that searches via your provided SearXNG instance URL. Specify TopN and the instance URL.', - pdfGenerator: 'Docs Generator', - pDFGenerator: 'Docs Generator', - pdfGeneratorDescription: `A component that generates documents (PDF, DOCX, TXT) from markdown-formatted content with customizable styling, images, and tables. Supports: **bold**, *italic*, # headings, - lists, tables with | syntax.`, - pDFGeneratorDescription: `A component that generates documents (PDF, DOCX, TXT) from markdown-formatted content with customizable styling, images, and tables. Supports: **bold**, *italic*, # headings, - lists, tables with | syntax.`, + docGenerator: 'Doc Generator', + docGeneratorDescription: `Generate a file from Markdown content.`, subtitle: 'Subtitle', logoImage: 'Logo Image', logoPosition: 'Logo Position', diff --git a/web/src/locales/es.ts b/web/src/locales/es.ts index 3b36b1da13..02bdcef84f 100644 --- a/web/src/locales/es.ts +++ b/web/src/locales/es.ts @@ -611,10 +611,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Un componente que busca a través de la URL de la instancia SearXNG que proporcionas. Especifica TopN y la URL de la instancia.', - pdfGenerator: 'Generador de Documentos', - pDFGenerator: 'Generador de Documentos', - pdfGeneratorDescription: `Un componente que genera documentos (PDF, DOCX, TXT) desde contenido formateado en markdown con estilo personalizable, imágenes y tablas. Soporta: **negrita**, *cursiva*, # encabezados, - listas, tablas con sintaxis |.`, - pDFGeneratorDescription: `Un componente que genera documentos (PDF, DOCX, TXT) desde contenido formateado en markdown con estilo personalizable, imágenes y tablas. Soporta: **negrita**, *cursiva*, # encabezados, - listas, tablas con sintaxis |.`, + docGenerator: 'Generador de Documentos', + docGeneratorDescription: `Genera un archivo a partir de contenido Markdown.`, subtitle: 'Subtítulo', logoImage: 'Imagen Logo', logoPosition: 'Posición Logo', diff --git a/web/src/locales/fr.ts b/web/src/locales/fr.ts index b1fb01476d..2a80fc54b9 100644 --- a/web/src/locales/fr.ts +++ b/web/src/locales/fr.ts @@ -831,10 +831,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: "Un composant qui effectue des recherches via la URL de l'instance de SearXNG que vous fournissez. Spécifiez TopN et l'URL de l'instance.", - pdfGenerator: 'Générateur de Documents', - pDFGenerator: 'Générateur de Documents', - pdfGeneratorDescription: `Un composant qui génère des documents (PDF, DOCX, TXT) à partir de contenu formaté en markdown avec un style personnalisable, des images et des tableaux. Prend en charge : **gras**, *italique*, # titres, - listes, tableaux avec syntaxe |.`, - pDFGeneratorDescription: `Un composant qui génère des documents (PDF, DOCX, TXT) à partir de contenu formaté en markdown avec un style personnalisable, des images et des tableaux. Prend en charge : **gras**, *italique*, # titres, - listes, tableaux avec syntaxe |.`, + docGenerator: 'Générateur de Documents', + docGeneratorDescription: `Génère un fichier à partir de contenu Markdown.`, subtitle: 'Sous-titre', logoImage: 'Image Logo', logoPosition: 'Position Logo', diff --git a/web/src/locales/id.ts b/web/src/locales/id.ts index 95c479de92..6dbb2cce60 100644 --- a/web/src/locales/id.ts +++ b/web/src/locales/id.ts @@ -810,10 +810,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Komponen yang melakukan pencarian menggunakan URL instance SearXNG yang Anda berikan. Spesifikasikan TopN dan URL instance.', - pdfGenerator: 'Pembuat Dokumen', - pDFGenerator: 'Pembuat Dokumen', - pdfGeneratorDescription: `Komponen yang menghasilkan dokumen (PDF, DOCX, TXT) dari konten berformat markdown dengan gaya yang dapat disesuaikan, gambar, dan tabel. Mendukung: **tebal**, *miring*, # judul, - daftar, tabel dengan sintaks |.`, - pDFGeneratorDescription: `Komponen yang menghasilkan dokumen (PDF, DOCX, TXT) dari konten berformat markdown dengan gaya yang dapat disesuaikan, gambar, dan tabel. Mendukung: **tebal**, *miring*, # judul, - daftar, tabel dengan sintaks |.`, + docGenerator: 'Pembuat Dokumen', + docGeneratorDescription: `Menghasilkan file dari konten Markdown.`, subtitle: 'Subjudul', logoImage: 'Gambar Logo', logoPosition: 'Posisi Logo', diff --git a/web/src/locales/it.ts b/web/src/locales/it.ts index 70e59edc29..c72e4e179a 100644 --- a/web/src/locales/it.ts +++ b/web/src/locales/it.ts @@ -969,10 +969,8 @@ Quanto sopra è il contenuto che devi riassumere.`, searXNG: 'SearXNG', searXNGDescription: 'Un componente che cerca tramite lURL dellistanza SearXNG fornita. Specifica TopN e lURL dellistanza.', - pdfGenerator: 'Generatore Documenti', - pDFGenerator: 'Generatore Documenti', - pdfGeneratorDescription: `Un componente che genera documenti (PDF, DOCX, TXT) da contenuti formattati in markdown con stile personalizzabile, immagini e tabelle. Supporta: **grassetto**, *corsivo*, # titoli, - elenchi, tabelle con sintassi |.`, - pDFGeneratorDescription: `Un componente che genera documenti (PDF, DOCX, TXT) da contenuti formattati in markdown con stile personalizzabile, immagini e tabelle. Supporta: **grassetto**, *corsivo*, # titoli, - elenchi, tabelle con sintassi |.`, + docGenerator: 'Generatore Documenti', + docGeneratorDescription: `Genera file da contenuto Markdown.`, subtitle: 'Sottotitolo', logoImage: 'Immagine Logo', logoPosition: 'Posizione Logo', diff --git a/web/src/locales/ja.ts b/web/src/locales/ja.ts index 014635b47d..b9335508f5 100644 --- a/web/src/locales/ja.ts +++ b/web/src/locales/ja.ts @@ -820,10 +820,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'SearXNGのインスタンスURLを提供して検索を行うコンポーネント。TopNとインスタンスURLを指定してください。', - pdfGenerator: 'ドキュメント生成', - pDFGenerator: 'ドキュメント生成', - pdfGeneratorDescription: `マークダウン形式のコンテンツからドキュメント(PDF、DOCX、TXT)を生成するコンポーネント。カスタムスタイル、画像、テーブルをサポート。サポート:**太字**、*斜体*、# 見出し、- リスト、| 構文のテーブル。`, - pDFGeneratorDescription: `マークダウン形式のコンテンツからドキュメント(PDF、DOCX、TXT)を生成するコンポーネント。カスタムスタイル、画像、テーブルをサポート。サポート:**太字**、*斜体*、# 見出し、- リスト、| 構文のテーブル。`, + docGenerator: 'ドキュメント生成', + docGeneratorDescription: `Markdown コンテンツからファイルを生成します。`, subtitle: 'サブタイトル', logoImage: 'ロゴ画像', logoPosition: 'ロゴ位置', diff --git a/web/src/locales/pt-br.ts b/web/src/locales/pt-br.ts index f8a5a7430e..d2675040b5 100644 --- a/web/src/locales/pt-br.ts +++ b/web/src/locales/pt-br.ts @@ -768,10 +768,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Um componente que realiza buscas via URL da instância SearXNG que você fornece. Especifique TopN e URL da instância.', - pdfGenerator: 'Gerador de Documentos', - pDFGenerator: 'Gerador de Documentos', - pdfGeneratorDescription: `Um componente que gera documentos (PDF, DOCX, TXT) de conteúdo formatado em markdown com estilo personalizável, imagens e tabelas. Suporta: **negrito**, *itálico*, # títulos, - listas, tabelas com sintaxe |.`, - pDFGeneratorDescription: `Um componente que gera documentos (PDF, DOCX, TXT) de conteúdo formatado em markdown com estilo personalizável, imagens e tabelas. Suporta: **negrito**, *itálico*, # títulos, - listas, tabelas com sintaxe |.`, + docGenerator: 'Gerador de Documentos', + docGeneratorDescription: `Gera um arquivo a partir de conteúdo Markdown.`, subtitle: 'Subtítulo', logoImage: 'Imagem Logo', logoPosition: 'Posição Logo', diff --git a/web/src/locales/ru.ts b/web/src/locales/ru.ts index 9ed1af9f9a..c5f9c3794d 100644 --- a/web/src/locales/ru.ts +++ b/web/src/locales/ru.ts @@ -1687,10 +1687,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Компонент, который выполняет поиск через ваш предоставленный URL экземпляра SearXNG. Укажите TopN и URL экземпляра.', - pdfGenerator: 'Генератор документов', - pDFGenerator: 'Генератор документов', - pdfGeneratorDescription: `Компонент, который генерирует документы (PDF, DOCX, TXT) из содержимого в формате markdown с настраиваемым стилем, изображениями и таблицами. Поддерживает: **жирный**, *курсив*, # заголовки, - списки, таблицы с синтаксисом |.`, - pDFGeneratorDescription: `Компонент, который генерирует документы (PDF, DOCX, TXT) из содержимого в формате markdown с настраиваемым стилем, изображениями и таблицами. Поддерживает: **жирный**, *курсив*, # заголовки, - списки, таблицы с синтаксисом |.`, + docGenerator: 'Генератор документов', + docGeneratorDescription: `Создает файл из содержимого Markdown.`, subtitle: 'Подзаголовок', logoImage: 'Изображение логотипа', logoPosition: 'Позиция логотипа', diff --git a/web/src/locales/tr.ts b/web/src/locales/tr.ts index fa3eb96a78..8aac442de5 100644 --- a/web/src/locales/tr.ts +++ b/web/src/locales/tr.ts @@ -1626,10 +1626,8 @@ Bu otomatik etiketleme özelliği, mevcut datasete alanına özgü bilgi katman searXNG: 'SearXNG', searXNGDescription: "Sağlanan SearXNG örnek URL'si üzerinden arama yapan bir bileşen.", - pdfGenerator: 'Belge Oluşturucu', - pDFGenerator: 'Belge Oluşturucu', - pdfGeneratorDescription: `Markdown biçimli içerikten belgeler (PDF, DOCX, TXT) oluşturan bir bileşen.`, - pDFGeneratorDescription: `Markdown biçimli içerikten belgeler (PDF, DOCX, TXT) oluşturan bir bileşen.`, + docGenerator: 'Belge Oluşturucu', + docGeneratorDescription: `Markdown içeriğinden bir dosya oluşturur.`, subtitle: 'Alt başlık', logoImage: 'Logo Görüntüsü', logoPosition: 'Logo Konumu', diff --git a/web/src/locales/vi.ts b/web/src/locales/vi.ts index befed677eb..d9225f0851 100644 --- a/web/src/locales/vi.ts +++ b/web/src/locales/vi.ts @@ -852,10 +852,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: 'Một thành phần tìm kiếm thông qua URL phiên bản SearXNG bạn cung cấp. Chỉ định TopN và URL phiên bản.', - pdfGenerator: 'Trình tạo Tài liệu', - pDFGenerator: 'Trình tạo Tài liệu', - pdfGeneratorDescription: `Một thành phần tạo tài liệu (PDF, DOCX, TXT) từ nội dung định dạng markdown với kiểu tùy chỉnh, hình ảnh và bảng. Hỗ trợ: **in đậm**, *in nghiêng*, # tiêu đề, - danh sách, bảng với cú pháp |.`, - pDFGeneratorDescription: `Một thành phần tạo tài liệu (PDF, DOCX, TXT) từ nội dung định dạng markdown với kiểu tùy chỉnh, hình ảnh và bảng. Hỗ trợ: **in đậm**, *in nghiêng*, # tiêu đề, - danh sách, bảng với cú pháp |.`, + docGenerator: 'Trình tạo Tài liệu', + docGeneratorDescription: `Tạo tệp từ nội dung Markdown.`, subtitle: 'Phụ đề', logoImage: 'Hình ảnh Logo', logoPosition: 'Vị trí Logo', diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index f65599cff5..5153677371 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -906,10 +906,8 @@ export default { searXNG: 'SearXNG', searXNGDescription: '此組件透過您提供的 SearXNG 實例 URL 進行搜尋。請設定 Top N 和實例 URL。', - pdfGenerator: '文檔生成器', - pPDFGenerator: '文檔生成器', - pdfGeneratorDescription: `該組件從 markdown 格式的內容生成文檔(PDF、DOCX、TXT),支援自定義樣式、圖片和表格。支援:**粗體**、*斜體*、# 標題、- 列表、使用 | 語法的表格。`, - pPDFGeneratorDescription: `該組件從 markdown 格式的內容生成文檔(PDF、DOCX、TXT),支援自定義樣式、圖片和表格。支援:**粗體**、*斜體*、# 標題、- 列表、使用 | 語法的表格。`, + docGenerator: '文檔生成器', + docGeneratorDescription: `從 Markdown 內容產生檔案。`, subtitle: '副標題', logoImage: '標誌圖片', logoPosition: '標誌位置', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index bb4918fcf1..a47ebddabe 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -1455,10 +1455,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 searXNG: 'SearXNG', searXNGDescription: '该组件通过您提供的 SearXNG 实例地址进行搜索。请设置 Top N 和实例 URL。', - pdfGenerator: '文档生成器', - pDFGenerator: '文档生成器', - pdfGeneratorDescription: `该组件从 markdown 格式的内容生成文档(PDF、DOCX、TXT),支持自定义样式、图片和表格。支持:**粗体**、*斜体*、# 标题、- 列表、使用 | 语法的表格。`, - pDFGeneratorDescription: `该组件从 markdown 格式的内容生成文档(PDF、DOCX、TXT),支持自定义样式、图片和表格。支持:**粗体**、*斜体*、# 标题、- 列表、使用 | 语法的表格。`, + docGenerator: '文档生成器', + docGeneratorDescription: `从 Markdown 内容生成文件。`, subtitle: '副标题', logoImage: '标志图片', logoPosition: '标志位置', diff --git a/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx b/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx index b860dbc9ef..e8bee6f593 100644 --- a/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx +++ b/web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx @@ -122,7 +122,7 @@ export function AccordionOperators({ Operator.Invoke, Operator.WenCai, Operator.SearXNG, - Operator.PDFGenerator, + Operator.DocGenerator, ]} isCustomDropdown={isCustomDropdown} mousePosition={mousePosition} diff --git a/web/src/pages/agent/chat/use-send-agent-message.ts b/web/src/pages/agent/chat/use-send-agent-message.ts index 35bb078e4f..8208ffb754 100644 --- a/web/src/pages/agent/chat/use-send-agent-message.ts +++ b/web/src/pages/agent/chat/use-send-agent-message.ts @@ -86,6 +86,7 @@ export function findMessageFromList(eventList: IEventList) { content: nextContent, audio_binary: audioBinary, attachment: workflowFinished?.data?.outputs?.attachment || {}, + downloads: workflowFinished?.data?.outputs?.downloads || [], }; } @@ -441,7 +442,7 @@ export const useSendAgentMessage = ({ }, [sendMessageInTaskMode]); useEffect(() => { - const { content, id, attachment, audio_binary } = + const { content, id, attachment, audio_binary, downloads } = findMessageFromList(answerList); const inputAnswer = findInputFromList(answerList); const answer = content || getLatestError(answerList); @@ -451,6 +452,7 @@ export const useSendAgentMessage = ({ answer: answer ?? '', audio_binary: audio_binary, attachment: attachment as IAttachment, + downloads, id: id, ...inputAnswer, }); diff --git a/web/src/pages/agent/constant/index.tsx b/web/src/pages/agent/constant/index.tsx index 490c1c42c3..d4fd25335b 100644 --- a/web/src/pages/agent/constant/index.tsx +++ b/web/src/pages/agent/constant/index.tsx @@ -696,7 +696,7 @@ export const RestrictedUpstreamMap = { [Operator.Loop]: [Operator.Begin], [Operator.LoopStart]: [Operator.Begin], [Operator.ExitLoop]: [Operator.Begin], - [Operator.PDFGenerator]: [Operator.Begin], + [Operator.DocGenerator]: [Operator.Begin], }; export const NodeMap = { @@ -747,7 +747,7 @@ export const NodeMap = { [Operator.LoopStart]: 'loopStartNode', [Operator.ExitLoop]: 'exitLoopNode', [Operator.ExcelProcessor]: 'ragNode', - [Operator.PDFGenerator]: 'ragNode', + [Operator.DocGenerator]: 'ragNode', }; export enum BeginQueryType { @@ -963,68 +963,18 @@ export enum AgentVariableType { Conversation = 'conversation', } -// PDF Generator enums -export enum PDFGeneratorFontFamily { - Helvetica = 'Helvetica', - TimesRoman = 'Times-Roman', - Courier = 'Courier', - HelveticaBold = 'Helvetica-Bold', - TimesBold = 'Times-Bold', -} - -export enum PDFGeneratorLogoPosition { - Left = 'left', - Center = 'center', - Right = 'right', -} - -export enum PDFGeneratorPageSize { - A4 = 'A4', - Letter = 'Letter', -} - -export enum PDFGeneratorOrientation { - Portrait = 'portrait', - Landscape = 'landscape', -} - -export const initialPDFGeneratorValues = { +export const initialDocGeneratorValues = { output_format: 'pdf', content: '', - title: '', - subtitle: '', + filename: '', header_text: '', footer_text: '', - logo_image: '', - logo_position: PDFGeneratorLogoPosition.Left, - logo_width: 2.0, - logo_height: 1.0, - font_family: PDFGeneratorFontFamily.Helvetica, - font_size: 12, - title_font_size: 24, - heading1_font_size: 18, - heading2_font_size: 16, - heading3_font_size: 14, - text_color: '#000000', - title_color: '#000000', - page_size: PDFGeneratorPageSize.A4, - orientation: PDFGeneratorOrientation.Portrait, - margin_top: 1.0, - margin_bottom: 1.0, - margin_left: 1.0, - margin_right: 1.0, - line_spacing: 1.2, - filename: '', - output_directory: '/tmp/pdf_outputs', + watermark_text: '', add_page_numbers: true, add_timestamp: true, - watermark_text: '', - enable_toc: false, + font_size: 12, outputs: { - file_path: { type: 'string' }, - pdf_base64: { type: 'string' }, download: { type: 'string' }, - success: { type: 'boolean' }, }, }; diff --git a/web/src/pages/agent/form-sheet/form-config-map.tsx b/web/src/pages/agent/form-sheet/form-config-map.tsx index 7fe720885f..2baafbf16a 100644 --- a/web/src/pages/agent/form-sheet/form-config-map.tsx +++ b/web/src/pages/agent/form-sheet/form-config-map.tsx @@ -7,6 +7,7 @@ import CategorizeForm from '../form/categorize-form'; import CodeForm from '../form/code-form'; import CrawlerForm from '../form/crawler-form'; import DataOperationsForm from '../form/data-operations-form'; +import DocGeneratorForm from '../form/doc-generator-form'; import DuckDuckGoForm from '../form/duckduckgo-form'; import EmailForm from '../form/email-form'; import ExeSQLForm from '../form/exesql-form'; @@ -21,7 +22,6 @@ import ListOperationsForm from '../form/list-operations-form'; import LoopForm from '../form/loop-form'; import MessageForm from '../form/message-form'; import ParserForm from '../form/parser-form'; -import PDFGeneratorForm from '../form/pdf-generator-form'; import PubMedForm from '../form/pubmed-form'; import RetrievalForm from '../form/retrieval-form/next'; import RewriteQuestionForm from '../form/rewrite-question-form'; @@ -111,8 +111,8 @@ export const FormConfigMap = { [Operator.SearXNG]: { component: SearXNGForm, }, - [Operator.PDFGenerator]: { - component: PDFGeneratorForm, + [Operator.DocGenerator]: { + component: DocGeneratorForm, }, [Operator.Note]: { component: () => <>, diff --git a/web/src/pages/agent/form/doc-generator-form/index.tsx b/web/src/pages/agent/form/doc-generator-form/index.tsx new file mode 100644 index 0000000000..e9d0e82dcb --- /dev/null +++ b/web/src/pages/agent/form/doc-generator-form/index.tsx @@ -0,0 +1,254 @@ +import { FormContainer } from '@/components/form-container'; +import { + Form, + FormControl, + FormField, + FormItem, + FormLabel, + FormMessage, +} from '@/components/ui/form'; +import { Input } from '@/components/ui/input'; +import { RAGFlowSelect } from '@/components/ui/select'; +import { Switch } from '@/components/ui/switch'; +import { zodResolver } from '@hookform/resolvers/zod'; +import { t } from 'i18next'; +import { memo, useEffect, useMemo } from 'react'; +import { useForm } from 'react-hook-form'; +import { z } from 'zod'; +import { INextOperatorForm } from '../../interface'; +import { FormWrapper } from '../components/form-wrapper'; +import { Output, transferOutputs } from '../components/output'; +import { PromptEditor } from '../components/prompt-editor'; +import { useValues } from './use-values'; +import { useWatchFormChange } from './use-watch-form-change'; + +function DocGeneratorForm({ node }: INextOperatorForm) { + const values = useValues(node); + + const FormSchema = z.object({ + output_format: z.string().default('pdf'), + content: z.string().min(1, 'Content is required'), + filename: z.string().optional(), + header: z.string().optional(), + footer: z.string().optional(), + watermark: z.string().optional(), + add_page_numbers: z.boolean(), + add_timestamp: z.boolean(), + font_size: z.coerce.number().min(12, 'Font size must be at least 12'), + outputs: z.object({ + download: z.object({ type: z.string() }), + }), + }); + + const form = useForm>({ + defaultValues: values, + resolver: zodResolver(FormSchema), + }); + + const outputFormat = form.watch('output_format'); + const formOutputs = form.watch('outputs'); + + const supportsDocumentDecorations = + outputFormat === 'pdf' || outputFormat === 'docx'; + + const supportsTimestamp = + outputFormat === 'pdf' || + outputFormat === 'docx' || + outputFormat === 'txt' || + outputFormat === 'markdown' || + outputFormat === 'html'; + + const outputList = useMemo(() => { + return transferOutputs(formOutputs ?? values.outputs); + }, [formOutputs, values.outputs]); + + useEffect(() => { + form.setValue('outputs', values.outputs); + }, [form, values.outputs]); + + useWatchFormChange(node?.id, form); + + return ( +
+ + + ( + + Output Format + + + + + + )} + /> + + ( + + {t('flow.content')} + + + + + + )} + /> + + ( + + {t('flow.filename')} + + + + + + )} + /> + + {supportsDocumentDecorations && ( + <> + ( + + {t('flow.fontSize')} + + field.onChange(e.target.value)} + onBlur={(e) => { + field.onBlur(); + const value = Number(e.target.value); + field.onChange( + Number.isFinite(value) && value >= 12 ? value : 12, + ); + }} + /> + + + + )} + /> + + ( + + Header Text + + + + + + )} + /> + + ( + + Footer Text + + + + + + )} + /> + {outputFormat === 'pdf' && ( + ( + + {t('flow.watermarkText')} + + + + + + )} + /> + )} + + ( + + {t('flow.addPageNumbers')} + + + + + )} + /> + + )} + + {supportsTimestamp && ( + ( + + {t('flow.addTimestamp')} + + + + + )} + /> + )} + +
} + /> +
+
+
+ +
+
+ ); +} + +export default memo(DocGeneratorForm); diff --git a/web/src/pages/agent/form/doc-generator-form/use-values.ts b/web/src/pages/agent/form/doc-generator-form/use-values.ts new file mode 100644 index 0000000000..e4426ae8a5 --- /dev/null +++ b/web/src/pages/agent/form/doc-generator-form/use-values.ts @@ -0,0 +1,30 @@ +import { useMemo } from 'react'; +import { Node } from 'reactflow'; +import { initialDocGeneratorValues } from '../../constant'; + +export const useValues = (node?: Node) => { + const values = useMemo(() => { + const supportedOutputFormats = ['pdf', 'docx', 'txt', 'markdown', 'html']; + const nextValues = { + ...initialDocGeneratorValues, + ...(node?.data.form ?? {}), + }; + + return { + output_format: supportedOutputFormats.includes(nextValues.output_format) + ? nextValues.output_format + : initialDocGeneratorValues.output_format, + content: nextValues.content, + filename: nextValues.filename, + header_text: nextValues.header_text, + footer_text: nextValues.footer_text, + watermark_text: nextValues.watermark_text, + add_page_numbers: nextValues.add_page_numbers, + add_timestamp: nextValues.add_timestamp, + font_size: Math.max(12, Number(nextValues.font_size) || 12), + outputs: initialDocGeneratorValues.outputs, + }; + }, [node?.data.form]); + + return values; +}; diff --git a/web/src/pages/agent/form/pdf-generator-form/use-watch-form-change.ts b/web/src/pages/agent/form/doc-generator-form/use-watch-form-change.ts similarity index 100% rename from web/src/pages/agent/form/pdf-generator-form/use-watch-form-change.ts rename to web/src/pages/agent/form/doc-generator-form/use-watch-form-change.ts diff --git a/web/src/pages/agent/form/pdf-generator-form/index.tsx b/web/src/pages/agent/form/pdf-generator-form/index.tsx deleted file mode 100644 index 3c3ce7f16d..0000000000 --- a/web/src/pages/agent/form/pdf-generator-form/index.tsx +++ /dev/null @@ -1,536 +0,0 @@ -import { FormContainer } from '@/components/form-container'; -import { - Form, - FormControl, - FormDescription, - FormField, - FormItem, - FormLabel, - FormMessage, -} from '@/components/ui/form'; -import { Input } from '@/components/ui/input'; -import { RAGFlowSelect } from '@/components/ui/select'; -import { Switch } from '@/components/ui/switch'; -import { zodResolver } from '@hookform/resolvers/zod'; -import { t } from 'i18next'; -import { memo, useMemo } from 'react'; -import { useForm } from 'react-hook-form'; -import { z } from 'zod'; -import { - PDFGeneratorFontFamily, - PDFGeneratorLogoPosition, - PDFGeneratorOrientation, - PDFGeneratorPageSize, -} from '../../constant'; -import { INextOperatorForm } from '../../interface'; -import { FormWrapper } from '../components/form-wrapper'; -import { Output, transferOutputs } from '../components/output'; -import { PromptEditor } from '../components/prompt-editor'; -import { useValues } from './use-values'; -import { useWatchFormChange } from './use-watch-form-change'; - -function PDFGeneratorForm({ node }: INextOperatorForm) { - const values = useValues(node); - - const FormSchema = z.object({ - output_format: z.string().default('pdf'), - content: z.string().min(1, 'Content is required'), - title: z.string().optional(), - subtitle: z.string().optional(), - header_text: z.string().optional(), - footer_text: z.string().optional(), - logo_image: z.string().optional(), - logo_position: z.string(), - logo_width: z.number(), - logo_height: z.number(), - font_family: z.string(), - font_size: z.number(), - title_font_size: z.number(), - heading1_font_size: z.number(), - heading2_font_size: z.number(), - heading3_font_size: z.number(), - text_color: z.string(), - title_color: z.string(), - page_size: z.string(), - orientation: z.string(), - margin_top: z.number(), - margin_bottom: z.number(), - margin_left: z.number(), - margin_right: z.number(), - line_spacing: z.number(), - filename: z.string().optional(), - output_directory: z.string(), - add_page_numbers: z.boolean(), - add_timestamp: z.boolean(), - watermark_text: z.string().optional(), - enable_toc: z.boolean(), - outputs: z.object({ - file_path: z.object({ type: z.string() }), - pdf_base64: z.object({ type: z.string() }), - download: z.object({ type: z.string() }), - success: z.object({ type: z.string() }), - }), - }); - - const form = useForm>({ - defaultValues: values, - resolver: zodResolver(FormSchema), - }); - - const formOutputs = form.watch('outputs'); - - const outputList = useMemo(() => { - return transferOutputs(formOutputs ?? values.outputs); - }, [formOutputs, values.outputs]); - - useWatchFormChange(node?.id, form); - - return ( -
- - - {/* Output Format Selection */} - ( - - Output Format - - - - - Choose the output document format - - - - )} - /> - - {/* Content Section */} - ( - - {t('flow.content')} - - - - -
-
- Markdown support: **bold**, *italic*, - `code`, # Heading 1, ## Heading 2 -
-
- Lists: - bullet or 1. numbered -
-
- Tables: | Column 1 | Column 2 | (use | to - separate columns, <br> or \n for line breaks in - cells) -
-
- Other: --- for horizontal line, ``` for - code blocks -
-
-
- -
- )} - /> - - {/* Title & Subtitle */} - ( - - {t('flow.title')} - - - - - - )} - /> - - ( - - {t('flow.subtitle')} - - - - - - )} - /> - - {/* Logo Settings */} - ( - - {t('flow.logoImage')} - -
- { - const file = e.target.files?.[0]; - if (file) { - const reader = new FileReader(); - reader.onloadend = () => { - field.onChange(reader.result as string); - }; - reader.readAsDataURL(file); - } - }} - className="cursor-pointer" - /> - -
-
- - Upload an image file or paste a file path/URL/base64 - - -
- )} - /> - - ( - - {t('flow.logoPosition')} - - ({ label: val, value: val }), - )} - > - - - - )} - /> - -
- ( - - {t('flow.logoWidth')} (inches) - - - field.onChange(parseFloat(e.target.value)) - } - /> - - - - )} - /> - - ( - - {t('flow.logoHeight')} (inches) - - - field.onChange(parseFloat(e.target.value)) - } - /> - - - - )} - /> -
- - {/* Font Settings */} - ( - - {t('flow.fontFamily')} - - ({ label: val, value: val }), - )} - > - - - - )} - /> - -
- ( - - {t('flow.fontSize')} - - field.onChange(parseInt(e.target.value))} - /> - - - - )} - /> - - ( - - {t('flow.titleFontSize')} - - field.onChange(parseInt(e.target.value))} - /> - - - - )} - /> -
- - {/* Page Settings */} - ( - - {t('flow.pageSize')} - - ({ - label: val, - value: val, - }))} - > - - - - )} - /> - - ( - - {t('flow.orientation')} - - ({ label: val, value: val }), - )} - > - - - - )} - /> - - {/* Margins */} -
- ( - - {t('flow.marginTop')} (inches) - - - field.onChange(parseFloat(e.target.value)) - } - /> - - - - )} - /> - - ( - - {t('flow.marginBottom')} (inches) - - - field.onChange(parseFloat(e.target.value)) - } - /> - - - - )} - /> -
- - {/* Output Settings */} - ( - - {t('flow.filename')} - - - - - - )} - /> - - ( - - {t('flow.outputDirectory')} - - - - - - )} - /> - - {/* Additional Options */} - ( - -
- {t('flow.addPageNumbers')} - - Add page numbers to the document - -
- - - -
- )} - /> - - ( - -
- {t('flow.addTimestamp')} - - Add generation timestamp to the document - -
- - - -
- )} - /> - - ( - - {t('flow.watermarkText')} - - - - - - )} - /> - -
} - /> -
-
-
- -
-
- ); -} - -export default memo(PDFGeneratorForm); diff --git a/web/src/pages/agent/form/pdf-generator-form/use-values.ts b/web/src/pages/agent/form/pdf-generator-form/use-values.ts deleted file mode 100644 index 1ecd829089..0000000000 --- a/web/src/pages/agent/form/pdf-generator-form/use-values.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { useMemo } from 'react'; -import { Node } from 'reactflow'; -import { initialPDFGeneratorValues } from '../../constant'; - -export const useValues = (node?: Node) => { - const values = useMemo(() => { - return node?.data.form ?? initialPDFGeneratorValues; - }, [node?.data.form]); - - return values; -}; diff --git a/web/src/pages/agent/hooks/use-add-node.ts b/web/src/pages/agent/hooks/use-add-node.ts index 3930ab8514..45f9179493 100644 --- a/web/src/pages/agent/hooks/use-add-node.ts +++ b/web/src/pages/agent/hooks/use-add-node.ts @@ -17,6 +17,7 @@ import { initialCodeValues, initialCrawlerValues, initialDataOperationsValues, + initialDocGeneratorValues, initialDuckValues, initialEmailValues, initialExeSqlValues, @@ -31,7 +32,6 @@ import { initialLoopValues, initialMessageValues, initialNoteValues, - initialPDFGeneratorValues, initialParserValues, initialPubMedValues, initialRetrievalValues, @@ -180,7 +180,7 @@ export const useInitializeOperatorParams = () => { [Operator.Loop]: initialLoopValues, [Operator.LoopStart]: {}, [Operator.ExitLoop]: {}, - [Operator.PDFGenerator]: initialPDFGeneratorValues, + [Operator.DocGenerator]: initialDocGeneratorValues, [Operator.ExcelProcessor]: {}, }; }, [llmId]); diff --git a/web/src/pages/agent/hooks/use-get-begin-query.tsx b/web/src/pages/agent/hooks/use-get-begin-query.tsx index 82265a3025..1c6e2aa03c 100644 --- a/web/src/pages/agent/hooks/use-get-begin-query.tsx +++ b/web/src/pages/agent/hooks/use-get-begin-query.tsx @@ -173,6 +173,38 @@ export function useBuildBeginDynamicVariableOptions() { const Env = 'env.'; +function splitOperatorOutputValue(value?: string) { + if (!value) { + return {}; + } + + const [nodeId, output] = value.split('@'); + return { nodeId, output }; +} + +function filterDocGeneratorDownloadOutputOptions( + groups: Array<{ + options: Array<{ value?: string } & Record>; + }>, + allowDocGeneratorDownloadOutput: boolean, + getOperatorTypeFromId: (nodeId?: string) => string | undefined, +) { + return groups.map((group) => ({ + ...group, + options: group.options.filter((option) => { + const { nodeId, output } = splitOperatorOutputValue(option.value); + if ( + output === 'download' && + getOperatorTypeFromId(nodeId) === Operator.DocGenerator + ) { + return allowDocGeneratorDownloadOutput; + } + + return true; + }), + })); +} + export function useBuildGlobalWithBeginVariableOptions() { const { data } = useFetchAgent(); const dynamicBeginOptions = useBuildBeginDynamicVariableOptions(); @@ -270,6 +302,9 @@ export function useBuildQueryVariableOptions({ } & BuildQueryVariableOptions = {}) { const node = useContext(AgentFormContext) || n; const nodes = useGraphStore((state) => state.nodes); + const getOperatorTypeFromId = useGraphStore( + (state) => state.getOperatorTypeFromId, + ); const options = useBuildVariableOptions(node?.id, node?.parentId); @@ -282,14 +317,22 @@ export function useBuildQueryVariableOptions({ [AgentVariableType.Begin]: globalWithBeginVariableOptions, [AgentVariableType.Conversation]: conversationOptions, }; + const allowDocGeneratorDownloadOutput = + node?.data?.label === Operator.Message; const nextOptions = useMemo(() => { - return [ - ...globalWithBeginVariableOptions, - ...conversationOptions, - ...options, - ]; - }, [conversationOptions, globalWithBeginVariableOptions, options]); + return filterDocGeneratorDownloadOutputOptions( + [...globalWithBeginVariableOptions, ...conversationOptions, ...options], + allowDocGeneratorDownloadOutput, + getOperatorTypeFromId, + ); + }, [ + allowDocGeneratorDownloadOutput, + conversationOptions, + getOperatorTypeFromId, + globalWithBeginVariableOptions, + options, + ]); // Which options are entirely under external control? if (!isEmpty(nodeIds) || !isEmpty(variablesExceptOperatorOutputs)) { @@ -299,10 +342,11 @@ export function useBuildQueryVariableOptions({ variablesExceptOperatorOutputs?.map((x) => AgentVariableOptionsMap[x]) ?? []; - return [ - ...flatten(variablesExceptOperatorOutputsOptions), - ...nodeOutputOptions, - ]; + return filterDocGeneratorDownloadOutputOptions( + [...flatten(variablesExceptOperatorOutputsOptions), ...nodeOutputOptions], + allowDocGeneratorDownloadOutput, + getOperatorTypeFromId, + ); } return nextOptions; } diff --git a/web/src/pages/agent/operator-icon.tsx b/web/src/pages/agent/operator-icon.tsx index 60c4028482..30a888257d 100644 --- a/web/src/pages/agent/operator-icon.tsx +++ b/web/src/pages/agent/operator-icon.tsx @@ -56,7 +56,7 @@ export const LucideIconMap = { [Operator.DataOperations]: FileCode, [Operator.Loop]: InfinityIcon, [Operator.ExitLoop]: LogOut, - [Operator.PDFGenerator]: FileText, + [Operator.DocGenerator]: FileText, }; const Empty = () => {