i18n(it): complete Italian translation (49% → 100%) (#15729)

## Summary Brings the Italian locale (`web/src/locales/it.ts`) from approximately **49% coverage** (986 out of 2008 keys) to **100% coverage** (2008/2008 keys), fully aligned with `en.ts` in structure and key count. ### What was missing Previously untranslated sections include: - `skills`, `skillSearch` — agent skills UI - `memories`, `memory` — memory management - `datasetOverview` — dataset statistics - `llmTools` — LLM tool configuration - `explore` — explore/template page - `dataflowParser` — ingestion pipeline parser settings - `flow` (complete) — agent canvas / workflow editor - `setting` connectors section — data source connectors (Google Drive, Gmail, Box, RDBMS, etc.) - Various `header`, `common`, `knowledgeBase`, `chat`, `fileManager` additions ### Translation conventions - Technical terms kept in English: RAG, LLM, API, token, chunk, embedding, prompt, dataset, agent, canvas, knowledge graph, RAPTOR, webhook, and all model/provider names (Bedrock, Tavily, SearXNG, etc.) - `{{placeholder}}` variables preserved unchanged - Informal *tu* form used consistently, matching the existing style - All previously correct translations preserved
2026-06-29 15:31:05 +08:00 · 2026-06-08 12:06:47 +02:00
parent 86b320e746
commit 8abe627e69
2 changed files with 2197 additions and 190 deletions
--- a/common/data_source/rdbms_connector.py
+++ b/common/data_source/rdbms_connector.py
@@ -1,9 +1,10 @@
-"""RDBMS (MySQL/PostgreSQL) data source connector for importing data from relational databases."""
+"""RDBMS (MySQL/PostgreSQL/MSSQL) data source connector for importing data from relational databases."""

 import copy
 import hashlib
 import json
 import logging
+import re
 from datetime import datetime, timezone
 from enum import Enum
 from typing import Any, Dict, Generator, Optional, Union
@@ -26,11 +27,12 @@ class DatabaseType(str, Enum):
    """Supported database types."""
    MYSQL = "mysql"
    POSTGRESQL = "postgresql"
+    MSSQL = "mssql"


 class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
    """
-    Import rows from MySQL or PostgreSQL into documents.
+    Import rows from MySQL, PostgreSQL or Microsoft SQL Server into documents.

    The flow is:
    1. Connect to the configured database.
@@ -58,7 +60,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
        Initialize the RDBMS connector.
        
        Args:
-            db_type: Database type ('mysql' or 'postgresql')
+            db_type: Database type ('mysql', 'postgresql', or 'mssql')
            host: Database host
            port: Database port
            database: Database name
@@ -73,8 +75,10 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
        self.host = host.strip()
        self.port = port
        self.database = database.strip()
-        self.query = query.strip()
-        self.content_columns = [c.strip() for c in content_columns.split(",") if c.strip()]
+        self.query = self._sanitize_query(query)
+        # content_columns is optional: when empty, every column returned by the
+        # query is used as document content (see _content_columns_for_row).
+        self.content_columns = [c.strip() for c in (content_columns or "").split(",") if c.strip()]
        self.metadata_columns = [c.strip() for c in (metadata_columns or "").split(",") if c.strip()]
        self.id_column = id_column.strip() if id_column else None
        self.timestamp_column = timestamp_column.strip() if timestamp_column else None
@@ -86,6 +90,44 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
        self._sync_config: Dict[str, Any] | None = None
        self._pending_sync_cursor_value: Any = None

+    # Language labels that may leak in when a query is pasted from a
+    # markdown ```sql code fence.
+    _FENCE_LANGUAGES = {"sql", "tsql", "t-sql", "mssql", "mysql", "postgresql", "psql"}
+
+    @classmethod
+    def _sanitize_query(cls, raw: Optional[str]) -> str:
+        """Clean a user-supplied SQL query.
+
+        Tolerates queries pasted straight from a markdown code block, e.g.
+        a surrounding ``` ... ``` fence or a leading bare ``sql`` language
+        label on its own line.
+        """
+        query = (raw or "").strip()
+        if not query:
+            return ""
+        # Strip a surrounding ``` ... ``` markdown fence.
+        if query.startswith("```"):
+            query = query[3:]
+            if query.endswith("```"):
+                query = query[:-3]
+            query = query.strip()
+        # Drop a leading line that is only a code-fence language label.
+        head, _, tail = query.partition("\n")
+        if tail and head.strip().lower() in cls._FENCE_LANGUAGES:
+            query = tail.strip()
+        return query
+
+    def _content_columns_for_row(self, row_dict: Dict[str, Any]) -> list[str]:
+        """Resolve which columns make up the document content for a row.
+
+        When no content columns are configured, every column returned by the
+        query is used, excluding the structural id/timestamp columns.
+        """
+        if self.content_columns:
+            return self.content_columns
+        excluded = {self.id_column, self.timestamp_column}
+        return [col for col in row_dict.keys() if col not in excluded]
+
    def load_credentials(self, credentials: Dict[str, Any]) -> Dict[str, Any] | None:
        """Load database credentials."""
        logging.debug(f"Loading credentials for {self.db_type} database: {self.database}")
@@ -142,7 +184,25 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
                )
            except Exception as e:
                raise ConnectorValidationError(f"Failed to connect to PostgreSQL: {e}")
-        
+        elif self.db_type == DatabaseType.MSSQL:
+            try:
+                import pymssql
+            except ImportError:
+                raise ConnectorValidationError(
+                    "pymssql not installed. Please install pymssql."
+                )
+            try:
+                self._connection = pymssql.connect(
+                    server=self.host,
+                    port=self.port,
+                    user=username,
+                    password=password,
+                    database=self.database,
+                    charset="UTF-8",
+                )
+            except Exception as e:
+                raise ConnectorValidationError(f"Failed to connect to SQL Server: {e}")
+
        return self._connection

    def _close_connection(self):
@@ -162,6 +222,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
        try:
            if self.db_type == DatabaseType.MYSQL:
                cursor.execute("SHOW TABLES")
+            elif self.db_type == DatabaseType.MSSQL:
+                cursor.execute(
+                    "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES "
+                    "WHERE TABLE_TYPE = 'BASE TABLE'"
+                )
            else:
                cursor.execute(
                    "SELECT table_name FROM information_schema.tables "
@@ -174,22 +239,51 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):


    def _get_base_queries(self) -> list[str]:
+        """Return the list of base SQL queries to execute.
+
+        When a custom query is configured, returns it as a single-element list.
+        Otherwise returns a ``SELECT * FROM <table>`` query for every table in
+        the database.
+        """
        if self.query:
            return [self.query.rstrip(";")]
        return [f"SELECT * FROM {table}" for table in self._get_tables()]


+    @staticmethod
+    def _strip_trailing_order_by(query: str) -> str:
+        """Remove a trailing top-level ORDER BY clause.
+
+        SQL Server rejects ORDER BY inside a derived table
+        ("SELECT ... FROM (<query>) AS src"), and row order is irrelevant for
+        ingestion. A parenthesised ORDER BY (e.g. an OVER(...) window clause)
+        is left untouched because it is not at depth 0.
+        """
+        cleaned = query.rstrip().rstrip(";").rstrip()
+        for match in reversed(list(re.finditer(r"\border\s+by\b", cleaned, re.IGNORECASE))):
+            prefix = cleaned[: match.start()]
+            if prefix.count("(") == prefix.count(")"):
+                return prefix.rstrip()
+        return cleaned
+
    def _wrap_query(self, base_query: str, select_clause: str = "*") -> str:
-        return f"SELECT {select_clause} FROM ({base_query}) AS ragflow_src"
+        """Wrap *base_query* as a derived table so WHERE / SELECT clauses can be appended.
+
+        Strips any trailing top-level ORDER BY before wrapping because SQL Server
+        rejects ORDER BY inside a derived-table subquery.
+        """
+        inner = self._strip_trailing_order_by(base_query)
+        return f"SELECT {select_clause} FROM ({inner}) AS ragflow_src"


    @staticmethod
    def serialize_cursor_value(value: Any) -> Any:
-        # Example:
-        # - int cursor 42 is stored as 42
-        # - datetime cursor 2026-05-07T12:34:56+00:00 is stored as
-        #   {"__ragflow_rdbms_cursor_type__": "datetime", "value": "..."}
-        # Only datetime needs wrapping because connector config is JSON.
+        """Serialize a cursor value to a JSON-safe representation.
+
+        Primitive types (int, float, str) are returned as-is. ``datetime``
+        objects are wrapped in a typed dict so they survive a JSON round-trip:
+        ``{"__ragflow_rdbms_cursor_type__": "datetime", "value": "<isoformat>"}``.
+        """
        if isinstance(value, datetime):
            return {
                "__ragflow_rdbms_cursor_type__": "datetime",
@@ -200,8 +294,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):

    @staticmethod
    def deserialize_cursor_value(value: Any) -> Any:
-        # Reverse the datetime wrapper above.
-        # Non-datetime cursors such as int/str/float are returned as-is.
+        """Deserialize a cursor value produced by :meth:`serialize_cursor_value`.
+
+        Recognises the ``__ragflow_rdbms_cursor_type__`` wrapper and converts it
+        back to a ``datetime``. Any other value is returned unchanged.
+        """
        if (
            isinstance(value, dict)
            and value.get("__ragflow_rdbms_cursor_type__") == "datetime"
@@ -211,6 +308,12 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):


    def _format_sql_value(self, value: Any) -> str:
+        """Format a Python value as a SQL literal suitable for embedding in a WHERE clause.
+
+        Handles ``datetime``, ``bool``, numeric, and string types with
+        database-specific formatting where needed (e.g. MySQL datetime format vs.
+        ISO-8601 for PostgreSQL/MSSQL, boolean literals for PostgreSQL).
+        """
        if isinstance(value, datetime):
            if value.tzinfo is None:
                value = value.replace(tzinfo=timezone.utc)
@@ -238,8 +341,20 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
        start: Any = None,
        end: Any = None,
    ) -> str:
+        """Build a query that filters rows by the configured timestamp column.
+
+        When no timestamp column is set, or neither bound is provided, the base
+        query is returned verbatim (no derived-table wrapping) so that trailing
+        clauses such as ORDER BY remain valid for all database backends.
+        Otherwise the base query is wrapped as a derived table and a WHERE clause
+        with ``> start`` and/or ``<= end`` conditions is appended.
+        """
        if not self.timestamp_column or (start is None and end is None):
-            return self._wrap_query(base_query)
+            # No incremental filter to apply: run the user's query verbatim so
+            # trailing clauses such as ORDER BY stay valid. Wrapping it as a
+            # derived table ("SELECT * FROM (... ORDER BY ...) AS src") is
+            # rejected by SQL Server.
+            return base_query

        conditions = []
        if start is not None:
@@ -258,6 +373,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):


    def _build_max_timestamp_query(self, base_query: str) -> str:
+        """Build a query that returns the maximum value of the timestamp column."""
        return (
            f"SELECT MAX(ragflow_src.{self.timestamp_column}) "
            f"FROM ({base_query}) AS ragflow_src"
@@ -265,14 +381,25 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):


    def _build_slim_query(self, base_query: str) -> str:
+        """Build a lightweight query that fetches only the columns needed to identify documents.
+
+        Selects the id column when configured, falls back to the content columns,
+        or selects every column when neither is set (the whole row is hashed to
+        derive the document id).
+        """
        columns = [self.id_column] if self.id_column else self.content_columns
+        if not columns:
+            # No id column and no explicit content columns: the slim snapshot
+            # hashes the whole row, so it needs every column.
+            return self._wrap_query(base_query, "*")
        select_clause = ", ".join(f"ragflow_src.{column}" for column in columns)
        return self._wrap_query(base_query, select_clause)


    def _build_content(self, row_dict: Dict[str, Any]) -> str:
+        """Build the document content string from the resolved content columns of a row."""
        content_parts = []
-        for col in self.content_columns:
+        for col in self._content_columns_for_row(row_dict):
            if col not in row_dict or row_dict[col] is None:
                continue
            value = row_dict[col]
@@ -283,6 +410,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):


    def _build_document_id_from_row(self, row_dict: Dict[str, Any]) -> str:
+        """Derive a stable document id from a database row.
+
+        Uses ``<db_type>:<database>:<id_column_value>`` when an id column is
+        configured, otherwise falls back to an MD5 hash of the document content.
+        """
        if self.id_column and self.id_column in row_dict and row_dict[self.id_column] is not None:
            return f"{self.db_type}:{self.database}:{row_dict[self.id_column]}"
        content = self._build_content(row_dict)
@@ -296,7 +428,9 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
        column_names: list[str],
    ) -> Document:
        """Convert a database row to a Document."""
-        row_dict = dict(zip(column_names, row)) if isinstance(row, (list, tuple)) else row
+        # pyodbc.Row (SQL Server) is neither a tuple nor a dict and does not
+        # support string-keyed lookup, so always normalise to a plain dict.
+        row_dict = row if isinstance(row, dict) else dict(zip(column_names, row))
        content = self._build_content(row_dict)
        metadata = {}
        for col in self.metadata_columns:
@@ -320,7 +454,8 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
                else:
                    doc_updated_at = ts_value.astimezone(timezone.utc)

-        first_content_col = self.content_columns[0] if self.content_columns else "record"
+        resolved_content_columns = self._content_columns_for_row(row_dict)
+        first_content_col = resolved_content_columns[0] if resolved_content_columns else "record"
        semantic_id = (
            str(row_dict.get(first_content_col, "database_record"))
            .replace("\n", " ")
@@ -382,6 +517,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
        self,
        query: str,
    ) -> Generator[list[SlimDocument], None, None]:
+        """Yield batches of :class:`SlimDocument` objects from *query*.
+
+        Only the document id is populated; no content is fetched. Used during
+        permission sync to detect and remove stale documents.
+        """
        connection = self._get_connection()
        cursor = connection.cursor()

@@ -392,7 +532,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):

            batch: list[SlimDocument] = []
            for row in cursor:
-                row_dict = dict(zip(column_names, row)) if isinstance(row, (list, tuple)) else row
+                row_dict = row if isinstance(row, dict) else dict(zip(column_names, row))
                batch.append(SlimDocument(id=self._build_document_id_from_row(row_dict)))
                if len(batch) >= self.batch_size:
                    yield batch
@@ -409,6 +549,12 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):


    def get_max_cursor_value(self) -> Any:
+        """Return the maximum value of the timestamp column across all base queries.
+
+        Returns ``None`` when no timestamp column is configured or the result set
+        is empty.  Used to snapshot the upper bound of the sync window before
+        fetching documents.
+        """
        if not self.timestamp_column:
            return None

@@ -460,6 +606,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
        self,
        callback: Any = None,
    ) -> Generator[list[SlimDocument], None, None]:
+        """Yield slim snapshots of all current documents for stale-document reconciliation."""
        del callback

        base_queries = self._get_base_queries()
@@ -475,6 +622,12 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
            self._close_connection()

    def prepare_sync_state(self, connector_id: str, config: Dict[str, Any]) -> None:
+        """Snapshot the current maximum cursor value before documents are fetched.
+
+        Must be called before :meth:`load_from_cursor_range` so the upper bound
+        of the sync window is captured atomically and can be persisted afterwards
+        via :meth:`persist_sync_state`.
+        """
        self._sync_connector_id = connector_id
        self._sync_config = copy.deepcopy(config)
        if not self.timestamp_column:
@@ -484,12 +637,18 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):


    def get_saved_sync_cursor_value(self) -> Any:
+        """Return the cursor value that was persisted at the end of the previous sync run."""
        if self._sync_config is None:
            return None
        return self.deserialize_cursor_value(self._sync_config.get("sync_cursor_value"))


    def persist_sync_state(self) -> None:
+        """Write the pending cursor value back to the connector config in the database.
+
+        No-op when no timestamp column is configured or :meth:`prepare_sync_state`
+        was not called.
+        """
        if not self.timestamp_column or self._sync_connector_id is None or self._sync_config is None:
            return

@@ -508,6 +667,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
        start_value: Any = None,
        end_value: Any = None,
    ) -> Generator[list[Document], None, None]:
+        """Yield documents whose timestamp column falls in ``(start_value, end_value]``.
+
+        Returns an empty iterator when *end_value* is ``None`` or the range is
+        empty (``end_value <= start_value``).
+        """
        if end_value is None:
            self._close_connection()
            return iter(())
@@ -540,12 +704,10 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
        
        if not self.database:
            raise ConnectorValidationError("Database name is required.")
-        
-        if not self.content_columns:
-            raise ConnectorValidationError(
-                "At least one content column must be specified."
-            )
-        
+
+        # content_columns is intentionally optional: an empty value means
+        # "use every column returned by the query" (see _content_columns_for_row).
+
        try:
            connection = self._get_connection()
            cursor = connection.cursor()