i18n(it): complete Italian translation (49% → 100%) (#15729)

## Summary

Brings the Italian locale (`web/src/locales/it.ts`) from approximately
**49% coverage** (986 out of 2008 keys) to **100% coverage** (2008/2008
keys), fully aligned with `en.ts` in structure and key count.

### What was missing

Previously untranslated sections include:
- `skills`, `skillSearch` — agent skills UI
- `memories`, `memory` — memory management
- `datasetOverview` — dataset statistics
- `llmTools` — LLM tool configuration
- `explore` — explore/template page
- `dataflowParser` — ingestion pipeline parser settings
- `flow` (complete) — agent canvas / workflow editor
- `setting` connectors section — data source connectors (Google Drive,
Gmail, Box, RDBMS, etc.)
- Various `header`, `common`, `knowledgeBase`, `chat`, `fileManager`
additions

### Translation conventions

- Technical terms kept in English: RAG, LLM, API, token, chunk,
embedding, prompt, dataset, agent, canvas, knowledge graph, RAPTOR,
webhook, and all model/provider names (Bedrock, Tavily, SearXNG, etc.)
- `{{placeholder}}` variables preserved unchanged
- Informal *tu* form used consistently, matching the existing style
- All previously correct translations preserved
This commit is contained in:
gaulin-ai
2026-06-08 12:06:47 +02:00
committed by GitHub
parent 86b320e746
commit 8abe627e69
2 changed files with 2197 additions and 190 deletions

View File

@@ -1,9 +1,10 @@
"""RDBMS (MySQL/PostgreSQL) data source connector for importing data from relational databases."""
"""RDBMS (MySQL/PostgreSQL/MSSQL) data source connector for importing data from relational databases."""
import copy
import hashlib
import json
import logging
import re
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, Generator, Optional, Union
@@ -26,11 +27,12 @@ class DatabaseType(str, Enum):
"""Supported database types."""
MYSQL = "mysql"
POSTGRESQL = "postgresql"
MSSQL = "mssql"
class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
"""
Import rows from MySQL or PostgreSQL into documents.
Import rows from MySQL, PostgreSQL or Microsoft SQL Server into documents.
The flow is:
1. Connect to the configured database.
@@ -58,7 +60,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
Initialize the RDBMS connector.
Args:
db_type: Database type ('mysql' or 'postgresql')
db_type: Database type ('mysql', 'postgresql', or 'mssql')
host: Database host
port: Database port
database: Database name
@@ -73,8 +75,10 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
self.host = host.strip()
self.port = port
self.database = database.strip()
self.query = query.strip()
self.content_columns = [c.strip() for c in content_columns.split(",") if c.strip()]
self.query = self._sanitize_query(query)
# content_columns is optional: when empty, every column returned by the
# query is used as document content (see _content_columns_for_row).
self.content_columns = [c.strip() for c in (content_columns or "").split(",") if c.strip()]
self.metadata_columns = [c.strip() for c in (metadata_columns or "").split(",") if c.strip()]
self.id_column = id_column.strip() if id_column else None
self.timestamp_column = timestamp_column.strip() if timestamp_column else None
@@ -86,6 +90,44 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
self._sync_config: Dict[str, Any] | None = None
self._pending_sync_cursor_value: Any = None
# Language labels that may leak in when a query is pasted from a
# markdown ```sql code fence. _sanitize_query lower-cases the first line of
# the query and drops that line when it matches one of these labels.
_FENCE_LANGUAGES = {"sql", "tsql", "t-sql", "mssql", "mysql", "postgresql", "psql"}
@classmethod
def _sanitize_query(cls, raw: Optional[str]) -> str:
    """Normalise a user-supplied SQL query.

    Queries are frequently pasted straight out of a markdown code block, so
    two kinds of residue are removed: a surrounding ``` ... ``` fence, and a
    first line that holds nothing but a code-fence language label such as
    ``sql``.

    Args:
        raw: The query text as entered by the user; may be ``None``.

    Returns:
        The cleaned query, or an empty string when nothing was supplied.
    """
    text = (raw or "").strip()
    if not text:
        return ""

    if text.startswith("```"):
        # Remove the opening fence, then the closing fence when present.
        text = text[3:]
        text = text[:-3] if text.endswith("```") else text
        text = text.strip()

    # A language label only counts when it sits alone on the first line and
    # real query text follows it.
    first_line, _, remainder = text.partition("\n")
    label = first_line.strip().lower()
    if remainder and label in cls._FENCE_LANGUAGES:
        return remainder.strip()
    return text
def _content_columns_for_row(self, row_dict: Dict[str, Any]) -> list[str]:
    """Pick the columns whose values form the document content for a row.

    Explicitly configured content columns always win. When none are
    configured, every column present in the row is used except the
    structural id and timestamp columns.
    """
    configured = self.content_columns
    if not configured:
        structural = {self.id_column, self.timestamp_column}
        return [name for name in row_dict.keys() if name not in structural]
    return configured
def load_credentials(self, credentials: Dict[str, Any]) -> Dict[str, Any] | None:
"""Load database credentials."""
logging.debug(f"Loading credentials for {self.db_type} database: {self.database}")
@@ -142,6 +184,24 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
)
except Exception as e:
raise ConnectorValidationError(f"Failed to connect to PostgreSQL: {e}")
elif self.db_type == DatabaseType.MSSQL:
try:
import pymssql
except ImportError:
raise ConnectorValidationError(
"pymssql not installed. Please install pymssql."
)
try:
self._connection = pymssql.connect(
server=self.host,
port=self.port,
user=username,
password=password,
database=self.database,
charset="UTF-8",
)
except Exception as e:
raise ConnectorValidationError(f"Failed to connect to SQL Server: {e}")
return self._connection
@@ -162,6 +222,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
try:
if self.db_type == DatabaseType.MYSQL:
cursor.execute("SHOW TABLES")
elif self.db_type == DatabaseType.MSSQL:
cursor.execute(
"SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES "
"WHERE TABLE_TYPE = 'BASE TABLE'"
)
else:
cursor.execute(
"SELECT table_name FROM information_schema.tables "
@@ -174,22 +239,51 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
def _get_base_queries(self) -> list[str]:
    """List the base SQL statements this connector should run.

    A configured custom query is returned on its own, with trailing
    semicolons removed. Without one, a ``SELECT * FROM <table>`` statement
    is produced for each table reported by ``_get_tables``.
    """
    custom_query = self.query
    if not custom_query:
        return [f"SELECT * FROM {table}" for table in self._get_tables()]
    return [custom_query.rstrip(";")]
@staticmethod
def _strip_trailing_order_by(query: str) -> str:
    """Remove a trailing top-level ORDER BY clause when doing so is safe.

    SQL Server rejects a bare ORDER BY inside a derived table
    ("SELECT ... FROM (<query>) AS src"), and plain row order is irrelevant
    for ingestion, so a purely cosmetic ORDER BY is dropped.

    The clause is kept untouched when it decides WHICH rows are returned:

    * it is followed by LIMIT, OFFSET, FETCH or FOR XML/JSON, or
    * the statement uses ``SELECT TOP``.

    Removing it in those cases would silently change the result set, and
    those forms are accepted inside a derived table anyway.

    Text inside parentheses (e.g. an ``OVER (ORDER BY ...)`` window clause or
    a subquery) and inside quotes ('...', "...", `...`, [...]) is ignored, so
    a parenthesis or the words "order by" inside a literal or quoted
    identifier cannot confuse the search.

    Args:
        query: The SQL statement to clean.

    Returns:
        The statement without trailing whitespace/semicolons, and without
        its trailing top-level ORDER BY clause when removal is safe.
    """
    cleaned = query.rstrip().rstrip(";").rstrip()

    # Build a same-length copy in which quoted text and everything nested in
    # parentheses is blanked out; keyword searches on it only see depth-0 SQL
    # and match offsets map 1:1 onto ``cleaned``.
    closers = {"'": "'", '"': '"', "`": "`", "[": "]"}
    visible = []
    depth = 0
    closing_quote = None
    for ch in cleaned:
        if closing_quote is not None:
            # A doubled quote ('') closes and immediately reopens, which is
            # equivalent to staying inside the literal.
            if ch == closing_quote:
                closing_quote = None
            visible.append(" ")
        elif ch in closers:
            closing_quote = closers[ch]
            visible.append(" ")
        elif ch == "(":
            depth += 1
            visible.append(" ")
        elif ch == ")":
            depth = max(depth - 1, 0)
            visible.append(" ")
        else:
            visible.append(ch if depth == 0 else " ")
    top_level = "".join(visible)

    matches = list(re.finditer(r"\border\s+by\b", top_level, re.IGNORECASE))
    if not matches:
        return cleaned

    last = matches[-1]
    tail = top_level[last.end():]
    head = top_level[: last.start()]
    limits_rows = re.search(
        r"\b(?:limit|offset|fetch|for\s+(?:xml|json))\b", tail, re.IGNORECASE
    ) or re.search(
        r"\bselect\s+(?:(?:distinct|all)\s+)?top\b", head, re.IGNORECASE
    )
    if limits_rows:
        # The ORDER BY selects rows rather than merely sorting them.
        return cleaned
    return cleaned[: last.start()].rstrip()
def _wrap_query(self, base_query: str, select_clause: str = "*") -> str:
return f"SELECT {select_clause} FROM ({base_query}) AS ragflow_src"
"""Wrap *base_query* as a derived table so WHERE / SELECT clauses can be appended.
Strips any trailing top-level ORDER BY before wrapping because SQL Server
rejects ORDER BY inside a derived-table subquery.
"""
inner = self._strip_trailing_order_by(base_query)
return f"SELECT {select_clause} FROM ({inner}) AS ragflow_src"
@staticmethod
def serialize_cursor_value(value: Any) -> Any:
# Example:
# - int cursor 42 is stored as 42
# - datetime cursor 2026-05-07T12:34:56+00:00 is stored as
# {"__ragflow_rdbms_cursor_type__": "datetime", "value": "..."}
# Only datetime needs wrapping because connector config is JSON.
"""Serialize a cursor value to a JSON-safe representation.
Primitive types (int, float, str) are returned as-is. ``datetime``
objects are wrapped in a typed dict so they survive a JSON round-trip:
``{"__ragflow_rdbms_cursor_type__": "datetime", "value": "<isoformat>"}``.
"""
if isinstance(value, datetime):
return {
"__ragflow_rdbms_cursor_type__": "datetime",
@@ -200,8 +294,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
@staticmethod
def deserialize_cursor_value(value: Any) -> Any:
# Reverse the datetime wrapper above.
# Non-datetime cursors such as int/str/float are returned as-is.
"""Deserialize a cursor value produced by :meth:`serialize_cursor_value`.
Recognises the ``__ragflow_rdbms_cursor_type__`` wrapper and converts it
back to a ``datetime``. Any other value is returned unchanged.
"""
if (
isinstance(value, dict)
and value.get("__ragflow_rdbms_cursor_type__") == "datetime"
@@ -211,6 +308,12 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
def _format_sql_value(self, value: Any) -> str:
"""Format a Python value as a SQL literal suitable for embedding in a WHERE clause.
Handles ``datetime``, ``bool``, numeric, and string types with
database-specific formatting where needed (e.g. MySQL datetime format vs.
ISO-8601 for PostgreSQL/MSSQL, boolean literals for PostgreSQL).
"""
if isinstance(value, datetime):
if value.tzinfo is None:
value = value.replace(tzinfo=timezone.utc)
@@ -238,8 +341,20 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
start: Any = None,
end: Any = None,
) -> str:
"""Build a query that filters rows by the configured timestamp column.
When no timestamp column is set, or neither bound is provided, the base
query is returned verbatim (no derived-table wrapping) so that trailing
clauses such as ORDER BY remain valid for all database backends.
Otherwise the base query is wrapped as a derived table and a WHERE clause
with ``> start`` and/or ``<= end`` conditions is appended.
"""
if not self.timestamp_column or (start is None and end is None):
return self._wrap_query(base_query)
# No incremental filter to apply: run the user's query verbatim so
# trailing clauses such as ORDER BY stay valid. Wrapping it as a
# derived table ("SELECT * FROM (... ORDER BY ...) AS src") is
# rejected by SQL Server.
return base_query
conditions = []
if start is not None:
@@ -258,6 +373,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
def _build_max_timestamp_query(self, base_query: str) -> str:
"""Build a query that returns the maximum value of the timestamp column."""
return (
f"SELECT MAX(ragflow_src.{self.timestamp_column}) "
f"FROM ({base_query}) AS ragflow_src"
@@ -265,14 +381,25 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
def _build_slim_query(self, base_query: str) -> str:
    """Build the reduced query used to identify documents without loading them.

    Column choice, in order of preference:

    1. the id column, when one is configured;
    2. otherwise the configured content columns;
    3. otherwise every column (``*``), because with neither configured the
       document id has to be derived from the row's full content.

    Selected columns are qualified with the ``ragflow_src`` alias and the
    base query is wrapped through ``_wrap_query``.
    """
    key_columns = self.content_columns if not self.id_column else [self.id_column]
    if key_columns:
        qualified = [f"ragflow_src.{name}" for name in key_columns]
        return self._wrap_query(base_query, ", ".join(qualified))
    # Nothing narrows the selection: fall back to all columns.
    return self._wrap_query(base_query, "*")
def _build_content(self, row_dict: Dict[str, Any]) -> str:
"""Build the document content string from the resolved content columns of a row."""
content_parts = []
for col in self.content_columns:
for col in self._content_columns_for_row(row_dict):
if col not in row_dict or row_dict[col] is None:
continue
value = row_dict[col]
@@ -283,6 +410,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
def _build_document_id_from_row(self, row_dict: Dict[str, Any]) -> str:
"""Derive a stable document id from a database row.
Uses ``<db_type>:<database>:<id_column_value>`` when an id column is
configured, otherwise falls back to an MD5 hash of the document content.
"""
if self.id_column and self.id_column in row_dict and row_dict[self.id_column] is not None:
return f"{self.db_type}:{self.database}:{row_dict[self.id_column]}"
content = self._build_content(row_dict)
@@ -296,7 +428,9 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
column_names: list[str],
) -> Document:
"""Convert a database row to a Document."""
row_dict = dict(zip(column_names, row)) if isinstance(row, (list, tuple)) else row
# Some DB-API drivers return row objects that are neither a tuple nor a
# dict and do not support string-keyed lookup (pyodbc.Row is one example),
# so always normalise to a plain dict.
row_dict = row if isinstance(row, dict) else dict(zip(column_names, row))
content = self._build_content(row_dict)
metadata = {}
for col in self.metadata_columns:
@@ -320,7 +454,8 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
else:
doc_updated_at = ts_value.astimezone(timezone.utc)
first_content_col = self.content_columns[0] if self.content_columns else "record"
resolved_content_columns = self._content_columns_for_row(row_dict)
first_content_col = resolved_content_columns[0] if resolved_content_columns else "record"
semantic_id = (
str(row_dict.get(first_content_col, "database_record"))
.replace("\n", " ")
@@ -382,6 +517,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
self,
query: str,
) -> Generator[list[SlimDocument], None, None]:
"""Yield batches of :class:`SlimDocument` objects from *query*.
Only the document id is populated; no content is fetched. Used during
permission sync to detect and remove stale documents.
"""
connection = self._get_connection()
cursor = connection.cursor()
@@ -392,7 +532,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
batch: list[SlimDocument] = []
for row in cursor:
row_dict = dict(zip(column_names, row)) if isinstance(row, (list, tuple)) else row
row_dict = row if isinstance(row, dict) else dict(zip(column_names, row))
batch.append(SlimDocument(id=self._build_document_id_from_row(row_dict)))
if len(batch) >= self.batch_size:
yield batch
@@ -409,6 +549,12 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
def get_max_cursor_value(self) -> Any:
"""Return the maximum value of the timestamp column across all base queries.
Returns ``None`` when no timestamp column is configured or the result set
is empty. Used to snapshot the upper bound of the sync window before
fetching documents.
"""
if not self.timestamp_column:
return None
@@ -460,6 +606,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
self,
callback: Any = None,
) -> Generator[list[SlimDocument], None, None]:
"""Yield slim snapshots of all current documents for stale-document reconciliation."""
del callback
base_queries = self._get_base_queries()
@@ -475,6 +622,12 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
self._close_connection()
def prepare_sync_state(self, connector_id: str, config: Dict[str, Any]) -> None:
"""Snapshot the current maximum cursor value before documents are fetched.
Must be called before :meth:`load_from_cursor_range` so the upper bound
of the sync window is captured atomically and can be persisted afterwards
via :meth:`persist_sync_state`.
"""
self._sync_connector_id = connector_id
self._sync_config = copy.deepcopy(config)
if not self.timestamp_column:
@@ -484,12 +637,18 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
def get_saved_sync_cursor_value(self) -> Any:
"""Return the cursor value that was persisted at the end of the previous sync run."""
if self._sync_config is None:
return None
return self.deserialize_cursor_value(self._sync_config.get("sync_cursor_value"))
def persist_sync_state(self) -> None:
"""Write the pending cursor value back to the connector config in the database.
No-op when no timestamp column is configured or :meth:`prepare_sync_state`
was not called.
"""
if not self.timestamp_column or self._sync_connector_id is None or self._sync_config is None:
return
@@ -508,6 +667,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
start_value: Any = None,
end_value: Any = None,
) -> Generator[list[Document], None, None]:
"""Yield documents whose timestamp column falls in ``(start_value, end_value]``.
Returns an empty iterator when *end_value* is ``None`` or the range is
empty (``end_value <= start_value``).
"""
if end_value is None:
self._close_connection()
return iter(())
@@ -541,10 +705,8 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
if not self.database:
raise ConnectorValidationError("Database name is required.")
if not self.content_columns:
raise ConnectorValidationError(
"At least one content column must be specified."
)
# content_columns is intentionally optional: an empty value means
# "use every column returned by the query" (see _content_columns_for_row).
try:
connection = self._get_connection()

File diff suppressed because it is too large Load Diff