mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
i18n(it): complete Italian translation (49% → 100%) (#15729)
## Summary
Brings the Italian locale (`web/src/locales/it.ts`) from approximately
**49% coverage** (986 out of 2008 keys) to **100% coverage** (2008/2008
keys), fully aligned with `en.ts` in structure and key count.
### What was missing
Previously untranslated sections include:
- `skills`, `skillSearch` — agent skills UI
- `memories`, `memory` — memory management
- `datasetOverview` — dataset statistics
- `llmTools` — LLM tool configuration
- `explore` — explore/template page
- `dataflowParser` — ingestion pipeline parser settings
- `flow` (complete) — agent canvas / workflow editor
- `setting` connectors section — data source connectors (Google Drive,
Gmail, Box, RDBMS, etc.)
- Various `header`, `common`, `knowledgeBase`, `chat`, `fileManager`
additions
### Translation conventions
- Technical terms kept in English: RAG, LLM, API, token, chunk,
embedding, prompt, dataset, agent, canvas, knowledge graph, RAPTOR,
webhook, and all model/provider names (Bedrock, Tavily, SearXNG, etc.)
- `{{placeholder}}` variables preserved unchanged
- Informal *tu* form used consistently, matching the existing style
- All previously correct translations preserved
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
"""RDBMS (MySQL/PostgreSQL) data source connector for importing data from relational databases."""
|
||||
"""RDBMS (MySQL/PostgreSQL/MSSQL) data source connector for importing data from relational databases."""
|
||||
|
||||
import copy
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Generator, Optional, Union
|
||||
@@ -26,11 +27,12 @@ class DatabaseType(str, Enum):
|
||||
"""Supported database types."""
|
||||
MYSQL = "mysql"
|
||||
POSTGRESQL = "postgresql"
|
||||
MSSQL = "mssql"
|
||||
|
||||
|
||||
class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
"""
|
||||
Import rows from MySQL or PostgreSQL into documents.
|
||||
Import rows from MySQL, PostgreSQL or Microsoft SQL Server into documents.
|
||||
|
||||
The flow is:
|
||||
1. Connect to the configured database.
|
||||
@@ -58,7 +60,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
Initialize the RDBMS connector.
|
||||
|
||||
Args:
|
||||
db_type: Database type ('mysql' or 'postgresql')
|
||||
db_type: Database type ('mysql', 'postgresql', or 'mssql')
|
||||
host: Database host
|
||||
port: Database port
|
||||
database: Database name
|
||||
@@ -73,8 +75,10 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
self.host = host.strip()
|
||||
self.port = port
|
||||
self.database = database.strip()
|
||||
self.query = query.strip()
|
||||
self.content_columns = [c.strip() for c in content_columns.split(",") if c.strip()]
|
||||
self.query = self._sanitize_query(query)
|
||||
# content_columns is optional: when empty, every column returned by the
|
||||
# query is used as document content (see _content_columns_for_row).
|
||||
self.content_columns = [c.strip() for c in (content_columns or "").split(",") if c.strip()]
|
||||
self.metadata_columns = [c.strip() for c in (metadata_columns or "").split(",") if c.strip()]
|
||||
self.id_column = id_column.strip() if id_column else None
|
||||
self.timestamp_column = timestamp_column.strip() if timestamp_column else None
|
||||
@@ -86,6 +90,44 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
self._sync_config: Dict[str, Any] | None = None
|
||||
self._pending_sync_cursor_value: Any = None
|
||||
|
||||
# Language labels that may leak in when a query is pasted from a
|
||||
# markdown ```sql code fence.
|
||||
_FENCE_LANGUAGES = {"sql", "tsql", "t-sql", "mssql", "mysql", "postgresql", "psql"}
|
||||
|
||||
@classmethod
def _sanitize_query(cls, raw: Optional[str]) -> str:
    """Normalise a user-supplied SQL query string.

    Queries are sometimes pasted straight out of a markdown code block.
    This helper trims surrounding whitespace, removes an opening ``` fence
    (together with the closing fence when one is present) and discards a
    first line that consists solely of a code-fence language label listed
    in ``cls._FENCE_LANGUAGES``.

    Args:
        raw: The query text as entered by the user; may be ``None``.

    Returns:
        The cleaned query, or an empty string when nothing usable remains.
    """
    text = (raw or "").strip()
    if text == "":
        return ""

    fence = "```"
    if text.startswith(fence):
        # The closing fence is only looked for once an opening fence was seen.
        text = text[len(fence):]
        if text.endswith(fence):
            text = text[: -len(fence)]
        text = text.strip()

    # A language label only counts when it sits alone on the first line and
    # is followed by more text; a one-line query is never treated as a label.
    first_line, _, remainder = text.partition("\n")
    is_label = first_line.strip().lower() in cls._FENCE_LANGUAGES
    if remainder and is_label:
        return remainder.strip()
    return text
|
||||
|
||||
def _content_columns_for_row(self, row_dict: Dict[str, Any]) -> list[str]:
    """Return the column names whose values form the document content.

    Explicitly configured ``content_columns`` always win. When none are
    configured, every key of *row_dict* is used in its original order,
    except the configured id and timestamp columns.

    Args:
        row_dict: A single result row keyed by column name.

    Returns:
        The list of column names to read content from.
    """
    configured = self.content_columns
    if configured:
        return configured

    # id/timestamp are structural columns, not content; an unset column is
    # None here and simply never matches a real key.
    structural = (self.id_column, self.timestamp_column)
    resolved = []
    for name in row_dict:
        if name not in structural:
            resolved.append(name)
    return resolved
|
||||
|
||||
def load_credentials(self, credentials: Dict[str, Any]) -> Dict[str, Any] | None:
|
||||
"""Load database credentials."""
|
||||
logging.debug(f"Loading credentials for {self.db_type} database: {self.database}")
|
||||
@@ -142,7 +184,25 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
)
|
||||
except Exception as e:
|
||||
raise ConnectorValidationError(f"Failed to connect to PostgreSQL: {e}")
|
||||
|
||||
elif self.db_type == DatabaseType.MSSQL:
|
||||
try:
|
||||
import pymssql
|
||||
except ImportError:
|
||||
raise ConnectorValidationError(
|
||||
"pymssql not installed. Please install pymssql."
|
||||
)
|
||||
try:
|
||||
self._connection = pymssql.connect(
|
||||
server=self.host,
|
||||
port=self.port,
|
||||
user=username,
|
||||
password=password,
|
||||
database=self.database,
|
||||
charset="UTF-8",
|
||||
)
|
||||
except Exception as e:
|
||||
raise ConnectorValidationError(f"Failed to connect to SQL Server: {e}")
|
||||
|
||||
return self._connection
|
||||
|
||||
def _close_connection(self):
|
||||
@@ -162,6 +222,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
try:
|
||||
if self.db_type == DatabaseType.MYSQL:
|
||||
cursor.execute("SHOW TABLES")
|
||||
elif self.db_type == DatabaseType.MSSQL:
|
||||
cursor.execute(
|
||||
"SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES "
|
||||
"WHERE TABLE_TYPE = 'BASE TABLE'"
|
||||
)
|
||||
else:
|
||||
cursor.execute(
|
||||
"SELECT table_name FROM information_schema.tables "
|
||||
@@ -174,22 +239,51 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
|
||||
def _get_base_queries(self) -> list[str]:
    """Produce the base SQL statements this connector should run.

    A configured custom query is returned on its own, with any trailing
    semicolons removed. Without a custom query, one
    ``SELECT * FROM <table>`` statement is generated per name returned by
    ``self._get_tables()``.

    Returns:
        A list of SQL query strings.
    """
    custom = self.query
    if not custom:
        # NOTE(review): table names are interpolated unquoted; this assumes
        # _get_tables() only yields plain identifiers. Confirm upstream.
        statements = []
        for name in self._get_tables():
            statements.append(f"SELECT * FROM {name}")
        return statements
    return [custom.rstrip(";")]
|
||||
|
||||
|
||||
@staticmethod
def _strip_trailing_order_by(query: str) -> str:
    """Remove a trailing top-level ORDER BY clause when that is safe.

    SQL Server rejects a bare ORDER BY inside a derived table
    ("SELECT ... FROM (<query>) AS src"), and row order is irrelevant for
    ingestion, so a purely cosmetic trailing ORDER BY is dropped.

    The clause is kept whenever removing it could change which rows the
    query returns:

    * it is followed by a row-limiting clause (``LIMIT``, ``OFFSET`` or
      ``FETCH FIRST/NEXT``), which would otherwise be cut off with it;
    * the statement uses a top-level ``SELECT TOP``, where ORDER BY decides
      which rows are kept (SQL Server accepts ORDER BY in a derived table
      in that case).

    A parenthesised ORDER BY (e.g. an ``OVER(...)`` window clause) is left
    untouched because it is not at depth 0. Text inside single- or
    double-quoted literals is ignored while scanning, so a literal such as
    ``'order by'`` or ``'('`` cannot trigger or block the strip.

    Args:
        query: The SQL statement to clean.

    Returns:
        The statement without trailing whitespace/semicolons and, when safe,
        without its trailing top-level ORDER BY clause.
    """
    cleaned = query.rstrip().rstrip(";").rstrip()

    # Blank out quoted literals with same-length padding so that offsets in
    # ``masked`` line up with offsets in ``cleaned``.
    quoted = re.compile(r"'(?:[^']|'')*'" + r'|"(?:[^"]|"")*"')
    masked = quoted.sub(lambda m: " " * len(m.group(0)), cleaned)

    def at_depth_zero(pos: int) -> bool:
        head = masked[:pos]
        return head.count("(") == head.count(")")

    order_by = re.compile(r"\border\s+by\b", re.IGNORECASE)
    row_limit = re.compile(
        r"\b(?:limit\s+(?:\d|all\b)|offset\s+\d|fetch\s+(?:first|next)\b)",
        re.IGNORECASE,
    )
    select_top = re.compile(r"\bselect\s+(?:(?:all|distinct)\s+)?top\b", re.IGNORECASE)

    for match in reversed(list(order_by.finditer(masked))):
        if not at_depth_zero(match.start()):
            continue
        tail_offset = match.end()
        limited = any(
            at_depth_zero(tail_offset + hit.start())
            for hit in row_limit.finditer(masked[tail_offset:])
        )
        uses_top = any(
            at_depth_zero(hit.start())
            for hit in select_top.finditer(masked[: match.start()])
        )
        if limited or uses_top:
            # ORDER BY is semantically significant here; keep the query whole.
            return cleaned
        return cleaned[: match.start()].rstrip()
    return cleaned
|
||||
|
||||
def _wrap_query(self, base_query: str, select_clause: str = "*") -> str:
    """Wrap *base_query* as a derived table so WHERE / SELECT clauses can be appended.

    The query is first passed through ``self._strip_trailing_order_by``
    because SQL Server rejects ORDER BY inside a derived-table subquery.
    The derived table is always aliased ``ragflow_src``.

    Args:
        base_query: The SQL statement to wrap.
        select_clause: Column list for the outer SELECT; defaults to ``*``.

    Returns:
        ``SELECT <select_clause> FROM (<query>) AS ragflow_src``.
    """
    inner = self._strip_trailing_order_by(base_query)
    return f"SELECT {select_clause} FROM ({inner}) AS ragflow_src"
|
||||
|
||||
|
||||
@staticmethod
|
||||
def serialize_cursor_value(value: Any) -> Any:
|
||||
# Example:
|
||||
# - int cursor 42 is stored as 42
|
||||
# - datetime cursor 2026-05-07T12:34:56+00:00 is stored as
|
||||
# {"__ragflow_rdbms_cursor_type__": "datetime", "value": "..."}
|
||||
# Only datetime needs wrapping because connector config is JSON.
|
||||
"""Serialize a cursor value to a JSON-safe representation.
|
||||
|
||||
Primitive types (int, float, str) are returned as-is. ``datetime``
|
||||
objects are wrapped in a typed dict so they survive a JSON round-trip:
|
||||
``{"__ragflow_rdbms_cursor_type__": "datetime", "value": "<isoformat>"}``.
|
||||
"""
|
||||
if isinstance(value, datetime):
|
||||
return {
|
||||
"__ragflow_rdbms_cursor_type__": "datetime",
|
||||
@@ -200,8 +294,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
@staticmethod
|
||||
def deserialize_cursor_value(value: Any) -> Any:
|
||||
# Reverse the datetime wrapper above.
|
||||
# Non-datetime cursors such as int/str/float are returned as-is.
|
||||
"""Deserialize a cursor value produced by :meth:`serialize_cursor_value`.
|
||||
|
||||
Recognises the ``__ragflow_rdbms_cursor_type__`` wrapper and converts it
|
||||
back to a ``datetime``. Any other value is returned unchanged.
|
||||
"""
|
||||
if (
|
||||
isinstance(value, dict)
|
||||
and value.get("__ragflow_rdbms_cursor_type__") == "datetime"
|
||||
@@ -211,6 +308,12 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
|
||||
def _format_sql_value(self, value: Any) -> str:
|
||||
"""Format a Python value as a SQL literal suitable for embedding in a WHERE clause.
|
||||
|
||||
Handles ``datetime``, ``bool``, numeric, and string types with
|
||||
database-specific formatting where needed (e.g. MySQL datetime format vs.
|
||||
ISO-8601 for PostgreSQL/MSSQL, boolean literals for PostgreSQL).
|
||||
"""
|
||||
if isinstance(value, datetime):
|
||||
if value.tzinfo is None:
|
||||
value = value.replace(tzinfo=timezone.utc)
|
||||
@@ -238,8 +341,20 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
start: Any = None,
|
||||
end: Any = None,
|
||||
) -> str:
|
||||
"""Build a query that filters rows by the configured timestamp column.
|
||||
|
||||
When no timestamp column is set, or neither bound is provided, the base
|
||||
query is returned verbatim (no derived-table wrapping) so that trailing
|
||||
clauses such as ORDER BY remain valid for all database backends.
|
||||
Otherwise the base query is wrapped as a derived table and a WHERE clause
|
||||
with ``> start`` and/or ``<= end`` conditions is appended.
|
||||
"""
|
||||
if not self.timestamp_column or (start is None and end is None):
|
||||
return self._wrap_query(base_query)
|
||||
# No incremental filter to apply: run the user's query verbatim so
|
||||
# trailing clauses such as ORDER BY stay valid. Wrapping it as a
|
||||
# derived table ("SELECT * FROM (... ORDER BY ...) AS src") is
|
||||
# rejected by SQL Server.
|
||||
return base_query
|
||||
|
||||
conditions = []
|
||||
if start is not None:
|
||||
@@ -258,6 +373,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
|
||||
def _build_max_timestamp_query(self, base_query: str) -> str:
|
||||
"""Build a query that returns the maximum value of the timestamp column."""
|
||||
return (
|
||||
f"SELECT MAX(ragflow_src.{self.timestamp_column}) "
|
||||
f"FROM ({base_query}) AS ragflow_src"
|
||||
@@ -265,14 +381,25 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
|
||||
def _build_slim_query(self, base_query: str) -> str:
    """Build the reduced query used to identify documents.

    Column selection, in order of preference:

    1. the configured id column, when there is one;
    2. otherwise the configured content columns;
    3. otherwise ``*`` -- with neither configured, the document id is
       derived from the whole row, so every column is needed.

    Selected columns are qualified with the ``ragflow_src`` alias and the
    final statement is produced by ``self._wrap_query``.

    Args:
        base_query: The SQL statement to wrap.

    Returns:
        The wrapped SQL statement.
    """
    if self.id_column:
        wanted = [self.id_column]
    else:
        wanted = self.content_columns

    if wanted:
        qualified = [f"ragflow_src.{name}" for name in wanted]
        return self._wrap_query(base_query, ", ".join(qualified))
    return self._wrap_query(base_query, "*")
|
||||
|
||||
|
||||
def _build_content(self, row_dict: Dict[str, Any]) -> str:
|
||||
"""Build the document content string from the resolved content columns of a row."""
|
||||
content_parts = []
|
||||
for col in self.content_columns:
|
||||
for col in self._content_columns_for_row(row_dict):
|
||||
if col not in row_dict or row_dict[col] is None:
|
||||
continue
|
||||
value = row_dict[col]
|
||||
@@ -283,6 +410,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
|
||||
def _build_document_id_from_row(self, row_dict: Dict[str, Any]) -> str:
|
||||
"""Derive a stable document id from a database row.
|
||||
|
||||
Uses ``<db_type>:<database>:<id_column_value>`` when an id column is
|
||||
configured, otherwise falls back to an MD5 hash of the document content.
|
||||
"""
|
||||
if self.id_column and self.id_column in row_dict and row_dict[self.id_column] is not None:
|
||||
return f"{self.db_type}:{self.database}:{row_dict[self.id_column]}"
|
||||
content = self._build_content(row_dict)
|
||||
@@ -296,7 +428,9 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
column_names: list[str],
|
||||
) -> Document:
|
||||
"""Convert a database row to a Document."""
|
||||
row_dict = dict(zip(column_names, row)) if isinstance(row, (list, tuple)) else row
|
||||
# pyodbc.Row (SQL Server) is neither a tuple nor a dict and does not
|
||||
# support string-keyed lookup, so always normalise to a plain dict.
|
||||
row_dict = row if isinstance(row, dict) else dict(zip(column_names, row))
|
||||
content = self._build_content(row_dict)
|
||||
metadata = {}
|
||||
for col in self.metadata_columns:
|
||||
@@ -320,7 +454,8 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
else:
|
||||
doc_updated_at = ts_value.astimezone(timezone.utc)
|
||||
|
||||
first_content_col = self.content_columns[0] if self.content_columns else "record"
|
||||
resolved_content_columns = self._content_columns_for_row(row_dict)
|
||||
first_content_col = resolved_content_columns[0] if resolved_content_columns else "record"
|
||||
semantic_id = (
|
||||
str(row_dict.get(first_content_col, "database_record"))
|
||||
.replace("\n", " ")
|
||||
@@ -382,6 +517,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
self,
|
||||
query: str,
|
||||
) -> Generator[list[SlimDocument], None, None]:
|
||||
"""Yield batches of :class:`SlimDocument` objects from *query*.
|
||||
|
||||
Only the document id is populated; no content is fetched. Used during
|
||||
permission sync to detect and remove stale documents.
|
||||
"""
|
||||
connection = self._get_connection()
|
||||
cursor = connection.cursor()
|
||||
|
||||
@@ -392,7 +532,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
batch: list[SlimDocument] = []
|
||||
for row in cursor:
|
||||
row_dict = dict(zip(column_names, row)) if isinstance(row, (list, tuple)) else row
|
||||
row_dict = row if isinstance(row, dict) else dict(zip(column_names, row))
|
||||
batch.append(SlimDocument(id=self._build_document_id_from_row(row_dict)))
|
||||
if len(batch) >= self.batch_size:
|
||||
yield batch
|
||||
@@ -409,6 +549,12 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
|
||||
def get_max_cursor_value(self) -> Any:
|
||||
"""Return the maximum value of the timestamp column across all base queries.
|
||||
|
||||
Returns ``None`` when no timestamp column is configured or the result set
|
||||
is empty. Used to snapshot the upper bound of the sync window before
|
||||
fetching documents.
|
||||
"""
|
||||
if not self.timestamp_column:
|
||||
return None
|
||||
|
||||
@@ -460,6 +606,7 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
self,
|
||||
callback: Any = None,
|
||||
) -> Generator[list[SlimDocument], None, None]:
|
||||
"""Yield slim snapshots of all current documents for stale-document reconciliation."""
|
||||
del callback
|
||||
|
||||
base_queries = self._get_base_queries()
|
||||
@@ -475,6 +622,12 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
self._close_connection()
|
||||
|
||||
def prepare_sync_state(self, connector_id: str, config: Dict[str, Any]) -> None:
|
||||
"""Snapshot the current maximum cursor value before documents are fetched.
|
||||
|
||||
Must be called before :meth:`load_from_cursor_range` so the upper bound
|
||||
of the sync window is captured atomically and can be persisted afterwards
|
||||
via :meth:`persist_sync_state`.
|
||||
"""
|
||||
self._sync_connector_id = connector_id
|
||||
self._sync_config = copy.deepcopy(config)
|
||||
if not self.timestamp_column:
|
||||
@@ -484,12 +637,18 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
|
||||
def get_saved_sync_cursor_value(self) -> Any:
|
||||
"""Return the cursor value that was persisted at the end of the previous sync run."""
|
||||
if self._sync_config is None:
|
||||
return None
|
||||
return self.deserialize_cursor_value(self._sync_config.get("sync_cursor_value"))
|
||||
|
||||
|
||||
def persist_sync_state(self) -> None:
|
||||
"""Write the pending cursor value back to the connector config in the database.
|
||||
|
||||
No-op when no timestamp column is configured or :meth:`prepare_sync_state`
|
||||
was not called.
|
||||
"""
|
||||
if not self.timestamp_column or self._sync_connector_id is None or self._sync_config is None:
|
||||
return
|
||||
|
||||
@@ -508,6 +667,11 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
start_value: Any = None,
|
||||
end_value: Any = None,
|
||||
) -> Generator[list[Document], None, None]:
|
||||
"""Yield documents whose timestamp column falls in ``(start_value, end_value]``.
|
||||
|
||||
Returns an empty iterator when *end_value* is ``None`` or the range is
|
||||
empty (``end_value <= start_value``).
|
||||
"""
|
||||
if end_value is None:
|
||||
self._close_connection()
|
||||
return iter(())
|
||||
@@ -540,12 +704,10 @@ class RDBMSConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
|
||||
if not self.database:
|
||||
raise ConnectorValidationError("Database name is required.")
|
||||
|
||||
if not self.content_columns:
|
||||
raise ConnectorValidationError(
|
||||
"At least one content column must be specified."
|
||||
)
|
||||
|
||||
|
||||
# content_columns is intentionally optional: an empty value means
|
||||
# "use every column returned by the query" (see _content_columns_for_row).
|
||||
|
||||
try:
|
||||
connection = self._get_connection()
|
||||
cursor = connection.cursor()
|
||||
|
||||
Reference in New Issue
Block a user