From a0be7c7ca752344670aa553b0bbf920726d37874 Mon Sep 17 00:00:00 2001
From: buildearth <1061187456@qq.com>
Date: Tue, 7 Apr 2026 10:24:30 +0800
Subject: [PATCH] Fix(connector): expose id_column, timestamp_column, metadata_columns for MySQL/PostgreSQL incremental sync (#13849)

### What problem does this PR solve?

The MySQL and PostgreSQL sync classes in `sync_data_source.py` were not passing `id_column`, `timestamp_column`, and `metadata_columns` to `RDBMSConnector`, making incremental sync and document updates impossible even when configured.

- Without `id_column`: updated records generate new documents instead of overwriting existing ones (doc ID is derived from content hash, so any change produces a new ID).
- Without `timestamp_column`: `poll_source` always falls back to full sync, ignoring the configured time range.
- The three fields existed in the frontend default values but had no form inputs, so users had no way to fill them in.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)

### Changes

- **Backend** (`rag/svr/sync_data_source.py`): pass `id_column`, `timestamp_column`, and `metadata_columns` from `self.conf` to `RDBMSConnector` for both `MySQL` and `PostgreSQL` sync classes.
- **Frontend** (`web/src/pages/user-setting/data-source/constant/index.tsx`): add `ID Column`, `Timestamp Column`, and `Metadata Columns` form fields to MySQL and PostgreSQL data source configuration UI with tooltips.

Signed-off-by: lixintao Co-authored-by: lixintao --- rag/svr/sync_data_source.py | 6 +++ .../data-source/constant/index.tsx | 48 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index 203cf862d5..4b60780190 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -1307,6 +1307,9 @@ class MySQL(SyncBase): database=self.conf.get("database", ""), query=self.conf.get("query", ""), content_columns=self.conf.get("content_columns", ""), + metadata_columns=self.conf.get("metadata_columns", ""), + id_column=self.conf.get("id_column") or None, + timestamp_column=self.conf.get("timestamp_column") or None, batch_size=self.conf.get("batch_size", INDEX_BATCH_SIZE), ) @@ -1343,6 +1346,9 @@ class PostgreSQL(SyncBase): database=self.conf.get("database", ""), query=self.conf.get("query", ""), content_columns=self.conf.get("content_columns", ""), + metadata_columns=self.conf.get("metadata_columns", ""), + id_column=self.conf.get("id_column") or None, + timestamp_column=self.conf.get("timestamp_column") or None, batch_size=self.conf.get("batch_size", INDEX_BATCH_SIZE), ) diff --git a/web/src/pages/user-setting/data-source/constant/index.tsx b/web/src/pages/user-setting/data-source/constant/index.tsx index 01990ea4f3..aad84d5777 100644 --- a/web/src/pages/user-setting/data-source/constant/index.tsx +++ b/web/src/pages/user-setting/data-source/constant/index.tsx @@ -936,6 +936,30 @@ export const DataSourceFormFields = { placeholder: 'title,description,content', tooltip: t('setting.mysqlContentColumnsTip'), }, + { + label: 'Metadata Columns', + name: 'config.metadata_columns', + type: FormFieldType.Text, + required: false, + placeholder: 'id,category,status', + tooltip: t('setting.mysqlMetadataColumnsTip'), + }, + { + label: 'ID Column', + name: 'config.id_column', + type: FormFieldType.Text, + required: false, + placeholder: 'id', + tooltip: t('setting.mysqlIdColumnTip'), + }, + { + label: 
'Timestamp Column', + name: 'config.timestamp_column', + type: FormFieldType.Text, + required: false, + placeholder: 'updated_at', + tooltip: t('setting.mysqlTimestampColumnTip'), + }, ], [DataSourceKey.POSTGRESQL]: [ { @@ -986,6 +1010,30 @@ export const DataSourceFormFields = { placeholder: 'title,description,content', tooltip: t('setting.postgresqlContentColumnsTip'), }, + { + label: 'Metadata Columns', + name: 'config.metadata_columns', + type: FormFieldType.Text, + required: false, + placeholder: 'id,category,status', + tooltip: t('setting.postgresqlMetadataColumnsTip'), + }, + { + label: 'ID Column', + name: 'config.id_column', + type: FormFieldType.Text, + required: false, + placeholder: 'id', + tooltip: t('setting.postgresqlIdColumnTip'), + }, + { + label: 'Timestamp Column', + name: 'config.timestamp_column', + type: FormFieldType.Text, + required: false, + placeholder: 'updated_at', + tooltip: t('setting.postgresqlTimestampColumnTip'), + }, ], };