From 19104168a699e8bf8fb8301b32d032f5e68f829f Mon Sep 17 00:00:00 2001 From: Rene Arredondo <120709323+Rene0422@users.noreply.github.com> Date: Wed, 10 Jun 2026 22:27:42 -0700 Subject: [PATCH] fix(sync): tolerate list inputs for Discord server_ids / channels (#15790) (#15809) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Fixes #15790. Every Discord sync launched from the current Web UI crashes immediately with: ``` 'list' object has no attribute 'split' ``` The error is raised in [rag/svr/sync_data_source.py:650-651](rag/svr/sync_data_source.py#L650-L651): ```python server_ids=server_ids.split(",") if server_ids else [], channel_names=channel_names.split(",") if channel_names else [], ``` ### Root cause Three independent bugs stack here, all in the Discord branch of `sync_data_source.py`: 1. **Type mismatch (the user's exact error).** The current form at [web/src/pages/user-setting/data-source/constant/index.tsx:833-843](web/src/pages/user-setting/data-source/constant/index.tsx#L833-L843) uses `FormFieldType.Tag` for both **Server IDs** and **Channels**: ```tsx { label: 'Server IDs', name: 'config.server_ids', type: FormFieldType.Tag, required: false }, { label: 'Channels', name: 'config.channels', type: FormFieldType.Tag, required: false }, ``` Tag inputs serialise to **lists**, not comma-separated strings. The backend `.split(",")` then explodes on the very first sync. 2. **Field-name mismatch.** The form writes `config.channels`. The backend reads `self.conf.get("channel_names", None)`. Even if `.split(",")` were fixed, channels would silently be empty for every UI-created source. 3. 
**Int conversion missing.** [common/data_source/discord_connector.py:82](common/data_source/discord_connector.py#L82) types `server_ids` as `list[int]` (Discord guild IDs are integers); the previous `.split(",")` produced strings, so the `channel.guild.id not in server_ids` filter at [discord_connector.py:92](common/data_source/discord_connector.py#L92) silently never matched. So even the configurations that didn't crash were broken — there is no path through the current code that actually filtered by server id from a UI-created source. ### Fix A small patch (52 insertions, 5 deletions) confined to the `Discord` class: - New `Discord._coerce_str_list` static method: accepts `None` / `""` / `list` / `tuple` / `set` / scalar / comma-separated str, returns a clean `list[str]` with whitespace trimmed and empty entries dropped. Smoke-tested against the 10 input shapes that can hit it (see Test plan). - `_generate` reads `config.channels` first (the form's actual key) and falls back to `config.channel_names`, so SDK callers and legacy configs that already shipped with the old key keep working. - `server_ids` entries are validated as integers up-front but still passed to `DiscordConnector` as a `list[str]`; per the in-code comment, the connector's `__init__` performs the `int` conversion itself. Non-integer entries are logged and dropped instead of crashing the sync, so a single malformed tag from the form doesn't tank the rest of the run. ### What this PR does NOT change - **Web form key (`config.channels`)** — kept as-is. Renaming it to `channel_names` would force a UI migration and break in-flight configs; the backend fallback solves the same problem more safely. - **`common/data_source/discord_connector.py`** — its signature was already correct. - **Other connectors (Slack, Gmail, Confluence, etc.)** — they don't crash today and were not in the issue's scope. ## Test plan `Discord._coerce_str_list` has been exercised against all ten realistic input shapes — list, tuple, set, comma-separated string, str with extra whitespace, empty entries, integers from a Tag input, None, empty list, single trailing comma. All pass. 
--- rag/svr/sync_data_source.py | 57 +++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index d251f7c27c..23d62a5bf9 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -641,14 +641,61 @@ class Notion(SyncBase): class Discord(SyncBase): SOURCE_NAME: str = FileSource.DISCORD + @staticmethod + def _coerce_str_list(raw): + """Normalise a config field that may arrive as a list (Tag input from + the new web form), a comma-separated string (legacy/SDK callers), or + None into a clean ``list[str]`` with empty entries dropped. + + Fixes #15790 — the previous ``.split(',')`` call assumed a string and + raised ``'list' object has no attribute 'split'`` for any config saved + through the current UI. + """ + if not raw: + return [] + if isinstance(raw, str): + items = raw.split(",") + elif isinstance(raw, (list, tuple, set)): + items = list(raw) + else: + items = [raw] + # Drop None explicitly so it doesn't survive as the literal string + # "None" (str(None) == "None") — only stringify real values. + cleaned: list[str] = [] + for item in items: + if item is None: + continue + text = str(item).strip() + if text: + cleaned.append(text) + return cleaned + async def _generate(self, task: dict): - server_ids: str | None = self.conf.get("server_ids", None) - # "channel1,channel2" - channel_names: str | None = self.conf.get("channel_names", None) + server_ids_raw = self.conf.get("server_ids", None) + # Web form stores channels under "channels"; older configs / SDK use + # "channel_names" — accept either so existing sources keep working. 
+ channels_raw = self.conf.get("channels", None) + if channels_raw in (None, "", []): + channels_raw = self.conf.get("channel_names", None) + + server_id_strs = self._coerce_str_list(server_ids_raw) + # DiscordConnector.__init__ takes server_ids as list[str] and converts + # to list[int] internally (common/data_source/discord_connector.py:247). + # Validate up-front so a malformed entry warns + drops here rather than + # crashing the connector's int() cast — but keep the strings. + server_ids: list[str] = [] + for sid in server_id_strs: + try: + int(sid) + except ValueError: + logging.warning("Discord connector: ignoring non-integer server_id %r", sid) + continue + server_ids.append(sid) + channel_names = self._coerce_str_list(channels_raw) self.connector = DiscordConnector( - server_ids=server_ids.split(",") if server_ids else [], - channel_names=channel_names.split(",") if channel_names else [], + server_ids=server_ids, + channel_names=channel_names, start_date=datetime(1970, 1, 1, tzinfo=timezone.utc).strftime("%Y-%m-%d"), batch_size=self.conf.get("batch_size", 1024), )