diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index d251f7c27c..23d62a5bf9 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -641,14 +641,61 @@ class Notion(SyncBase): class Discord(SyncBase): SOURCE_NAME: str = FileSource.DISCORD + @staticmethod + def _coerce_str_list(raw): + """Normalise a config field that may arrive as a list (Tag input from + the new web form), a comma-separated string (legacy/SDK callers), or + None into a clean ``list[str]`` with empty entries dropped. + + Fixes #15790 — the previous ``.split(',')`` call assumed a string and + raised ``'list' object has no attribute 'split'`` for any config saved + through the current UI. + """ + if not raw: + return [] + if isinstance(raw, str): + items = raw.split(",") + elif isinstance(raw, (list, tuple, set)): + items = list(raw) + else: + items = [raw] + # Drop None explicitly so it doesn't survive as the literal string + # "None" (str(None) == "None") — only stringify real values. + cleaned: list[str] = [] + for item in items: + if item is None: + continue + text = str(item).strip() + if text: + cleaned.append(text) + return cleaned + async def _generate(self, task: dict): - server_ids: str | None = self.conf.get("server_ids", None) - # "channel1,channel2" - channel_names: str | None = self.conf.get("channel_names", None) + server_ids_raw = self.conf.get("server_ids", None) + # Web form stores channels under "channels"; older configs / SDK use + # "channel_names" — accept either so existing sources keep working. + channels_raw = self.conf.get("channels", None) + if channels_raw in (None, "", []): + channels_raw = self.conf.get("channel_names", None) + + server_id_strs = self._coerce_str_list(server_ids_raw) + # DiscordConnector.__init__ takes server_ids as list[str] and converts + # to list[int] internally (common/data_source/discord_connector.py:247). + # Validate up-front so a malformed entry warns + drops here rather than + # crashing the connector's int() cast — but keep the strings. + server_ids: list[str] = [] + for sid in server_id_strs: + try: + int(sid) + except ValueError: + logging.warning("Discord connector: ignoring non-integer server_id %r", sid) + continue + server_ids.append(sid) + channel_names = self._coerce_str_list(channels_raw) self.connector = DiscordConnector( - server_ids=server_ids.split(",") if server_ids else [], - channel_names=channel_names.split(",") if channel_names else [], + server_ids=server_ids, + channel_names=channel_names, start_date=datetime(1970, 1, 1, tzinfo=timezone.utc).strftime("%Y-%m-%d"), batch_size=self.conf.get("batch_size", 1024), )