mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
feat: implement Slack data source connector (#15188)
### What problem does this PR solve?

Closes #15187.

RAGFlow shipped a Slack connector (`common/data_source/slack_connector.py`) but it was never usable: `Slack._generate()` in the sync worker was a `pass` stub, the connector's document-generating code was incompatible with the current data model, and Slack was commented out of the data-source settings UI. As a result, teams had no way to index Slack channels/threads into a knowledge base. This PR completes the connector end to end.

**Backend**

- `common/data_source/slack_connector.py`
  - Rewrote `thread_to_doc` to produce a blob-based `Document` (`extension`/`blob`/`size_bytes`). The previous implementation built the doc with a `sections=[...]` argument and omitted the now-required `blob`/`extension`/`size_bytes` fields, so it raised a validation error against the current `Document` model. Thread messages are now cleaned and flattened into a single UTF-8 text blob.
  - Added `load_from_state()` / `poll_source(start, end)` generators. The connector's checkpoint interface is a no-op stub, so both full and incremental syncs run through a single channel-iterating generator built on the existing module helpers (`get_channels`, `filter_channels`, `get_channel_messages`, `_process_message`), with per-channel thread de-duplication.
- `rag/svr/sync_data_source.py`
  - Implemented `Slack._generate()`. Credentials are loaded via `StaticCredentialsProvider` (the connector requires `slack_bot_token` and does not support `load_credentials`). Supports full reindex and incremental polling from `poll_range_start`, plus the optional channel filter. Modeled on the Confluence/Dropbox wrappers.
- `SlackConnector` was already exported from `common/data_source/__init__.py`.

**Frontend (`web/`)**

- Enabled the `SLACK` data-source enum and added its form fields (Slack bot token + optional channel filter), default values, display metadata, and a Slack icon.
- Added `slackDescription` / `slackBotTokenTip` / `slackChannelsTip` strings to `en.ts` and `zh.ts`.

**Tests**

- `test/unit_test/data_source/test_slack_connector_unit.py`: unit tests covering credential loading (`load_credentials` raises, `set_credentials_provider` initializes clients, missing credentials raises) and document generation (standalone message + flattened thread, blob/extension/size_bytes/metadata, and the incremental poll time window). All 5 pass; `ruff check` is clean.

Required Slack scopes: `channels:read`, `channels:history`, `users:read`.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@@ -61,6 +61,7 @@ from common.data_source import (
|
||||
RDBMSConnector,
|
||||
DingTalkAITableConnector,
|
||||
RestAPIConnector,
|
||||
SlackConnector,
|
||||
SharePointConnector,
|
||||
)
|
||||
from common.data_source.models import ConnectorFailure, SeafileSyncScope
|
||||
@@ -999,7 +1000,58 @@ class Slack(SyncBase):
|
||||
SOURCE_NAME: str = FileSource.SLACK
|
||||
|
||||
async def _generate(self, task: dict):
    """Configure a Slack connector from ``self.conf`` and return its document generator.

    Reads the optional ``channels``, ``batch_size`` and
    ``channel_regex_enabled`` settings plus the ``credentials`` mapping from
    ``self.conf``, builds a ``SlackConnector``, attaches static credentials,
    validates the connector settings, and then selects a full load or an
    incremental poll.

    Args:
        task: Sync task description. Must contain ``tenant_id``,
            ``poll_range_start`` and ``reindex``.

    Returns:
        The generator returned by ``SlackConnector.load_from_state()`` when
        ``task["reindex"] == "1"`` or no ``poll_range_start`` is set,
        otherwise the one returned by ``SlackConnector.poll_source()`` for
        the window from ``poll_range_start`` up to the current UTC time.

    Raises:
        ValueError: If ``credentials`` has no truthy ``slack_bot_token``.
    """
    from common.data_source.config import DocumentSource
    from common.data_source.interfaces import StaticCredentialsProvider

    # The channel filter may arrive as a comma-separated string or as a
    # list; anything else (including a missing key) means "no filter".
    channels_conf = self.conf.get("channels")
    if isinstance(channels_conf, str):
        channels = [c.strip() for c in channels_conf.split(",") if c.strip()]
    elif isinstance(channels_conf, list):
        channels = [str(c).strip() for c in channels_conf if str(c).strip()]
    else:
        channels = None

    # Fall back to the default batch size when the configured value is not
    # an integer or is not strictly positive.
    raw_batch_size = self.conf.get("batch_size", INDEX_BATCH_SIZE)
    try:
        batch_size = int(raw_batch_size)
    except (TypeError, ValueError):
        batch_size = INDEX_BATCH_SIZE
    if batch_size <= 0:
        batch_size = INDEX_BATCH_SIZE

    # NOTE(review): bool() treats any non-empty string (including "false")
    # as True -- confirm the stored config value is a real boolean.
    self.connector = SlackConnector(
        channels=channels or None,  # an empty filter list is passed as None
        channel_regex_enabled=bool(self.conf.get("channel_regex_enabled", False)),
        batch_size=batch_size,
    )

    credentials = self.conf.get("credentials") or {}
    if not credentials.get("slack_bot_token"):
        raise ValueError("Slack connector is missing the bot token credential.")

    credentials_provider = StaticCredentialsProvider(
        tenant_id=task["tenant_id"],
        connector_name=DocumentSource.SLACK,
        credential_json=credentials,
    )
    self.connector.set_credentials_provider(credentials_provider)
    self.connector.validate_connector_settings()

    # Full load on an explicit reindex request or when there is no previous
    # poll position; otherwise poll from the last position up to "now" (UTC).
    poll_start = task["poll_range_start"]
    if task["reindex"] == "1" or not poll_start:
        document_generator = self.connector.load_from_state()
    else:
        end_time = datetime.now(timezone.utc).timestamp()
        document_generator = self.connector.poll_source(poll_start.timestamp(), end_time)

    self.log_connection(
        "Slack",
        f"channels({', '.join(channels) if channels else 'all'})",
        task,
    )
    return document_generator
|
||||
|
||||
|
||||
class Teams(SyncBase):
|
||||
|
||||
Reference in New Issue
Block a user