feat(zendesk): support deleted-file sync (#14487)

### What problem does this PR solve? Refs #14362. This PR enables syncing deleted files for Zendesk data sources. Previously, Zendesk incremental sync never returned a slim remote snapshot to the shared stale-document cleanup path, so deleted remote Zendesk records could remain in RAGFlow. The existing Zendesk slim snapshot also included records that ingestion intentionally skips, such as draft articles, articles without bodies, skipped-label articles, empty-body articles, and tickets with `status == "deleted"`. This PR: - exposes the deleted-file sync option for Zendesk in the data source UI - returns Zendesk slim snapshots during incremental sync when `sync_deleted_files` is enabled - reuses Zendesk indexability rules so cleanup compares against the same records ingestion can materialize - adds start/end logs around Zendesk slim snapshot collection for operational visibility Per maintainer request, this PR contains no test-case changes. Manual verification recording will be provided separately. Validation: - `uv run ruff check common/data_source/zendesk_connector.py rag/svr/sync_data_source.py` - `uv run pytest test/unit_test/rag/test_sync_data_source.py -q` - `./node_modules/.bin/eslint src/pages/user-setting/data-source/constant/index.tsx` ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe):
2026-06-29 23:41:12 +08:00 · 2026-04-30 03:44:05 -03:00
parent 8f75e52bbf
commit 17eda04b8d
3 changed files with 42 additions and 9 deletions
--- a/common/data_source/zendesk_connector.py
+++ b/common/data_source/zendesk_connector.py
@@ -246,6 +246,18 @@ def _article_to_document(
    )


+def _is_indexable_article(article: dict[str, Any]) -> bool:
+    body = article.get("body")
+    return (
+        bool(body)
+        and not article.get("draft")
+        and not any(
+            label in ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS
+            for label in article.get("label_names") or []
+        )
+    )
+
+
 def _get_comment_text(
    comment: dict[str, Any],
    author_map: dict[str, BasicExpertInfo],
@@ -333,6 +345,10 @@ def _ticket_to_document(
    )


+def _is_indexable_ticket(ticket: dict[str, Any]) -> bool:
+    return ticket.get("status") != "deleted"
+
+
 class ZendeskConnectorCheckpoint(ConnectorCheckpoint):
    # We use cursor-based paginated retrieval for articles
    after_cursor_articles: str | None
@@ -419,14 +435,7 @@ class ZendeskConnector(
        has_more = response.has_more
        after_cursor = response.meta.get("after_cursor")
        for article in articles:
-            if (
-                article.get("body") is None
-                or article.get("draft")
-                or any(
-                    label in ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS
-                    for label in article.get("label_names", [])
-                )
-            ):
+            if not _is_indexable_article(article):
                continue

            try:
@@ -498,7 +507,7 @@ class ZendeskConnector(
        has_more = ticket_response.has_more
        next_start_time = ticket_response.meta["end_time"]
        for ticket in tickets:
-            if ticket.get("status") == "deleted":
+            if not _is_indexable_ticket(ticket):
                continue

            try:
@@ -559,6 +568,8 @@ class ZendeskConnector(
        if self.content_type == "articles":
            articles = _get_articles(self.client)
            for article in articles:
+                if not _is_indexable_article(article):
+                    continue
                slim_doc_batch.append(
                    SlimDocument(
                        id=f"article:{article['id']}",
@@ -570,6 +581,8 @@ class ZendeskConnector(
        elif self.content_type == "tickets":
            tickets = _get_tickets(self.client)
            for ticket in tickets:
+                if not _is_indexable_ticket(ticket):
+                    continue
                slim_doc_batch.append(
                    SlimDocument(
                        id=f"zendesk_ticket_{ticket['id']}",
--- a/rag/svr/sync_data_source.py
+++ b/rag/svr/sync_data_source.py
@@ -1232,11 +1232,26 @@ class Zendesk(SyncBase):
        self.connector.load_credentials(self.conf["credentials"])

        end_time = datetime.now(timezone.utc).timestamp()
+        file_list = None
        if task["reindex"] == "1" or not task.get("poll_range_start"):
            start_time = 0
            _begin_info = "totally"
        else:
            start_time = task["poll_range_start"].timestamp()
+            if self.conf.get("sync_deleted_files"):
+                logging.info(
+                    "[Zendesk] Syncing deleted files via slim snapshot (connector_id=%s)",
+                    task.get("connector_id"),
+                )
+                snapshot_start = time.perf_counter()
+                file_list = []
+                for slim_batch in self.connector.retrieve_all_slim_docs_perm_sync():
+                    file_list.extend(slim_batch)
+                logging.info(
+                    "[Zendesk] Slim snapshot fetched %d docs in %.2f seconds",
+                    len(file_list),
+                    time.perf_counter() - snapshot_start,
+                )
            _begin_info = f"from {task['poll_range_start']}"

        raw_batch_size = (
@@ -1298,6 +1313,8 @@ class Zendesk(SyncBase):

        self.log_connection("Zendesk", f"subdomain({self.conf['credentials'].get('zendesk_subdomain')})", task)

+        if file_list is not None:
+            return wrapper(), file_list
        return wrapper()


--- a/web/src/pages/user-setting/data-source/constant/index.tsx
+++ b/web/src/pages/user-setting/data-source/constant/index.tsx
@@ -105,6 +105,9 @@ export const DataSourceFeatureVisibilityMap: Partial<
  [DataSourceKey.AIRTABLE]: {
    syncDeletedFiles: true,
  },
+  [DataSourceKey.ZENDESK]: {
+    syncDeletedFiles: true,
+  },
  [DataSourceKey.SEAFILE]: {
    syncDeletedFiles: true,
  },