From 675810e0cf4bfdefc945ddbf7b15a705e9fdb64b Mon Sep 17 00:00:00 2001 From: Magicbook1108 Date: Tue, 10 Mar 2026 15:02:24 +0800 Subject: [PATCH] Refact: optimize confluence performance (#13497) ### What problem does this PR solve? Refact: optimize confluence performance #13494 ### Type of change - [x] Refactoring --- common/data_source/confluence_connector.py | 4 ++-- rag/svr/sync_data_source.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/common/data_source/confluence_connector.py b/common/data_source/confluence_connector.py index d2494c3de7..58a7d2f82b 100644 --- a/common/data_source/confluence_connector.py +++ b/common/data_source/confluence_connector.py @@ -1310,7 +1310,7 @@ class ConfluenceConnector( self._confluence_client: OnyxConfluence | None = None self._low_timeout_confluence_client: OnyxConfluence | None = None self._fetched_titles: set[str] = set() - self.allow_images = False + self.allow_images = True # Track document names to detect duplicates self._document_name_counts: dict[str, int] = {} self._document_name_paths: dict[str, list[str]] = {} @@ -1597,7 +1597,7 @@ class ConfluenceConnector( id=page_url, source=DocumentSource.CONFLUENCE, semantic_identifier=semantic_identifier, - extension=".html", # Confluence pages are HTML + extension=".txt", # Confluence pages are HTML blob=page_content.encode("utf-8"), # Encode page content as bytes doc_updated_at=datetime_from_string(page["version"]["when"]), size_bytes=len(page_content.encode("utf-8")), # Calculate size in bytes diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index 044c7484df..87bb8af9b2 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -275,6 +275,7 @@ class Confluence(SyncBase): space=space, page_id=page_id, index_recursively=index_recursively, + ) credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"],