diff --git a/common/data_source/confluence_connector.py b/common/data_source/confluence_connector.py index d2494c3de7..58a7d2f82b 100644 --- a/common/data_source/confluence_connector.py +++ b/common/data_source/confluence_connector.py @@ -1310,7 +1310,7 @@ class ConfluenceConnector( self._confluence_client: OnyxConfluence | None = None self._low_timeout_confluence_client: OnyxConfluence | None = None self._fetched_titles: set[str] = set() - self.allow_images = False + self.allow_images = True # Track document names to detect duplicates self._document_name_counts: dict[str, int] = {} self._document_name_paths: dict[str, list[str]] = {} @@ -1597,7 +1597,7 @@ class ConfluenceConnector( id=page_url, source=DocumentSource.CONFLUENCE, semantic_identifier=semantic_identifier, - extension=".html", # Confluence pages are HTML + extension=".txt", # Confluence pages are HTML blob=page_content.encode("utf-8"), # Encode page content as bytes doc_updated_at=datetime_from_string(page["version"]["when"]), size_bytes=len(page_content.encode("utf-8")), # Calculate size in bytes diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index 044c7484df..87bb8af9b2 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -275,6 +275,7 @@ class Confluence(SyncBase): space=space, page_id=page_id, index_recursively=index_recursively, + ) credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"],