Feat: enable sync deleted files for Gmail && fix google drive issues (#14462)

### What problem does this PR solve?

Feat: enable sync deleted files for Gmail && fix google drive issues

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: bill <yibie_jingnian@163.com>
Co-authored-by: balibabu <assassin_cike@163.com>
This commit is contained in:
Magicbook1108
2026-04-29 17:03:56 +08:00
committed by GitHub
parent a736948493
commit e0b3070012
4 changed files with 37 additions and 6 deletions

View File

@@ -159,6 +159,7 @@ class GoogleDriveConnector(SlimConnectorWithPermSync, CheckpointedConnectorWithP
self._creds: OAuthCredentials | ServiceAccountCredentials | None = None
self._creds_dict: dict[str, Any] | None = None
self._all_drive_ids_cache: set[str] | None = None
# ids of folders and shared drives that have been traversed
self._retrieved_folder_and_drive_ids: set[str] = set()
@@ -211,6 +212,7 @@ class GoogleDriveConnector(SlimConnectorWithPermSync, CheckpointedConnectorWithP
self.include_files_shared_with_me = True
self._creds_dict = new_creds_dict
self._all_drive_ids_cache = None
return new_creds_dict
@@ -249,7 +251,11 @@ class GoogleDriveConnector(SlimConnectorWithPermSync, CheckpointedConnectorWithP
return user_emails
def get_all_drive_ids(self) -> set[str]:
return self._get_all_drives_for_user(self.primary_admin_email)
if self._all_drive_ids_cache is None:
self._all_drive_ids_cache = self._get_all_drives_for_user(
self.primary_admin_email
)
return set(self._all_drive_ids_cache)
def _get_all_drives_for_user(self, user_email: str) -> set[str]:
drive_service = get_drive_service(self.creds, user_email)
@@ -265,7 +271,14 @@ class GoogleDriveConnector(SlimConnectorWithPermSync, CheckpointedConnectorWithP
all_drive_ids.add(drive["id"])
if not all_drive_ids:
self.logger.warning("No drives found even though indexing shared drives was requested.")
if self._requested_shared_drive_ids:
self.logger.warning(
"No shared drives found for user %s while resolving requested shared drives.",
user_email,
)
elif self.include_shared_drives:
log_fn = self.logger.warning if is_service_account else self.logger.info
log_fn("No shared drives found for user %s.", user_email)
return all_drive_ids

View File

@@ -85,9 +85,19 @@ def _get_google_service(
if isinstance(creds, ServiceAccountCredentials):
# NOTE: https://developers.google.com/identity/protocols/oauth2/service-account#error-codes
creds = creds.with_subject(user_email)
service = build(service_name, service_version, credentials=creds)
service = build(
service_name,
service_version,
credentials=creds,
cache_discovery=False,
)
elif isinstance(creds, OAuthCredentials):
service = build(service_name, service_version, credentials=creds)
service = build(
service_name,
service_version,
credentials=creds,
cache_discovery=False,
)
return service

View File

@@ -577,6 +577,8 @@ class Gmail(SyncBase):
task["connector_id"],
)
file_list = None
# Decide between full reindex and incremental polling by time range.
if task["reindex"] == "1" or not task.get("poll_range_start"):
start_time = None
@@ -596,13 +598,17 @@ class Gmail(SyncBase):
end_time = datetime.now(timezone.utc).timestamp()
_begin_info = f"from {poll_start}"
document_generator = self.connector.poll_source(start_time, end_time)
if self.conf.get("sync_deleted_files"):
file_list = []
for slim_batch in self.connector.retrieve_all_slim_docs_perm_sync():
file_list.extend(slim_batch)
try:
admin_email = self.connector.primary_admin_email
except RuntimeError:
admin_email = "unknown"
self.log_connection("Gmail", f"as {admin_email}", task)
return document_generator
return document_generator, file_list
class Dropbox(SyncBase):
@@ -671,7 +677,6 @@ class GoogleDrive(SyncBase):
if self.conf.get("sync_deleted_files"):
file_list = []
logging.info("Syncing deleted files (connector_id=%s)", task["connector_id"])
SlimDoc = namedtuple('SlimDoc', ['id'])
# Add observability timing so operators can track the O(N) cost

View File

@@ -61,6 +61,9 @@ export const DataSourceFeatureVisibilityMap = {
[DataSourceKey.GOOGLE_DRIVE]: {
syncDeletedFiles: true,
},
[DataSourceKey.GMAIL]: {
syncDeletedFiles: true,
},
[DataSourceKey.CONFLUENCE]: {
syncDeletedFiles: true,
},