mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Feat: enable sync deleted files for Gmail && fix google drive issues (#14462)
### What problem does this PR solve? Feat: enable sync deleted files for Gmail && fix google drive issues ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: bill <yibie_jingnian@163.com> Co-authored-by: balibabu <assassin_cike@163.com>
This commit is contained in:
@@ -159,6 +159,7 @@ class GoogleDriveConnector(SlimConnectorWithPermSync, CheckpointedConnectorWithP
|
||||
|
||||
self._creds: OAuthCredentials | ServiceAccountCredentials | None = None
|
||||
self._creds_dict: dict[str, Any] | None = None
|
||||
self._all_drive_ids_cache: set[str] | None = None
|
||||
|
||||
# ids of folders and shared drives that have been traversed
|
||||
self._retrieved_folder_and_drive_ids: set[str] = set()
|
||||
@@ -211,6 +212,7 @@ class GoogleDriveConnector(SlimConnectorWithPermSync, CheckpointedConnectorWithP
|
||||
self.include_files_shared_with_me = True
|
||||
|
||||
self._creds_dict = new_creds_dict
|
||||
self._all_drive_ids_cache = None
|
||||
|
||||
return new_creds_dict
|
||||
|
||||
@@ -249,7 +251,11 @@ class GoogleDriveConnector(SlimConnectorWithPermSync, CheckpointedConnectorWithP
|
||||
return user_emails
|
||||
|
||||
def get_all_drive_ids(self) -> set[str]:
|
||||
return self._get_all_drives_for_user(self.primary_admin_email)
|
||||
if self._all_drive_ids_cache is None:
|
||||
self._all_drive_ids_cache = self._get_all_drives_for_user(
|
||||
self.primary_admin_email
|
||||
)
|
||||
return set(self._all_drive_ids_cache)
|
||||
|
||||
def _get_all_drives_for_user(self, user_email: str) -> set[str]:
|
||||
drive_service = get_drive_service(self.creds, user_email)
|
||||
@@ -265,7 +271,14 @@ class GoogleDriveConnector(SlimConnectorWithPermSync, CheckpointedConnectorWithP
|
||||
all_drive_ids.add(drive["id"])
|
||||
|
||||
if not all_drive_ids:
|
||||
self.logger.warning("No drives found even though indexing shared drives was requested.")
|
||||
if self._requested_shared_drive_ids:
|
||||
self.logger.warning(
|
||||
"No shared drives found for user %s while resolving requested shared drives.",
|
||||
user_email,
|
||||
)
|
||||
elif self.include_shared_drives:
|
||||
log_fn = self.logger.warning if is_service_account else self.logger.info
|
||||
log_fn("No shared drives found for user %s.", user_email)
|
||||
|
||||
return all_drive_ids
|
||||
|
||||
|
||||
@@ -85,9 +85,19 @@ def _get_google_service(
|
||||
if isinstance(creds, ServiceAccountCredentials):
|
||||
# NOTE: https://developers.google.com/identity/protocols/oauth2/service-account#error-codes
|
||||
creds = creds.with_subject(user_email)
|
||||
service = build(service_name, service_version, credentials=creds)
|
||||
service = build(
|
||||
service_name,
|
||||
service_version,
|
||||
credentials=creds,
|
||||
cache_discovery=False,
|
||||
)
|
||||
elif isinstance(creds, OAuthCredentials):
|
||||
service = build(service_name, service_version, credentials=creds)
|
||||
service = build(
|
||||
service_name,
|
||||
service_version,
|
||||
credentials=creds,
|
||||
cache_discovery=False,
|
||||
)
|
||||
|
||||
return service
|
||||
|
||||
|
||||
@@ -577,6 +577,8 @@ class Gmail(SyncBase):
|
||||
task["connector_id"],
|
||||
)
|
||||
|
||||
file_list = None
|
||||
|
||||
# Decide between full reindex and incremental polling by time range.
|
||||
if task["reindex"] == "1" or not task.get("poll_range_start"):
|
||||
start_time = None
|
||||
@@ -596,13 +598,17 @@ class Gmail(SyncBase):
|
||||
end_time = datetime.now(timezone.utc).timestamp()
|
||||
_begin_info = f"from {poll_start}"
|
||||
document_generator = self.connector.poll_source(start_time, end_time)
|
||||
if self.conf.get("sync_deleted_files"):
|
||||
file_list = []
|
||||
for slim_batch in self.connector.retrieve_all_slim_docs_perm_sync():
|
||||
file_list.extend(slim_batch)
|
||||
|
||||
try:
|
||||
admin_email = self.connector.primary_admin_email
|
||||
except RuntimeError:
|
||||
admin_email = "unknown"
|
||||
self.log_connection("Gmail", f"as {admin_email}", task)
|
||||
return document_generator
|
||||
return document_generator, file_list
|
||||
|
||||
|
||||
class Dropbox(SyncBase):
|
||||
@@ -671,7 +677,6 @@ class GoogleDrive(SyncBase):
|
||||
|
||||
if self.conf.get("sync_deleted_files"):
|
||||
file_list = []
|
||||
logging.info("Syncing deleted files (connector_id=%s)", task["connector_id"])
|
||||
SlimDoc = namedtuple('SlimDoc', ['id'])
|
||||
|
||||
# Add observability timing so operators can track the O(N) cost
|
||||
|
||||
@@ -61,6 +61,9 @@ export const DataSourceFeatureVisibilityMap = {
|
||||
[DataSourceKey.GOOGLE_DRIVE]: {
|
||||
syncDeletedFiles: true,
|
||||
},
|
||||
[DataSourceKey.GMAIL]: {
|
||||
syncDeletedFiles: true,
|
||||
},
|
||||
[DataSourceKey.CONFLUENCE]: {
|
||||
syncDeletedFiles: true,
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user