From b0a45809ffee4444da070075252d3251fbe8742a Mon Sep 17 00:00:00 2001
From: kpdev <156195510+kiannidev@users.noreply.github.com>
Date: Sun, 7 Jun 2026 18:56:47 -0700
Subject: [PATCH] fix(onedrive): normalize folder_path for Graph delta URL (#15503)

Prepend a leading slash and reject `..` segments so scoped OneDrive delta queries use `root:/path:/delta` instead of `root:path:/delta`.

Fixes #15500

### What problem does this PR solve?

The OneDrive connector builds Microsoft Graph delta URLs from optional `config.folder_path`. When users enter a path without a leading slash (e.g. `Documents/Reports` instead of `/Documents/Reports`), the connector produces a malformed URL such as `root:Documents/Reports:/delta`.

Per [Microsoft Graph path-based addressing](https://learn.microsoft.com/en-us/graph/onedrive-addressing-driveitems), the segment after `root:` must start with `/` (e.g. `root:/Documents/Reports:/delta`). Sync and validation then fail or return no documents, which is hard to diagnose from the UI because the optional folder field does not enforce the format.

This PR normalizes `folder_path` at connector construction time (prepend `/`, trim whitespace and trailing slashes) and rejects `..` segments before any Graph request is made.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 common/data_source/onedrive_connector.py      | 17 +++++-
 .../test_onedrive_connector_unit.py           | 60 +++++++++++++++++++
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/common/data_source/onedrive_connector.py b/common/data_source/onedrive_connector.py
index ffe26e82bf..ef5353c919 100644
--- a/common/data_source/onedrive_connector.py
+++ b/common/data_source/onedrive_connector.py
@@ -32,6 +32,21 @@ _SUPPORTED_EXTENSIONS = {
 }
 
 
+def _normalize_folder_path(folder_path: str | None) -> str | None:
+    """Normalize Graph path-based addressing segment (root:{path}:/delta)."""
+    if folder_path is None:
+        return None
+    path = folder_path.strip()
+    if not path:
+        return None
+    segments = [segment for segment in path.split("/") if segment]
+    if ".." in segments:
+        raise ConnectorValidationError("folder_path must not contain '..' segments.")
+    if not segments:
+        return None
+    return "/" + "/".join(segments)
+
+
 class OneDriveCheckpoint(ConnectorCheckpoint):
     """OneDrive-specific checkpoint tracking delta links per drive."""
     delta_links: dict[str, str] | None = None
@@ -52,7 +67,7 @@ class OneDriveConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPerm
         folder_path: str | None = None,
     ) -> None:
         self.batch_size = batch_size
-        self.folder_path = folder_path # optional sub-folder filter, e.g. "/Documents"
+        self.folder_path = _normalize_folder_path(folder_path)
         self._access_token: str | None = None
         self._tenant_id: str | None = None
 
diff --git a/test/unit_test/data_source/test_onedrive_connector_unit.py b/test/unit_test/data_source/test_onedrive_connector_unit.py
index 7dfed16993..63ff77b6aa 100644
--- a/test/unit_test/data_source/test_onedrive_connector_unit.py
+++ b/test/unit_test/data_source/test_onedrive_connector_unit.py
@@ -7,6 +7,7 @@ from common.data_source.onedrive_connector import OneDriveConnector, OneDriveChe
 from common.data_source.models import SlimDocument
 from common.data_source.exceptions import (
     ConnectorMissingCredentialError,
+    ConnectorValidationError,
     InsufficientPermissionsError,
     UnexpectedValidationError,
 )
@@ -18,6 +19,65 @@ _GOOD_CREDS = {
     "client_secret": "secret-xyz",
 }
 
+_GRAPH_BASE = "https://graph.microsoft.com/v1.0"
+
+
+# ---------------------------------------------------------------------------
+# folder_path / _delta_url
+# ---------------------------------------------------------------------------
+
+@pytest.mark.p2
+def test_folder_path_prepends_leading_slash_for_delta_url():
+    connector = OneDriveConnector(folder_path="Documents/Reports")
+    assert connector.folder_path == "/Documents/Reports"
+    assert connector._delta_url("drive-1") == (
+        f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"
+    )
+
+
+@pytest.mark.p2
+def test_folder_path_preserves_leading_slash():
+    connector = OneDriveConnector(folder_path="/Documents/Reports/")
+    assert connector.folder_path == "/Documents/Reports"
+    assert connector._delta_url("drive-1") == (
+        f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"
+    )
+
+
+@pytest.mark.p2
+def test_folder_path_rejects_parent_segments():
+    with pytest.raises(ConnectorValidationError, match="\\.\\."):
+        OneDriveConnector(folder_path="/Documents/../secret")
+
+
+@pytest.mark.p2
+def test_folder_path_normalizes_consecutive_slashes():
+    connector = OneDriveConnector(folder_path="//Documents//Reports")
+    assert connector.folder_path == "/Documents/Reports"
+    assert connector._delta_url("drive-1") == (
+        f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"
+    )
+
+
+@pytest.mark.p2
+def test_folder_path_strips_whitespace():
+    connector = OneDriveConnector(folder_path=" Documents/Reports ")
+    assert connector.folder_path == "/Documents/Reports"
+
+
+@pytest.mark.p2
+def test_folder_path_root_uses_drive_root_delta():
+    connector = OneDriveConnector(folder_path="/")
+    assert connector.folder_path is None
+    assert connector._delta_url("drive-1") == f"{_GRAPH_BASE}/drives/drive-1/root/delta"
+
+
+@pytest.mark.p2
+def test_folder_path_double_slash_only_uses_drive_root_delta():
+    connector = OneDriveConnector(folder_path="//")
+    assert connector.folder_path is None
+    assert connector._delta_url("drive-1") == f"{_GRAPH_BASE}/drives/drive-1/root/delta"
+
 
 # ---------------------------------------------------------------------------
 # load_credentials