diff --git a/common/data_source/onedrive_connector.py b/common/data_source/onedrive_connector.py index ffe26e82bf..ef5353c919 100644 --- a/common/data_source/onedrive_connector.py +++ b/common/data_source/onedrive_connector.py @@ -32,6 +32,21 @@ _SUPPORTED_EXTENSIONS = { } +def _normalize_folder_path(folder_path: str | None) -> str | None: + """Normalize Graph path-based addressing segment (root:{path}:/delta).""" + if folder_path is None: + return None + path = folder_path.strip() + if not path: + return None + segments = [segment for segment in path.split("/") if segment] + if ".." in segments: + raise ConnectorValidationError("folder_path must not contain '..' segments.") + if not segments: + return None + return "/" + "/".join(segments) + + class OneDriveCheckpoint(ConnectorCheckpoint): """OneDrive-specific checkpoint tracking delta links per drive.""" delta_links: dict[str, str] | None = None @@ -52,7 +67,7 @@ class OneDriveConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPerm folder_path: str | None = None, ) -> None: self.batch_size = batch_size - self.folder_path = folder_path # optional sub-folder filter, e.g. 
_GRAPH_BASE = "https://graph.microsoft.com/v1.0"


# ---------------------------------------------------------------------------
# folder_path / _delta_url
# ---------------------------------------------------------------------------

@pytest.mark.p2
def test_folder_path_prepends_leading_slash_for_delta_url():
    """A relative folder path gains a leading slash before reaching the delta URL."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"
    onedrive = OneDriveConnector(folder_path="Documents/Reports")
    assert onedrive.folder_path == "/Documents/Reports"
    assert onedrive._delta_url("drive-1") == expected_url


@pytest.mark.p2
def test_folder_path_preserves_leading_slash():
    """An absolute path keeps its single leading slash and loses the trailing one."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"
    onedrive = OneDriveConnector(folder_path="/Documents/Reports/")
    assert onedrive.folder_path == "/Documents/Reports"
    assert onedrive._delta_url("drive-1") == expected_url


@pytest.mark.p2
def test_folder_path_rejects_parent_segments():
    """A '..' segment is refused at construction time."""
    with pytest.raises(ConnectorValidationError, match="\\.\\."):
        OneDriveConnector(folder_path="/Documents/../secret")


@pytest.mark.p2
def test_folder_path_normalizes_consecutive_slashes():
    """Repeated slashes collapse to a single separator."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"
    onedrive = OneDriveConnector(folder_path="//Documents//Reports")
    assert onedrive.folder_path == "/Documents/Reports"
    assert onedrive._delta_url("drive-1") == expected_url


@pytest.mark.p2
def test_folder_path_strips_whitespace():
    """Surrounding whitespace is removed from the configured path."""
    onedrive = OneDriveConnector(folder_path=" Documents/Reports ")
    assert onedrive.folder_path == "/Documents/Reports"


@pytest.mark.p2
def test_folder_path_root_uses_drive_root_delta():
    """A bare '/' means no sub-folder, so the drive-root delta URL is used."""
    onedrive = OneDriveConnector(folder_path="/")
    assert onedrive.folder_path is None
    assert onedrive._delta_url("drive-1") == f"{_GRAPH_BASE}/drives/drive-1/root/delta"


@pytest.mark.p2
def test_folder_path_double_slash_only_uses_drive_root_delta():
    """A path made only of slashes is treated the same as the drive root."""
    onedrive = OneDriveConnector(folder_path="//")
    assert onedrive.folder_path is None
    assert onedrive._delta_url("drive-1") == f"{_GRAPH_BASE}/drives/drive-1/root/delta"


# ---------------------------------------------------------------------------
# load_credentials