fix(onedrive): normalize folder_path for Graph delta URL (#15503)

Prepend a leading slash and reject `..` segments so scoped OneDrive
delta queries use `root:/path:/delta` instead of `root:path:/delta`.

Fixes #15500

### What problem does this PR solve?

The OneDrive connector builds Microsoft Graph delta URLs from optional
`config.folder_path`. When users enter a path without a leading slash
(e.g. `Documents/Reports` instead of `/Documents/Reports`), the
connector produces a malformed URL such as
`root:Documents/Reports:/delta`. Per [Microsoft Graph path-based
addressing](https://learn.microsoft.com/en-us/graph/onedrive-addressing-driveitems),
the segment after `root:` must start with `/` (e.g.
`root:/Documents/Reports:/delta`). Sync and validation then fail or
return no documents, which is hard to diagnose from the UI because the
optional folder field does not enforce the format.

This PR normalizes `folder_path` at connector construction time (prepend
`/`, trim surrounding whitespace, drop trailing and repeated slashes, and
treat an empty or slash-only value as "no folder") and rejects `..`
segments before any Graph request is made.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
kpdev
2026-06-07 18:56:47 -07:00
committed by GitHub
parent 5a04ac0864
commit b0a45809ff
2 changed files with 76 additions and 1 deletions

View File

@@ -32,6 +32,21 @@ _SUPPORTED_EXTENSIONS = {
}
def _normalize_folder_path(folder_path: str | None) -> str | None:
"""Normalize Graph path-based addressing segment (root:{path}:/delta)."""
if folder_path is None:
return None
path = folder_path.strip()
if not path:
return None
segments = [segment for segment in path.split("/") if segment]
if ".." in segments:
raise ConnectorValidationError("folder_path must not contain '..' segments.")
if not segments:
return None
return "/" + "/".join(segments)
class OneDriveCheckpoint(ConnectorCheckpoint):
"""OneDrive-specific checkpoint tracking delta links per drive."""
delta_links: dict[str, str] | None = None
@@ -52,7 +67,7 @@ class OneDriveConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPerm
folder_path: str | None = None,
) -> None:
self.batch_size = batch_size
self.folder_path = folder_path # optional sub-folder filter, e.g. "/Documents"
self.folder_path = _normalize_folder_path(folder_path)
self._access_token: str | None = None
self._tenant_id: str | None = None

View File

@@ -7,6 +7,7 @@ from common.data_source.onedrive_connector import OneDriveConnector, OneDriveChe
from common.data_source.models import SlimDocument
from common.data_source.exceptions import (
ConnectorMissingCredentialError,
ConnectorValidationError,
InsufficientPermissionsError,
UnexpectedValidationError,
)
@@ -18,6 +19,65 @@ _GOOD_CREDS = {
"client_secret": "secret-xyz",
}
_GRAPH_BASE = "https://graph.microsoft.com/v1.0"
# ---------------------------------------------------------------------------
# folder_path / _delta_url
# ---------------------------------------------------------------------------
@pytest.mark.p2
def test_folder_path_prepends_leading_slash_for_delta_url():
    """A folder_path given without a leading slash gains one in the attribute and the delta URL."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"

    onedrive = OneDriveConnector(folder_path="Documents/Reports")

    assert onedrive.folder_path == "/Documents/Reports"
    assert onedrive._delta_url("drive-1") == expected_url
@pytest.mark.p2
def test_folder_path_preserves_leading_slash():
    """An existing leading slash is kept while the trailing slash is removed."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"

    onedrive = OneDriveConnector(folder_path="/Documents/Reports/")

    assert onedrive.folder_path == "/Documents/Reports"
    assert onedrive._delta_url("drive-1") == expected_url
@pytest.mark.p2
def test_folder_path_rejects_parent_segments():
    """Constructing a connector whose folder_path has a ``..`` segment raises ConnectorValidationError."""
    traversal_path = "/Documents/../secret"

    with pytest.raises(ConnectorValidationError, match="\\.\\."):
        OneDriveConnector(folder_path=traversal_path)
@pytest.mark.p2
def test_folder_path_normalizes_consecutive_slashes():
    """Runs of slashes collapse to a single separator in the attribute and the delta URL."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"

    onedrive = OneDriveConnector(folder_path="//Documents//Reports")

    assert onedrive.folder_path == "/Documents/Reports"
    assert onedrive._delta_url("drive-1") == expected_url
@pytest.mark.p2
def test_folder_path_strips_whitespace():
    """Whitespace surrounding the folder_path value is discarded."""
    padded_path = " Documents/Reports "

    onedrive = OneDriveConnector(folder_path=padded_path)

    assert onedrive.folder_path == "/Documents/Reports"
@pytest.mark.p2
def test_folder_path_root_uses_drive_root_delta():
    """A bare "/" yields no folder_path, and the delta URL targets the drive root."""
    root_delta_url = f"{_GRAPH_BASE}/drives/drive-1/root/delta"

    onedrive = OneDriveConnector(folder_path="/")

    assert onedrive.folder_path is None
    assert onedrive._delta_url("drive-1") == root_delta_url
@pytest.mark.p2
def test_folder_path_double_slash_only_uses_drive_root_delta():
    """A slash-only value such as "//" yields no folder_path, and the delta URL targets the drive root."""
    root_delta_url = f"{_GRAPH_BASE}/drives/drive-1/root/delta"

    onedrive = OneDriveConnector(folder_path="//")

    assert onedrive.folder_path is None
    assert onedrive._delta_url("drive-1") == root_delta_url
# ---------------------------------------------------------------------------
# load_credentials