mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
fix(onedrive): normalize folder_path for Graph delta URL (#15503)
Prepend a leading slash and reject `..` segments so scoped OneDrive delta queries use `root:/path:/delta` instead of `root:path:/delta`.

Fixes #15500

### What problem does this PR solve?

The OneDrive connector builds Microsoft Graph delta URLs from the optional `config.folder_path`. When users enter a path without a leading slash (e.g. `Documents/Reports` instead of `/Documents/Reports`), the connector produces a malformed URL such as `root:Documents/Reports:/delta`. Per [Microsoft Graph path-based addressing](https://learn.microsoft.com/en-us/graph/onedrive-addressing-driveitems), the segment after `root:` must start with `/` (e.g. `root:/Documents/Reports:/delta`). Sync and validation then fail or return no documents, which is hard to diagnose from the UI because the optional folder field does not enforce the format.

This PR normalizes `folder_path` at connector construction time (prepend `/`, trim whitespace and trailing slashes) and rejects `..` segments before any Graph request is made.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -32,6 +32,21 @@ _SUPPORTED_EXTENSIONS = {
|
||||
}
|
||||
|
||||
|
||||
def _normalize_folder_path(folder_path: str | None) -> str | None:
|
||||
"""Normalize Graph path-based addressing segment (root:{path}:/delta)."""
|
||||
if folder_path is None:
|
||||
return None
|
||||
path = folder_path.strip()
|
||||
if not path:
|
||||
return None
|
||||
segments = [segment for segment in path.split("/") if segment]
|
||||
if ".." in segments:
|
||||
raise ConnectorValidationError("folder_path must not contain '..' segments.")
|
||||
if not segments:
|
||||
return None
|
||||
return "/" + "/".join(segments)
|
||||
|
||||
|
||||
class OneDriveCheckpoint(ConnectorCheckpoint):
|
||||
"""OneDrive-specific checkpoint tracking delta links per drive."""
|
||||
delta_links: dict[str, str] | None = None
|
||||
@@ -52,7 +67,7 @@ class OneDriveConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPerm
|
||||
folder_path: str | None = None,
|
||||
) -> None:
|
||||
self.batch_size = batch_size
|
||||
self.folder_path = folder_path # optional sub-folder filter, e.g. "/Documents"
|
||||
self.folder_path = _normalize_folder_path(folder_path)
|
||||
self._access_token: str | None = None
|
||||
self._tenant_id: str | None = None
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ from common.data_source.onedrive_connector import OneDriveConnector, OneDriveChe
|
||||
from common.data_source.models import SlimDocument
|
||||
from common.data_source.exceptions import (
|
||||
ConnectorMissingCredentialError,
|
||||
ConnectorValidationError,
|
||||
InsufficientPermissionsError,
|
||||
UnexpectedValidationError,
|
||||
)
|
||||
@@ -18,6 +19,65 @@ _GOOD_CREDS = {
|
||||
"client_secret": "secret-xyz",
|
||||
}
|
||||
|
||||
# Microsoft Graph v1.0 base URL; the folder_path tests use it to build the delta URLs they expect.
_GRAPH_BASE = "https://graph.microsoft.com/v1.0"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# folder_path / _delta_url
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.p2
def test_folder_path_prepends_leading_slash_for_delta_url():
    """A path given without a leading slash gains one and yields a folder-scoped delta URL."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"

    onedrive = OneDriveConnector(folder_path="Documents/Reports")

    assert onedrive.folder_path == "/Documents/Reports"
    assert onedrive._delta_url("drive-1") == expected_url
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_folder_path_preserves_leading_slash():
    """An already-rooted path keeps its leading slash and loses the trailing one."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"

    onedrive = OneDriveConnector(folder_path="/Documents/Reports/")

    assert onedrive.folder_path == "/Documents/Reports"
    assert onedrive._delta_url("drive-1") == expected_url
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_folder_path_rejects_parent_segments():
    """Constructing a connector with a '..' path segment raises a validation error."""
    unsafe_path = "/Documents/../secret"

    with pytest.raises(ConnectorValidationError, match="\\.\\."):
        OneDriveConnector(folder_path=unsafe_path)
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_folder_path_normalizes_consecutive_slashes():
    """Runs of slashes collapse to single separators in both the path and the delta URL."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root:/Documents/Reports:/delta"

    onedrive = OneDriveConnector(folder_path="//Documents//Reports")

    assert onedrive.folder_path == "/Documents/Reports"
    assert onedrive._delta_url("drive-1") == expected_url
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_folder_path_strips_whitespace():
    """Whitespace around the configured path is removed before normalization."""
    padded_path = " Documents/Reports "

    onedrive = OneDriveConnector(folder_path=padded_path)

    assert onedrive.folder_path == "/Documents/Reports"
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_folder_path_root_uses_drive_root_delta():
    """A bare '/' means no folder filter, so the drive-root delta endpoint is used."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root/delta"

    onedrive = OneDriveConnector(folder_path="/")

    assert onedrive.folder_path is None
    assert onedrive._delta_url("drive-1") == expected_url
|
||||
|
||||
|
||||
@pytest.mark.p2
def test_folder_path_double_slash_only_uses_drive_root_delta():
    """A path made only of slashes means no folder filter, so the drive-root delta endpoint is used."""
    expected_url = f"{_GRAPH_BASE}/drives/drive-1/root/delta"

    onedrive = OneDriveConnector(folder_path="//")

    assert onedrive.folder_path is None
    assert onedrive._delta_url("drive-1") == expected_url
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# load_credentials
|
||||
|
||||
Reference in New Issue
Block a user