diff --git a/common/data_source/sharepoint_connector.py b/common/data_source/sharepoint_connector.py index e5684023c1..ab3384d702 100644 --- a/common/data_source/sharepoint_connector.py +++ b/common/data_source/sharepoint_connector.py @@ -1,119 +1,270 @@ -"""SharePoint connector""" +"""SharePoint connector + +Ingests files from SharePoint document libraries via the Microsoft Graph API +(Office365-REST-Python-Client). Authentication uses MSAL client-credentials +(app-only) flow, so it requires an Azure AD app with the ``Sites.Read.All`` and +``Files.Read.All`` application permissions (admin-consented). + +The connector implements the checkpointed-connector interface used by the sync +worker: ``load_from_checkpoint`` walks every document library under the +configured site, downloads each file, and yields blob-based ``Document`` +objects. Incremental syncs are bounded by the file ``lastModifiedDateTime``. +""" + +import logging +from datetime import datetime, timezone +from typing import Any, Generator -from typing import Any import msal from office365.graph_client import GraphClient -from office365.runtime.client_request import ClientRequestException -from office365.sharepoint.client_context import ClientContext from common.data_source.config import INDEX_BATCH_SIZE -from common.data_source.exceptions import ConnectorValidationError, ConnectorMissingCredentialError +from common.data_source.exceptions import ( + ConnectorMissingCredentialError, + ConnectorValidationError, +) from common.data_source.interfaces import ( CheckpointedConnectorWithPermSync, SecondsSinceUnixEpoch, - SlimConnectorWithPermSync + SlimConnectorWithPermSync, ) from common.data_source.models import ( - ConnectorCheckpoint + ConnectorCheckpoint, + ConnectorFailure, + Document, + DocumentFailure, + SlimDocument, ) +GRAPH_SCOPES = ["https://graph.microsoft.com/.default"] + class SharePointConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync): - """SharePoint connector for accessing SharePoint sites and documents""" + """SharePoint connector for accessing SharePoint sites and documents.""" def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None: self.batch_size = batch_size - self.sharepoint_client = None - self.graph_client = None + self.graph_client: GraphClient | None = None + self._site_url: str | None = None + + # -- credentials --------------------------------------------------------- def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: - """Load SharePoint credentials""" - try: - tenant_id = credentials.get("tenant_id") - client_id = credentials.get("client_id") - client_secret = credentials.get("client_secret") - site_url = credentials.get("site_url") - - if not all([tenant_id, client_id, client_secret, site_url]): - raise ConnectorMissingCredentialError("SharePoint credentials are incomplete") - - # Create MSAL confidential client + """Configure a Microsoft Graph client from app-only credentials. + + The token is acquired lazily through a callback (the way + ``GraphClient`` expects it), so this method performs no network call; + the first real request triggers ``acquire_token_for_client``. + """ + tenant_id = credentials.get("tenant_id") + client_id = credentials.get("client_id") + client_secret = credentials.get("client_secret") + site_url = credentials.get("site_url") + + if not all([tenant_id, client_id, client_secret, site_url]): + raise ConnectorMissingCredentialError("SharePoint credentials are incomplete") + + self._site_url = site_url + authority = f"https://login.microsoftonline.com/{tenant_id}" + + def _acquire_token() -> dict[str, Any]: app = msal.ConfidentialClientApplication( client_id=client_id, client_credential=client_secret, - authority=f"https://login.microsoftonline.com/{tenant_id}" + authority=authority, ) - - # Get access token - result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"]) - - if "access_token" not in result: - raise ConnectorMissingCredentialError("Failed to acquire SharePoint access token") - - # Create Graph client - self.graph_client = GraphClient(result["access_token"]) - - # Create SharePoint client context - self.sharepoint_client = ClientContext(site_url).with_access_token(result["access_token"]) - - return None - except Exception as e: - raise ConnectorMissingCredentialError(f"SharePoint: {e}") + token = app.acquire_token_for_client(scopes=GRAPH_SCOPES) + if "access_token" not in token: + detail = token.get("error_description") or token.get("error") or token + raise ConnectorMissingCredentialError( + f"Failed to acquire SharePoint access token: {detail}" + ) + return token + + self.graph_client = GraphClient(_acquire_token) + return None def validate_connector_settings(self) -> None: - """Validate SharePoint connector settings""" - if not self.sharepoint_client or not self.graph_client: + """Validate credentials by resolving the configured site.""" + if self.graph_client is None or not self._site_url: raise ConnectorMissingCredentialError("SharePoint") - + try: - # Test connection by getting site info - site = self.sharepoint_client.site.get().execute_query() + site = self.graph_client.sites.get_by_url(self._site_url).execute_query() if not site: raise ConnectorValidationError("Failed to access SharePoint site") - except ClientRequestException as e: - if "401" in str(e) or "403" in str(e): - raise ConnectorValidationError("Invalid credentials or insufficient permissions") - else: - raise ConnectorValidationError(f"SharePoint validation error: {e}") + except ConnectorValidationError: + raise + except Exception as e: + message = str(e) + if "401" in message or "403" in message: + raise ConnectorValidationError( + "Invalid credentials or insufficient permissions for SharePoint" + ) + raise ConnectorValidationError(f"SharePoint validation error: {e}") - def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> Any: - """Poll SharePoint for recent documents""" - # Simplified implementation - in production this would handle actual polling - return [] + # -- traversal helpers --------------------------------------------------- + + def _iter_drives(self): + site = self.graph_client.sites.get_by_url(self._site_url).execute_query() + return site.drives.get().execute_query() + + @staticmethod + def _is_folder(drive_item: Any) -> bool: + return "folder" in getattr(drive_item, "properties", {}) + + def _walk_files(self, root_item: Any) -> Generator[Any, None, None]: + """Depth-first walk of a drive yielding file (non-folder) driveItems.""" + stack = [root_item] + while stack: + folder = stack.pop() + children = folder.children.get().execute_query() + for child in children: + if self._is_folder(child): + stack.append(child) + else: + yield child + + @staticmethod + def _modified_dt(drive_item: Any) -> datetime | None: + value = getattr(drive_item, "last_modified_datetime", None) + if value is None: + value = getattr(drive_item, "properties", {}).get("lastModifiedDateTime") + if value is None: + return None + if isinstance(value, str): + try: + value = datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + if value.tzinfo is None: + value = value.replace(tzinfo=timezone.utc) + return value + + @staticmethod + def _composite_doc_id(drive_id: Any, drive_item: Any) -> str: + # Graph driveItem IDs are only unique within a single drive. A site can + # expose multiple document libraries (drives), so we namespace the item + # ID by drive ID to keep document identifiers globally unique. + return f"{drive_id}:{drive_item.id}" + + def _drive_item_to_document(self, drive_item: Any, drive_id: Any, drive_name: str) -> Document: + name = drive_item.name or str(drive_item.id) + content_result = drive_item.get_content().execute_query() + blob = content_result.value or b"" + if isinstance(blob, str): + blob = blob.encode("utf-8") + + extension = "" + if "." in name: + extension = "." + name.rsplit(".", 1)[1] + + size_bytes = getattr(drive_item, "properties", {}).get("size") + if not size_bytes: + size_bytes = len(blob) + + modified = self._modified_dt(drive_item) or datetime.now(timezone.utc) + + metadata = {"drive": drive_name, "drive_id": str(drive_id), "drive_item_id": str(drive_item.id)} + web_url = getattr(drive_item, "web_url", None) + if web_url: + metadata["web_url"] = web_url + + return Document( + id=self._composite_doc_id(drive_id, drive_item), + source="sharepoint", + semantic_identifier=name, + extension=extension, + blob=blob, + size_bytes=int(size_bytes), + doc_updated_at=modified, + metadata=metadata, + ) + + def _generate_documents( + self, + start: SecondsSinceUnixEpoch, + end: SecondsSinceUnixEpoch, + ) -> Generator[Document | ConnectorFailure, None, None]: + if self.graph_client is None or not self._site_url: + raise ConnectorMissingCredentialError("SharePoint") + + for drive in self._iter_drives(): + drive_name = getattr(drive, "name", None) or getattr(drive, "properties", {}).get("name", "") + drive_id = getattr(drive, "id", None) or getattr(drive, "properties", {}).get("id", "") + for drive_item in self._walk_files(drive.root): + try: + modified = self._modified_dt(drive_item) + if modified is not None: + ts = modified.timestamp() + # start is an exclusive lower bound; full reindex passes start=0. + if not (start < ts <= end): + continue + yield self._drive_item_to_document(drive_item, drive_id, drive_name) + except Exception as e: + logging.exception("SharePoint failed to process drive item") + yield ConnectorFailure( + failed_document=DocumentFailure( + document_id=self._composite_doc_id(drive_id, drive_item) + if getattr(drive_item, "id", None) is not None + else "unknown", + document_link=getattr(drive_item, "web_url", "") or "", + ), + failure_message=str(e), + exception=e, + ) + + # -- checkpointed connector interface ------------------------------------ def load_from_checkpoint( self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch, checkpoint: ConnectorCheckpoint, - ) -> Any: - """Load documents from checkpoint""" - # Simplified implementation - return [] + ) -> Generator[Document | ConnectorFailure, None, ConnectorCheckpoint]: + """Yield every file under the site as a Document, then finish. + + The whole library is enumerated in a single pass, so the returned + checkpoint always has ``has_more=False``. + """ + yield from self._generate_documents(start, end) + return ConnectorCheckpoint(has_more=False) def load_from_checkpoint_with_perm_sync( self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch, checkpoint: ConnectorCheckpoint, - ) -> Any: - """Load documents from checkpoint with permission sync""" - # Simplified implementation - return [] + ) -> Generator[Document | ConnectorFailure, None, ConnectorCheckpoint]: + """Permission-aware variant. + + SharePoint ACL -> ExternalAccess mapping is not yet wired through the + sync pipeline (the pipeline does not persist ExternalAccess), so this + currently yields the same documents as ``load_from_checkpoint``. + """ + return self.load_from_checkpoint(start, end, checkpoint) def build_dummy_checkpoint(self) -> ConnectorCheckpoint: - """Build dummy checkpoint""" - return ConnectorCheckpoint() + return ConnectorCheckpoint(has_more=True) def validate_checkpoint_json(self, checkpoint_json: str) -> ConnectorCheckpoint: - """Validate checkpoint JSON""" - # Simplified implementation - return ConnectorCheckpoint() + return ConnectorCheckpoint(has_more=True) def retrieve_all_slim_docs_perm_sync( self, callback: Any = None, - ) -> Any: - """Retrieve all simplified documents with permission sync""" - # Simplified implementation - return [] + ) -> Generator[list[SlimDocument], None, None]: + """Yield batches of slim documents (ids only) for prune/permission sync.""" + if self.graph_client is None or not self._site_url: + raise ConnectorMissingCredentialError("SharePoint") + + batch: list[SlimDocument] = [] + for drive in self._iter_drives(): + drive_id = getattr(drive, "id", None) or getattr(drive, "properties", {}).get("id", "") + for drive_item in self._walk_files(drive.root): + batch.append(SlimDocument(id=self._composite_doc_id(drive_id, drive_item))) + if len(batch) >= self.batch_size: + yield batch + batch = [] + if batch: + yield batch diff --git a/rag/svr/sync_data_source.py b/rag/svr/sync_data_source.py index a5ba395820..3b01014327 100644 --- a/rag/svr/sync_data_source.py +++ b/rag/svr/sync_data_source.py @@ -61,6 +61,7 @@ from common.data_source import ( RDBMSConnector, DingTalkAITableConnector, RestAPIConnector, + SharePointConnector, ) from common.data_source.models import ConnectorFailure, SeafileSyncScope from common.data_source.webdav_connector import WebDAVConnector @@ -932,7 +933,66 @@ class SharePoint(SyncBase): SOURCE_NAME: str = FileSource.SHAREPOINT async def _generate(self, task: dict): - pass + self.connector = SharePointConnector( + batch_size=self.conf.get("batch_size", INDEX_BATCH_SIZE), + ) + + credentials = self.conf.get("credentials") or {} + self.connector.load_credentials(credentials) + self.connector.validate_connector_settings() + + if task["reindex"] == "1" or not task["poll_range_start"]: + start_time = 0.0 + _begin_info = "totally" + else: + start_time = task["poll_range_start"].timestamp() + _begin_info = f"from {task['poll_range_start']}" + + end_time = datetime.now(timezone.utc).timestamp() + + raw_batch_size = self.conf.get("sync_batch_size") or self.conf.get("batch_size") or INDEX_BATCH_SIZE + try: + batch_size = int(raw_batch_size) + except (TypeError, ValueError): + batch_size = INDEX_BATCH_SIZE + if batch_size <= 0: + batch_size = INDEX_BATCH_SIZE + + def document_batches(): + checkpoint = self.connector.build_dummy_checkpoint() + pending_docs = [] + iterations = 0 + iteration_limit = 100_000 + + while checkpoint.has_more: + wrapper = CheckpointOutputWrapper() + doc_generator = wrapper( + self.connector.load_from_checkpoint(start_time, end_time, checkpoint) + ) + for document, failure, next_checkpoint in doc_generator: + if failure is not None: + logging.warning( + "SharePoint connector failure: %s", + getattr(failure, "failure_message", failure), + ) + continue + if document is not None: + pending_docs.append(document) + if len(pending_docs) >= batch_size: + yield pending_docs + pending_docs = [] + if next_checkpoint is not None: + checkpoint = next_checkpoint + + iterations += 1 + if iterations > iteration_limit: + raise RuntimeError("Too many iterations while loading SharePoint documents.") + + if pending_docs: + yield pending_docs + + self.log_connection("SharePoint", self.conf.get("credentials", {}).get("site_url", ""), task) + return document_batches() class Slack(SyncBase): diff --git a/test/unit_test/data_source/test_sharepoint_connector_unit.py b/test/unit_test/data_source/test_sharepoint_connector_unit.py new file mode 100644 index 0000000000..ed12b6714b --- /dev/null +++ b/test/unit_test/data_source/test_sharepoint_connector_unit.py @@ -0,0 +1,284 @@ +# +# Copyright 2026 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import importlib.util +import sys +from datetime import datetime, timezone +from pathlib import Path +from types import ModuleType + +import pytest + + +def _load_sharepoint_connector_module(): + """Load sharepoint_connector.py in isolation (avoid the package __init__).""" + repo_root = Path(__file__).resolve().parents[3] + package_name = "common.data_source" + saved_modules = { + name: module + for name, module in sys.modules.items() + if name == package_name or name.startswith(f"{package_name}.") + } + package_stub = ModuleType(package_name) + package_stub.__path__ = [str(repo_root / "common" / "data_source")] + sys.modules[package_name] = package_stub + + try: + spec = importlib.util.spec_from_file_location( + "_sharepoint_connector_under_test", + repo_root / "common" / "data_source" / "sharepoint_connector.py", + ) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + finally: + for name in list(sys.modules): + if name == package_name or name.startswith(f"{package_name}."): + if name in saved_modules: + sys.modules[name] = saved_modules[name] + else: + sys.modules.pop(name, None) + + +sharepoint_connector = _load_sharepoint_connector_module() +SharePointConnector = sharepoint_connector.SharePointConnector + + +# --- fakes for the office365 fluent API ------------------------------------ + + +class _Query: + """Mimics the `.get()` / `.get_by_url()` -> `.execute_query()` chain.""" + + def __init__(self, value): + self._value = value + + def execute_query(self): + return self._value + + +class _Content: + def __init__(self, value: bytes): + self.value = value + + def execute_query(self): + return self + + +class _FakeDriveItem: + def __init__(self, item_id, name=None, content=None, modified=None, children=None, size=None): + self.id = item_id + self.name = name + self.web_url = f"https://contoso.sharepoint.com/{item_id}" + self.last_modified_datetime = modified + self._content = content + self._children = children or [] + self.properties = {} + if children is not None: + self.properties["folder"] = {"childCount": len(children)} + else: + self.properties["file"] = {"mimeType": "text/plain"} + if size is not None: + self.properties["size"] = size + + @property + def children(self): + return _FakeDrivesAccessor(self._children) + + def get_content(self): + return _Content(self._content) + + +class _FakeDrive: + def __init__(self, name, root, drive_id=None): + self.name = name + self.root = root + self.id = drive_id or f"drive-{name}" + self.properties = {"name": name, "id": self.id} + + +class _FakeDrivesAccessor: + def __init__(self, drives): + self._drives = drives + + def get(self): + return _Query(self._drives) + + +class _FakeSite: + def __init__(self, drives): + self.drives = _FakeDrivesAccessor(drives) + + def __bool__(self): + return True + + +class _FakeSitesAccessor: + def __init__(self, site): + self._site = site + + def get_by_url(self, url): + return _Query(self._site) + + +class _FakeGraphClient: + def __init__(self, site): + self.sites = _FakeSitesAccessor(site) + + +def _build_connector_with_tree(): + jan = datetime(2026, 1, 1, 12, tzinfo=timezone.utc) + feb = datetime(2026, 2, 1, 12, tzinfo=timezone.utc) + + readme = _FakeDriveItem("f1", "readme.txt", b"hello sharepoint", jan, size=16) + nested = _FakeDriveItem("f2", "report.md", b"# Report", feb, size=8) + subfolder = _FakeDriveItem("d2", "sub", children=[nested]) + root = _FakeDriveItem("d1", "root", children=[readme, subfolder]) + drive = _FakeDrive("Documents", root, drive_id="drv-A") + site = _FakeSite([drive]) + + connector = SharePointConnector(batch_size=10) + connector.graph_client = _FakeGraphClient(site) + connector._site_url = "https://contoso.sharepoint.com/sites/MySite" + return connector, jan, feb + + +# --- credential loading ----------------------------------------------------- + + +def test_load_credentials_incomplete_raises(): + connector = SharePointConnector() + with pytest.raises(sharepoint_connector.ConnectorMissingCredentialError): + connector.load_credentials({"tenant_id": "t", "client_id": "c"}) + + +def test_load_credentials_sets_graph_client(monkeypatch): + captured = {} + + class _FakeApp: + def __init__(self, **kwargs): + captured.update(kwargs) + + def acquire_token_for_client(self, scopes): + return {"access_token": "tok"} + + monkeypatch.setattr(sharepoint_connector.msal, "ConfidentialClientApplication", _FakeApp) + monkeypatch.setattr(sharepoint_connector, "GraphClient", lambda token_callback: ("client", token_callback)) + + connector = SharePointConnector() + result = connector.load_credentials( + { + "tenant_id": "tenant", + "client_id": "client", + "client_secret": "secret", + "site_url": "https://contoso.sharepoint.com/sites/MySite", + } + ) + + assert result is None + assert connector._site_url == "https://contoso.sharepoint.com/sites/MySite" + assert connector.graph_client is not None + + +def test_fetch_without_credentials_raises(): + connector = SharePointConnector() + with pytest.raises(sharepoint_connector.ConnectorMissingCredentialError): + list(connector.load_from_checkpoint(0.0, 9e12, connector.build_dummy_checkpoint())) + + +# --- document generation ---------------------------------------------------- + + +def _collect(generator): + """Drain a checkpoint generator, returning (documents, final_checkpoint).""" + docs = [] + try: + while True: + docs.append(next(generator)) + except StopIteration as stop: + return docs, stop.value + + +def test_load_from_checkpoint_walks_libraries_and_downloads(): + connector, _jan, _feb = _build_connector_with_tree() + + docs, checkpoint = _collect( + connector.load_from_checkpoint(0.0, 9e12, connector.build_dummy_checkpoint()) + ) + + assert checkpoint.has_more is False + assert {doc.id for doc in docs} == {"drv-A:f1", "drv-A:f2"} + + by_id = {doc.id: doc for doc in docs} + assert by_id["drv-A:f1"].blob == b"hello sharepoint" + assert by_id["drv-A:f1"].extension == ".txt" + assert by_id["drv-A:f1"].size_bytes == 16 + assert by_id["drv-A:f1"].source == "sharepoint" + assert by_id["drv-A:f1"].metadata["drive"] == "Documents" + assert by_id["drv-A:f1"].metadata["drive_id"] == "drv-A" + assert by_id["drv-A:f1"].metadata["drive_item_id"] == "f1" + assert by_id["drv-A:f2"].semantic_identifier == "report.md" + assert by_id["drv-A:f2"].extension == ".md" + + +def test_load_from_checkpoint_filters_by_modified_window(): + connector, _jan, feb = _build_connector_with_tree() + + # Only include files modified strictly after mid-January -> just report.md (Feb). + start = datetime(2026, 1, 15, tzinfo=timezone.utc).timestamp() + end = datetime(2026, 3, 1, tzinfo=timezone.utc).timestamp() + + docs, _ = _collect( + connector.load_from_checkpoint(start, end, connector.build_dummy_checkpoint()) + ) + + assert [doc.id for doc in docs] == ["drv-A:f2"] + + +def test_retrieve_all_slim_docs_lists_ids_without_download(): + connector, _jan, _feb = _build_connector_with_tree() + + batches = list(connector.retrieve_all_slim_docs_perm_sync()) + ids = [doc.id for batch in batches for doc in batch] + + assert sorted(ids) == ["drv-A:f1", "drv-A:f2"] + + +def test_document_ids_are_unique_across_drives_with_colliding_item_ids(): + # Graph driveItem IDs are unique only within a single drive; two libraries + # under the same site can legitimately yield items with identical IDs. + jan = datetime(2026, 1, 1, 12, tzinfo=timezone.utc) + + file_a = _FakeDriveItem("same-id", "a.txt", b"A", jan, size=1) + root_a = _FakeDriveItem("rootA", "root", children=[file_a]) + drive_a = _FakeDrive("LibraryA", root_a, drive_id="drv-A") + + file_b = _FakeDriveItem("same-id", "b.txt", b"B", jan, size=1) + root_b = _FakeDriveItem("rootB", "root", children=[file_b]) + drive_b = _FakeDrive("LibraryB", root_b, drive_id="drv-B") + + site = _FakeSite([drive_a, drive_b]) + connector = SharePointConnector(batch_size=10) + connector.graph_client = _FakeGraphClient(site) + connector._site_url = "https://contoso.sharepoint.com/sites/MySite" + + docs, _ = _collect( + connector.load_from_checkpoint(0.0, 9e12, connector.build_dummy_checkpoint()) + ) + ids = {doc.id for doc in docs} + assert ids == {"drv-A:same-id", "drv-B:same-id"} + + slim_ids = [doc.id for batch in connector.retrieve_all_slim_docs_perm_sync() for doc in batch] + assert sorted(slim_ids) == ["drv-A:same-id", "drv-B:same-id"] diff --git a/web/src/assets/svg/data-source/sharepoint.svg b/web/src/assets/svg/data-source/sharepoint.svg new file mode 100644 index 0000000000..fc8ed27b0b --- /dev/null +++ b/web/src/assets/svg/data-source/sharepoint.svg @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 18f988691c..1f8f1fe117 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1238,6 +1238,10 @@ Example: Virtual Hosted Style`, 'Upload the OAuth JSON generated from Google Console. If it only contains client credentials, run the browser-based verification once to mint long-lived refresh tokens.', dropboxDescription: 'Connect your Dropbox to sync files and folders from a chosen account.', + sharepointDescription: + 'Connect a SharePoint site via Microsoft Graph to sync its document libraries.', + sharepointSiteUrlTip: + 'Full URL of the SharePoint site to index, e.g. https://contoso.sharepoint.com/sites/MySite. Requires an Azure AD app with Sites.Read.All and Files.Read.All application permissions (admin consent).', bitbucketDescription: 'Connect Bitbucket to sync PR content.', bitbucketTopWorkspaceTip: 'The Bitbucket workspace to index (e.g., "atlassian" from https://bitbucket.org/atlassian/workspace ).', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 17c368ee19..cd0c6709cb 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -1099,6 +1099,9 @@ NER:使用 spaCy NER 和基于规则的关键词提取来抽取实体和关系 gmailTokenTip: '请上传由 Google Console 生成的 OAuth JSON。如果仅包含 client credentials,请通过浏览器授权一次以获取长期有效的刷新 Token。', dropboxDescription: '连接 Dropbox,同步指定账号下的文件与文件夹。', + sharepointDescription: '通过 Microsoft Graph 连接 SharePoint 站点,同步其文档库。', + sharepointSiteUrlTip: + '要索引的 SharePoint 站点完整 URL,例如 https://contoso.sharepoint.com/sites/MySite。需要具备 Sites.Read.All 与 Files.Read.All 应用权限(管理员同意)的 Azure AD 应用。', boxDescription: '连接你的 Box 云盘以同步文件和文件夹。', bitbucketDescription: '连接 Bitbucket,同步 PR 内容。', bitbucketTopWorkspaceTip: diff --git a/web/src/pages/user-setting/data-source/constant/index.tsx b/web/src/pages/user-setting/data-source/constant/index.tsx index f1d2bb4ace..026570eca3 100644 --- a/web/src/pages/user-setting/data-source/constant/index.tsx +++ b/web/src/pages/user-setting/data-source/constant/index.tsx @@ -43,8 +43,8 @@ export enum DataSourceKey { POSTGRESQL = 'postgresql', REST_API = 'rest_api', RSS = 'rss', + SHAREPOINT = 'sharepoint', - // SHAREPOINT = 'sharepoint', // SLACK = 'slack', // TEAMS = 'teams', } @@ -213,6 +213,11 @@ export const generateDataSourceInfo = (t: TFunction) => { description: t(`setting.${DataSourceKey.MOODLE}Description`), icon: , }, + [DataSourceKey.SHAREPOINT]: { + name: 'SharePoint', + description: t(`setting.${DataSourceKey.SHAREPOINT}Description`), + icon: , + }, [DataSourceKey.JIRA]: { name: 'Jira', description: t(`setting.${DataSourceKey.JIRA}Description`), @@ -654,6 +659,34 @@ export const DataSourceFormFields = { required: true, }, ], + [DataSourceKey.SHAREPOINT]: [ + { + label: 'Site URL', + name: 'config.credentials.site_url', + type: FormFieldType.Text, + required: true, + placeholder: 'https://contoso.sharepoint.com/sites/MySite', + tooltip: t('setting.sharepointSiteUrlTip'), + }, + { + label: 'Tenant ID', + name: 'config.credentials.tenant_id', + type: FormFieldType.Text, + required: true, + }, + { + label: 'Client ID', + name: 'config.credentials.client_id', + type: FormFieldType.Text, + required: true, + }, + { + label: 'Client Secret', + name: 'config.credentials.client_secret', + type: FormFieldType.Password, + required: true, + }, + ], [DataSourceKey.JIRA]: jiraConstant(t), [DataSourceKey.WEBDAV]: [ { @@ -1509,6 +1542,18 @@ export const DataSourceFormDefaultValues = { }, }, }, + [DataSourceKey.SHAREPOINT]: { + name: '', + source: DataSourceKey.SHAREPOINT, + config: { + credentials: { + site_url: '', + tenant_id: '', + client_id: '', + client_secret: '', + }, + }, + }, [DataSourceKey.JIRA]: { name: '', source: DataSourceKey.JIRA,