mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
### What problem does this PR solve?

Closes #15465.

RAGFlow supports S3, Google Cloud Storage, R2, and OCI as data sources but not Azure Blob Storage, leaving Azure users without a way to index container objects into a knowledge base. This adds a first-class Azure Blob Storage data-source connector — distinct from RAGFlow's existing Azure storage *backends* (`rag/utils/azure_sas_conn.py`, `rag/utils/azure_spn_conn.py`) which store RAGFlow's own files.

**Highlights**

- `common/data_source/azure_blob_connector.py`: new `AzureBlobConnector` (`CheckpointedConnectorWithPermSync` + `SlimConnectorWithPermSync`).
- Uses the existing `azure-storage-blob` dependency (already in `pyproject.toml`).
- Three auth modes, tried in order of precedence:
  1. **Account key** — `account_name` + `account_key` + `container_name`.
  2. **Connection string** — `connection_string` + `container_name`.
  3. **SAS token** — `container_url` + `sas_token` (same shape as `RAGFlowAzureSasBlob`).
- ETag fingerprint stored per blob in `AzureBlobCheckpoint.etags` — unchanged blobs (same ETag as last run) are skipped without a download. Only new/modified blobs are fetched.
- Optional `prefix` scopes indexing to a virtual folder.
- `validate_connector_settings()` probes `get_container_properties()` and maps `AuthenticationFailed / 403 / ContainerNotFound` to typed connector exceptions.
- Slim-doc IDs are blob names so prune reconciles correctly.
- `common/constants.py`, `common/data_source/config.py`, `common/data_source/__init__.py`: register `azure_blob` in `FileSource` / `DocumentSource` and export `AzureBlobConnector`.
- `rag/svr/sync_data_source.py`: new `AzureBlob(SyncBase)` class routed through `load_from_checkpoint` (ETag fingerprint owns change-detection) and added to `func_factory`.
- Frontend:
  - `web/src/pages/user-setting/data-source/constant/index.tsx`: new `DataSourceKey.AZURE_BLOB`, auth-mode selector (account key / connection string / SAS token), all credential fields, prefix + batch-size, `syncDeletedFiles` capability, default form values, tile entry with icon.
  - `web/src/locales/{en,zh}.ts`: description + per-field tooltips for all 9 new keys.
  - `web/src/assets/svg/data-source/azure-blob.svg`: Azure-branded stacked-cylinders icon.

**Verification**

- `npm run build` (vite + esbuild) passes (37 s).

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
99 lines
3.5 KiB
Python
99 lines
3.5 KiB
Python
|
|
"""
|
|
Thanks to https://github.com/onyx-dot-app/onyx
|
|
|
|
Content of this directory is under the "MIT Expat" license as defined below.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in all
|
|
copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
SOFTWARE.
|
|
"""
|
|
|
|
from .blob_connector import BlobStorageConnector
|
|
from .rss_connector import RSSConnector
|
|
from .slack_connector import SlackConnector
|
|
from .gmail_connector import GmailConnector
|
|
from .notion_connector import NotionConnector
|
|
from .confluence_connector import ConfluenceConnector
|
|
from .discord_connector import DiscordConnector
|
|
from .dropbox_connector import DropboxConnector
|
|
from .google_drive.connector import GoogleDriveConnector
|
|
from .jira.connector import JiraConnector
|
|
from .sharepoint_connector import SharePointConnector
|
|
from .onedrive_connector import OneDriveConnector
|
|
from .outlook_connector import OutlookConnector
|
|
from .azure_blob_connector import AzureBlobConnector
|
|
from .teams_connector import TeamsConnector
|
|
from .moodle_connector import MoodleConnector
|
|
from .airtable_connector import AirtableConnector
|
|
from .dingtalk_ai_table_connector import DingTalkAITableConnector
|
|
from .asana_connector import AsanaConnector
|
|
from .imap_connector import ImapConnector
|
|
from .zendesk_connector import ZendeskConnector
|
|
from .seafile_connector import SeaFileConnector
|
|
from .rdbms_connector import RDBMSConnector
|
|
from .webdav_connector import WebDAVConnector
|
|
from .rest_api_connector import RestAPIConnector
|
|
from .config import BlobType, DocumentSource
|
|
from .models import Document, TextSection, ImageSection, BasicExpertInfo
|
|
from .exceptions import (
|
|
ConnectorMissingCredentialError,
|
|
ConnectorValidationError,
|
|
CredentialExpiredError,
|
|
InsufficientPermissionsError,
|
|
UnexpectedValidationError
|
|
)
|
|
|
|
# Names exported by ``from <this package> import *``.
# Every entry is bound by one of the imports at the top of this module, and
# the entry order is unchanged from the previous definition.
__all__ = [
    # Connector classes.
    "BlobStorageConnector",
    "RSSConnector",
    "SlackConnector",
    "GmailConnector",
    "NotionConnector",
    "ConfluenceConnector",
    "DiscordConnector",
    "DropboxConnector",
    "GoogleDriveConnector",
    "JiraConnector",
    "SharePointConnector",
    "OneDriveConnector",
    "OutlookConnector",
    "AzureBlobConnector",
    "TeamsConnector",
    "MoodleConnector",
    # Names imported from ``.config``.
    "BlobType",
    "DocumentSource",
    # Names imported from ``.models``.
    "Document",
    "TextSection",
    "ImageSection",
    "BasicExpertInfo",
    # Exception types imported from ``.exceptions``.
    "ConnectorMissingCredentialError",
    "ConnectorValidationError",
    "CredentialExpiredError",
    "InsufficientPermissionsError",
    "UnexpectedValidationError",
    # Further connector classes, listed after the shared types.
    "AirtableConnector",
    "AsanaConnector",
    "ImapConnector",
    "ZendeskConnector",
    "SeaFileConnector",
    "RDBMSConnector",
    "WebDAVConnector",
    "DingTalkAITableConnector",
    "RestAPIConnector",
]
|