mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
fix: add bucket prefix to Azure Blob SPN and SAS storage operations (#14347)
## Summary
Fixes file collision between different datasets when using Azure Blob
storage (SPN or SAS authentication).
## Bug
azure_spn_conn.py and zure_sas_conn.py ignored the ucket parameter
entirely, storing all files flat with just the filename. This caused
files with the same name from different datasets (knowledge bases) to
overwrite each other.
## Fix
Prepend bucket/ as a path prefix in all methods (put,
m, get, obj_exist, get_presigned_url, health) to match the behavior of
MinIO and S3 implementations.
## Changes
- **rag/utils/azure_spn_conn.py**: Added {bucket}/ prefix to file paths
in all operations
- **rag/utils/azure_sas_conn.py**: Same fix applied for consistency
(also noted in the original issue)
## Testing
Manual verification: files from different datasets now stored under
distinct bucket/ prefixes, preventing collisions.
Fixes #14159
Co-authored-by: Hunter <hunter@yitong.ai>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@@ -49,7 +49,7 @@ class RAGFlowAzureSasBlob:
|
||||
|
||||
def health(self):
|
||||
_bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
|
||||
return self.conn.upload_blob(name=fnm, data=BytesIO(binary), length=len(binary))
|
||||
return self.conn.upload_blob(name=f"{_bucket}/{fnm}", data=BytesIO(binary), length=len(binary))
|
||||
|
||||
def put(self, bucket, fnm, binary, tenant_id=None):
|
||||
blob_name = f"{bucket}/{fnm}"
|
||||
@@ -77,13 +77,14 @@ class RAGFlowAzureSasBlob:
|
||||
logging.exception(f"fail get {blob_name}")
|
||||
self.__open__()
|
||||
time.sleep(1)
|
||||
return
|
||||
return None
|
||||
|
||||
def obj_exist(self, bucket, fnm):
|
||||
blob_name = f"{bucket}/{fnm}"
|
||||
try:
|
||||
return self.conn.get_blob_client(f"{bucket}/{fnm}").exists()
|
||||
return self.conn.get_blob_client(f"{blob_name}").exists()
|
||||
except Exception:
|
||||
logging.exception(f"Fail put {bucket}/{fnm}")
|
||||
logging.exception(f"Fail put {blob_name}")
|
||||
return False
|
||||
|
||||
def get_presigned_url(self, bucket, fnm, expires):
|
||||
@@ -95,4 +96,4 @@ class RAGFlowAzureSasBlob:
|
||||
logging.exception(f"fail get {blob_name}")
|
||||
self.__open__()
|
||||
time.sleep(1)
|
||||
return
|
||||
return None
|
||||
|
||||
@@ -64,7 +64,7 @@ class RAGFlowAzureSpnBlob:
|
||||
|
||||
def health(self):
|
||||
_bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
|
||||
f = self.conn.create_file(fnm)
|
||||
f = self.conn.create_file(f"{_bucket}/{fnm}")
|
||||
f.append_data(binary, offset=0, length=len(binary))
|
||||
return f.flush_data(len(binary))
|
||||
|
||||
@@ -83,10 +83,11 @@ class RAGFlowAzureSpnBlob:
|
||||
return None
|
||||
|
||||
def rm(self, bucket, fnm):
|
||||
blob = f"{bucket}/{fnm}"
|
||||
try:
|
||||
self.conn.delete_file(f"{bucket}/{fnm}")
|
||||
self.conn.delete_file(f"{blob}")
|
||||
except Exception:
|
||||
logging.exception(f"Fail rm {bucket}/{fnm}")
|
||||
logging.exception(f"Fail rm {blob}")
|
||||
|
||||
def get(self, bucket, fnm):
|
||||
blob = f"{bucket}/{fnm}"
|
||||
@@ -102,11 +103,12 @@ class RAGFlowAzureSpnBlob:
|
||||
return None
|
||||
|
||||
def obj_exist(self, bucket, fnm):
|
||||
blob = f"{bucket}/{fnm}"
|
||||
try:
|
||||
client = self.conn.get_file_client(f"{bucket}/{fnm}")
|
||||
client = self.conn.get_blob_client(f"{blob}")
|
||||
return client.exists()
|
||||
except Exception:
|
||||
logging.exception(f"Fail put {bucket}/{fnm}")
|
||||
logging.exception(f"Fail put {blob}")
|
||||
return False
|
||||
|
||||
def get_presigned_url(self, bucket, fnm, expires):
|
||||
|
||||
Reference in New Issue
Block a user