fix: add bucket prefix to Azure Blob SPN and SAS storage operations (#14347)

## Summary

Fixes file collision between different datasets when using Azure Blob
storage (SPN or SAS authentication).

## Bug

azure_spn_conn.py and zure_sas_conn.py ignored the ucket parameter
entirely, storing all files flat with just the filename. This caused
files with the same name from different datasets (knowledge bases) to
overwrite each other.

## Fix

Prepend bucket/ as a path prefix in all methods (put, 
m, get, obj_exist, get_presigned_url, health) to match the behavior of
MinIO and S3 implementations.

## Changes

- **rag/utils/azure_spn_conn.py**: Added {bucket}/ prefix to file paths
in all operations
- **rag/utils/azure_sas_conn.py**: Same fix applied for consistency
(also noted in the original issue)

## Testing

Manual verification: files from different datasets now stored under
distinct bucket/ prefixes, preventing collisions.

Fixes #14159

Co-authored-by: Hunter <hunter@yitong.ai>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
D2758695161
2026-05-08 12:06:28 +08:00
committed by GitHub
parent c7ddc8c039
commit f063e03a24
2 changed files with 13 additions and 10 deletions

View File

@@ -49,7 +49,7 @@ class RAGFlowAzureSasBlob:
def health(self):
_bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
return self.conn.upload_blob(name=fnm, data=BytesIO(binary), length=len(binary))
return self.conn.upload_blob(name=f"{_bucket}/{fnm}", data=BytesIO(binary), length=len(binary))
def put(self, bucket, fnm, binary, tenant_id=None):
blob_name = f"{bucket}/{fnm}"
@@ -77,13 +77,14 @@ class RAGFlowAzureSasBlob:
logging.exception(f"fail get {blob_name}")
self.__open__()
time.sleep(1)
return
return None
def obj_exist(self, bucket, fnm):
blob_name = f"{bucket}/{fnm}"
try:
return self.conn.get_blob_client(f"{bucket}/{fnm}").exists()
return self.conn.get_blob_client(f"{blob_name}").exists()
except Exception:
logging.exception(f"Fail put {bucket}/{fnm}")
logging.exception(f"Fail put {blob_name}")
return False
def get_presigned_url(self, bucket, fnm, expires):
@@ -95,4 +96,4 @@ class RAGFlowAzureSasBlob:
logging.exception(f"fail get {blob_name}")
self.__open__()
time.sleep(1)
return
return None

View File

@@ -64,7 +64,7 @@ class RAGFlowAzureSpnBlob:
def health(self):
_bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
f = self.conn.create_file(fnm)
f = self.conn.create_file(f"{_bucket}/{fnm}")
f.append_data(binary, offset=0, length=len(binary))
return f.flush_data(len(binary))
@@ -83,10 +83,11 @@ class RAGFlowAzureSpnBlob:
return None
def rm(self, bucket, fnm):
blob = f"{bucket}/{fnm}"
try:
self.conn.delete_file(f"{bucket}/{fnm}")
self.conn.delete_file(f"{blob}")
except Exception:
logging.exception(f"Fail rm {bucket}/{fnm}")
logging.exception(f"Fail rm {blob}")
def get(self, bucket, fnm):
blob = f"{bucket}/{fnm}"
@@ -102,11 +103,12 @@ class RAGFlowAzureSpnBlob:
return None
def obj_exist(self, bucket, fnm):
blob = f"{bucket}/{fnm}"
try:
client = self.conn.get_file_client(f"{bucket}/{fnm}")
client = self.conn.get_blob_client(f"{blob}")
return client.exists()
except Exception:
logging.exception(f"Fail put {bucket}/{fnm}")
logging.exception(f"Fail put {blob}")
return False
def get_presigned_url(self, bucket, fnm, expires):