mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
fix: add bucket prefix to Azure Blob SPN and SAS storage operations (#14347)
## Summary
Fixes file collision between different datasets when using Azure Blob
storage (SPN or SAS authentication).
## Bug
azure_spn_conn.py and zure_sas_conn.py ignored the ucket parameter
entirely, storing all files flat with just the filename. This caused
files with the same name from different datasets (knowledge bases) to
overwrite each other.
## Fix
Prepend bucket/ as a path prefix in all methods (put,
m, get, obj_exist, get_presigned_url, health) to match the behavior of
MinIO and S3 implementations.
## Changes
- **rag/utils/azure_spn_conn.py**: Added {bucket}/ prefix to file paths
in all operations
- **rag/utils/azure_sas_conn.py**: Same fix applied for consistency
(also noted in the original issue)
## Testing
Manual verification: files from different datasets now stored under
distinct bucket/ prefixes, preventing collisions.
Fixes #14159
Co-authored-by: Hunter <hunter@yitong.ai>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@@ -49,7 +49,7 @@ class RAGFlowAzureSasBlob:
|
|||||||
|
|
||||||
def health(self):
|
def health(self):
|
||||||
_bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
|
_bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
|
||||||
return self.conn.upload_blob(name=fnm, data=BytesIO(binary), length=len(binary))
|
return self.conn.upload_blob(name=f"{_bucket}/{fnm}", data=BytesIO(binary), length=len(binary))
|
||||||
|
|
||||||
def put(self, bucket, fnm, binary, tenant_id=None):
|
def put(self, bucket, fnm, binary, tenant_id=None):
|
||||||
blob_name = f"{bucket}/{fnm}"
|
blob_name = f"{bucket}/{fnm}"
|
||||||
@@ -77,13 +77,14 @@ class RAGFlowAzureSasBlob:
|
|||||||
logging.exception(f"fail get {blob_name}")
|
logging.exception(f"fail get {blob_name}")
|
||||||
self.__open__()
|
self.__open__()
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
return
|
return None
|
||||||
|
|
||||||
def obj_exist(self, bucket, fnm):
|
def obj_exist(self, bucket, fnm):
|
||||||
|
blob_name = f"{bucket}/{fnm}"
|
||||||
try:
|
try:
|
||||||
return self.conn.get_blob_client(f"{bucket}/{fnm}").exists()
|
return self.conn.get_blob_client(f"{blob_name}").exists()
|
||||||
except Exception:
|
except Exception:
|
||||||
logging.exception(f"Fail put {bucket}/{fnm}")
|
logging.exception(f"Fail put {blob_name}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def get_presigned_url(self, bucket, fnm, expires):
|
def get_presigned_url(self, bucket, fnm, expires):
|
||||||
@@ -95,4 +96,4 @@ class RAGFlowAzureSasBlob:
|
|||||||
logging.exception(f"fail get {blob_name}")
|
logging.exception(f"fail get {blob_name}")
|
||||||
self.__open__()
|
self.__open__()
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
return
|
return None
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ class RAGFlowAzureSpnBlob:
|
|||||||
|
|
||||||
def health(self):
|
def health(self):
|
||||||
_bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
|
_bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1"
|
||||||
f = self.conn.create_file(fnm)
|
f = self.conn.create_file(f"{_bucket}/{fnm}")
|
||||||
f.append_data(binary, offset=0, length=len(binary))
|
f.append_data(binary, offset=0, length=len(binary))
|
||||||
return f.flush_data(len(binary))
|
return f.flush_data(len(binary))
|
||||||
|
|
||||||
@@ -83,10 +83,11 @@ class RAGFlowAzureSpnBlob:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def rm(self, bucket, fnm):
|
def rm(self, bucket, fnm):
|
||||||
|
blob = f"{bucket}/{fnm}"
|
||||||
try:
|
try:
|
||||||
self.conn.delete_file(f"{bucket}/{fnm}")
|
self.conn.delete_file(f"{blob}")
|
||||||
except Exception:
|
except Exception:
|
||||||
logging.exception(f"Fail rm {bucket}/{fnm}")
|
logging.exception(f"Fail rm {blob}")
|
||||||
|
|
||||||
def get(self, bucket, fnm):
|
def get(self, bucket, fnm):
|
||||||
blob = f"{bucket}/{fnm}"
|
blob = f"{bucket}/{fnm}"
|
||||||
@@ -102,11 +103,12 @@ class RAGFlowAzureSpnBlob:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def obj_exist(self, bucket, fnm):
|
def obj_exist(self, bucket, fnm):
|
||||||
|
blob = f"{bucket}/{fnm}"
|
||||||
try:
|
try:
|
||||||
client = self.conn.get_file_client(f"{bucket}/{fnm}")
|
client = self.conn.get_blob_client(f"{blob}")
|
||||||
return client.exists()
|
return client.exists()
|
||||||
except Exception:
|
except Exception:
|
||||||
logging.exception(f"Fail put {bucket}/{fnm}")
|
logging.exception(f"Fail put {blob}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def get_presigned_url(self, bucket, fnm, expires):
|
def get_presigned_url(self, bucket, fnm, expires):
|
||||||
|
|||||||
Reference in New Issue
Block a user