From f063e03a245540ec01800fe2e8a7f7dc489826d2 Mon Sep 17 00:00:00 2001 From: D2758695161 <13510221939@163.com> Date: Fri, 8 May 2026 12:06:28 +0800 Subject: [PATCH] fix: add bucket prefix to Azure Blob SPN and SAS storage operations (#14347) ## Summary Fixes file collision between different datasets when using Azure Blob storage (SPN or SAS authentication). ## Bug azure_spn_conn.py and zure_sas_conn.py ignored the ucket parameter entirely, storing all files flat with just the filename. This caused files with the same name from different datasets (knowledge bases) to overwrite each other. ## Fix Prepend bucket/ as a path prefix in all methods (put, m, get, obj_exist, get_presigned_url, health) to match the behavior of MinIO and S3 implementations. ## Changes - **rag/utils/azure_spn_conn.py**: Added {bucket}/ prefix to file paths in all operations - **rag/utils/azure_sas_conn.py**: Same fix applied for consistency (also noted in the original issue) ## Testing Manual verification: files from different datasets now stored under distinct bucket/ prefixes, preventing collisions. Fixes #14159 Co-authored-by: Hunter Co-authored-by: Jin Hai --- rag/utils/azure_sas_conn.py | 11 ++++++----- rag/utils/azure_spn_conn.py | 12 +++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/rag/utils/azure_sas_conn.py b/rag/utils/azure_sas_conn.py index 96442a2f07..9d43bcbf54 100644 --- a/rag/utils/azure_sas_conn.py +++ b/rag/utils/azure_sas_conn.py @@ -49,7 +49,7 @@ class RAGFlowAzureSasBlob: def health(self): _bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1" - return self.conn.upload_blob(name=fnm, data=BytesIO(binary), length=len(binary)) + return self.conn.upload_blob(name=f"{_bucket}/{fnm}", data=BytesIO(binary), length=len(binary)) def put(self, bucket, fnm, binary, tenant_id=None): blob_name = f"{bucket}/{fnm}" @@ -77,13 +77,14 @@ class RAGFlowAzureSasBlob: logging.exception(f"fail get {blob_name}") self.__open__() time.sleep(1) - return + return None def obj_exist(self, bucket, fnm): + blob_name = f"{bucket}/{fnm}" try: - return self.conn.get_blob_client(f"{bucket}/{fnm}").exists() + return self.conn.get_blob_client(f"{blob_name}").exists() except Exception: - logging.exception(f"Fail put {bucket}/{fnm}") + logging.exception(f"Fail put {blob_name}") return False def get_presigned_url(self, bucket, fnm, expires): @@ -95,4 +96,4 @@ class RAGFlowAzureSasBlob: logging.exception(f"fail get {blob_name}") self.__open__() time.sleep(1) - return + return None diff --git a/rag/utils/azure_spn_conn.py b/rag/utils/azure_spn_conn.py index e19c2e1fe1..ac23ecb172 100644 --- a/rag/utils/azure_spn_conn.py +++ b/rag/utils/azure_spn_conn.py @@ -64,7 +64,7 @@ class RAGFlowAzureSpnBlob: def health(self): _bucket, fnm, binary = "txtxtxtxt1", "txtxtxtxt1", b"_t@@@1" - f = self.conn.create_file(fnm) + f = self.conn.create_file(f"{_bucket}/{fnm}") f.append_data(binary, offset=0, length=len(binary)) return f.flush_data(len(binary)) @@ -83,10 +83,11 @@ class RAGFlowAzureSpnBlob: return None def rm(self, bucket, fnm): + blob = f"{bucket}/{fnm}" try: - self.conn.delete_file(f"{bucket}/{fnm}") + self.conn.delete_file(f"{blob}") except Exception: - logging.exception(f"Fail rm {bucket}/{fnm}") + logging.exception(f"Fail rm {blob}") def get(self, bucket, fnm): blob = f"{bucket}/{fnm}" @@ -102,11 +103,12 @@ class RAGFlowAzureSpnBlob: return None def obj_exist(self, bucket, fnm): + blob = f"{bucket}/{fnm}" try: - client = self.conn.get_file_client(f"{bucket}/{fnm}") + client = self.conn.get_blob_client(f"{blob}") return client.exists() except Exception: - logging.exception(f"Fail put {bucket}/{fnm}") + logging.exception(f"Fail put {blob}") return False def get_presigned_url(self, bucket, fnm, expires):