Feat: add delete all support for delete operations (#13530)

### What problem does this PR solve?

Add delete all support for delete operations.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update

---------

Co-authored-by: writinwaters <cai.keith@gmail.com>
This commit is contained in:
Yongteng Lei
2026-03-12 09:47:42 +08:00
committed by GitHub
parent d201a81db7
commit e1b632a7bb
19 changed files with 1042 additions and 975 deletions

View File

@@ -235,19 +235,37 @@ async def switch():
@manager.route('/rm', methods=['POST']) # noqa: F821
@login_required
@validate_request("chunk_ids", "doc_id")
@validate_request("doc_id")
async def rm():
req = await get_request_json()
try:
def _rm_sync():
deleted_chunk_ids = req["chunk_ids"]
deleted_chunk_ids = req.get("chunk_ids")
if isinstance(deleted_chunk_ids, list):
unique_chunk_ids = list(dict.fromkeys(deleted_chunk_ids))
has_ids = len(unique_chunk_ids) > 0
else:
elif deleted_chunk_ids is not None:
unique_chunk_ids = [deleted_chunk_ids]
has_ids = deleted_chunk_ids not in (None, "")
else:
unique_chunk_ids = []
has_ids = False
if not has_ids:
if req.get("delete_all") is True:
e, doc = DocumentService.get_by_id(req["doc_id"])
if not e:
return get_data_error_result(message="Document not found!")
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
# Clean up storage assets while index rows still exist for discovery
DocumentService.delete_chunk_images(doc, tenant_id)
condition = {"doc_id": req["doc_id"]}
try:
deleted_count = settings.docStoreConn.delete(condition, search.index_name(tenant_id), doc.kb_id)
except Exception:
return get_data_error_result(message="Chunk deleting failure")
if deleted_count > 0:
DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, deleted_count, 0)
return get_json_result(data=True)
return get_json_result(data=True)
e, doc = DocumentService.get_by_id(req["doc_id"])

View File

@@ -239,6 +239,11 @@ async def delete_chats(tenant_id):
ids = req.get("ids")
if not ids:
if req.get("delete_all") is True:
ids = [d.id for d in DialogService.query(tenant_id=tenant_id, status=StatusEnum.VALID.value)]
if not ids:
return get_result()
else:
return get_result()
id_list = ids

View File

@@ -198,7 +198,13 @@ async def delete(tenant_id):
type: string
description: |
List of dataset IDs to delete.
If `null` or an empty array is provided, no datasets will be deleted.
If `null` or an empty array is provided, no datasets will be deleted
unless `delete_all` is set to `true`.
delete_all:
type: boolean
description: |
If `true` and `ids` is null or empty, delete all datasets owned by the current user.
Defaults to `false`.
responses:
200:
description: Successful operation.
@@ -212,6 +218,11 @@ async def delete(tenant_id):
try:
kb_id_instance_pairs = []
if req["ids"] is None or len(req["ids"]) == 0:
if req.get("delete_all"):
req["ids"] = [kb.id for kb in KnowledgebaseService.query(tenant_id=tenant_id)]
if not req["ids"]:
return get_result()
else:
return get_result()
error_kb_ids = []

View File

@@ -750,6 +750,11 @@ async def delete(tenant_id, dataset_id):
doc_ids = req.get("ids")
if not doc_ids:
if req.get("delete_all") is True:
doc_ids = [doc.id for doc in DocumentService.query(kb_id=dataset_id)]
if not doc_ids:
return get_result()
else:
return get_result()
doc_list = doc_ids
@@ -1343,6 +1348,16 @@ async def rm_chunk(tenant_id, dataset_id, document_id):
chunk_ids = req.get("chunk_ids")
if not chunk_ids:
if req.get("delete_all") is True:
doc = docs[0]
# Clean up storage assets while index rows still exist for discovery
DocumentService.delete_chunk_images(doc, tenant_id)
condition = {"doc_id": document_id}
chunk_number = settings.docStoreConn.delete(condition, search.index_name(tenant_id), dataset_id)
if chunk_number != 0:
DocumentService.decrement_chunk_num(document_id, dataset_id, 1, chunk_number, 0)
return get_result(message=f"deleted {chunk_number} chunks")
else:
return get_result()
condition = {"doc_id": document_id}

View File

@@ -751,6 +751,11 @@ async def delete(tenant_id, chat_id):
ids = req.get("ids")
if not ids:
if req.get("delete_all") is True:
ids = [conv.id for conv in ConversationService.query(dialog_id=chat_id)]
if not ids:
return get_result()
else:
return get_result()
conv_list = ids
@@ -799,6 +804,11 @@ async def delete_agent_session(tenant_id, agent_id):
ids = req.get("ids")
if not ids:
if req.get("delete_all") is True:
ids = [conv.id for conv in API4ConversationService.query(dialog_id=agent_id)]
if not ids:
return get_result()
else:
return get_result()
conv_list = ids

View File

@@ -30,13 +30,13 @@ class File2DocumentService(CommonService):
@DB.connection_context()
def get_by_file_id(cls, file_id):
objs = cls.model.select().where(cls.model.file_id == file_id)
return objs
return list(objs)
@classmethod
@DB.connection_context()
def get_by_document_id(cls, document_id):
objs = cls.model.select().where(cls.model.document_id == document_id)
return objs
return list(objs)
@classmethod
@DB.connection_context()

View File

@@ -649,7 +649,8 @@ class UpdateDatasetReq(CreateDatasetReq):
class DeleteReq(Base):
ids: Annotated[list[str] | None, Field(...)]
ids: Annotated[list[str] | None, Field(default=None)]
delete_all: Annotated[bool, Field(default=False)]
@field_validator("ids", mode="after")
@classmethod

View File

@@ -241,14 +241,16 @@ class InfinityConnectionBase(DocStoreConnection):
Return the health status of the database.
"""
inf_conn = self.connPool.get_conn()
try:
res = inf_conn.show_current_node()
self.connPool.release_conn(inf_conn)
res2 = {
"type": "infinity",
"status": "green" if res.error_code == 0 and res.server_status in ["started", "alive"] else "red",
"error": res.error_msg,
}
return res2
finally:
self.connPool.release_conn(inf_conn)
"""
Table operations
@@ -259,6 +261,7 @@ class InfinityConnectionBase(DocStoreConnection):
self.logger.debug(f"CREATE_IDX: Creating table {table_name}, parser_id: {parser_id}")
inf_conn = self.connPool.get_conn()
try:
inf_db = inf_conn.create_database(self.dbName, ConflictType.Ignore)
# Use configured schema
@@ -333,9 +336,10 @@ class InfinityConnectionBase(DocStoreConnection):
)
self.logger.info(f"INFINITY created secondary index sec_{field_name} for field {field_name} with params {params}")
self.connPool.release_conn(inf_conn)
self.logger.info(f"INFINITY created table {table_name}, vector size {vector_size}")
return True
finally:
self.connPool.release_conn(inf_conn)
def create_doc_meta_idx(self, index_name: str):
"""
@@ -398,25 +402,28 @@ class InfinityConnectionBase(DocStoreConnection):
else:
table_name = f"{index_name}_{dataset_id}"
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
db_instance.drop_table(table_name, ConflictType.Ignore)
self.connPool.release_conn(inf_conn)
self.logger.info(f"INFINITY dropped table {table_name}")
finally:
self.connPool.release_conn(inf_conn)
def index_exist(self, index_name: str, dataset_id: str) -> bool:
if index_name.startswith("ragflow_doc_meta_"):
table_name = index_name
else:
table_name = f"{index_name}_{dataset_id}"
try:
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
_ = db_instance.get_table(table_name)
self.connPool.release_conn(inf_conn)
return True
except Exception as e:
self.logger.warning(f"INFINITY indexExist {str(e)}")
return False
finally:
self.connPool.release_conn(inf_conn)
"""
CRUD operations
@@ -453,6 +460,7 @@ class InfinityConnectionBase(DocStoreConnection):
def delete(self, condition: dict, index_name: str, dataset_id: str) -> int:
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
if index_name.startswith("ragflow_doc_meta_"):
table_name = index_name
@@ -466,8 +474,9 @@ class InfinityConnectionBase(DocStoreConnection):
filter = self.equivalent_condition_to_str(condition, table_instance)
self.logger.debug(f"INFINITY delete table {table_name}, filter {filter}.")
res = table_instance.delete(filter)
self.connPool.release_conn(inf_conn)
return res.deleted_rows
finally:
self.connPool.release_conn(inf_conn)
"""
Helper functions for search result

View File

@@ -657,8 +657,9 @@ Deletes datasets by ID.
- Headers:
- `'content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- Body:
- `"ids"`: `list[string]` or `null`
- `"delete_all"`: `boolean`
##### Request example
@@ -672,12 +673,24 @@ curl --request DELETE \
}'
```
```bash
curl --request DELETE \
--url http://{address}/api/v1/datasets \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"delete_all": true
}'
```
##### Request parameters
- `"ids"`: (*Body parameter*), `list[string]` or `null`, *Required*
- `"ids"`: (*Body parameter*), `list[string]` or `null`
Specifies the datasets to delete:
- If omitted, or set to `null` or an empty array, no datasets are deleted unless `"delete_all"` is `true`.
- If an array of IDs is provided, only the datasets matching those IDs are deleted.
- `"delete_all"`: (*Body parameter*), `boolean`
Whether to delete all datasets owned by the current user when `"ids"` is omitted, or set to `null` or an empty array. Defaults to `false`.
#### Response
@@ -1801,6 +1814,7 @@ Deletes documents by ID.
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"ids"`: `list[string]`
- `"delete_all"`: `boolean`
##### Request example
@@ -1815,6 +1829,16 @@ curl --request DELETE \
}'
```
```bash
curl --request DELETE \
--url http://{address}/api/v1/datasets/{dataset_id}/documents \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"delete_all": true
}'
```
##### Request parameters
- `dataset_id`: (*Path parameter*)
@@ -1823,6 +1847,8 @@ curl --request DELETE \
The IDs of the documents to delete.
- If omitted, or set to `null` or an empty array, no documents are deleted unless `"delete_all"` is `true`.
- If an array of IDs is provided, only the documents matching those IDs are deleted.
- `"delete_all"`: (*Body parameter*), `boolean`
Whether to delete all documents in the specified dataset when `"ids"` is omitted, or set to `null` or an empty array. Defaults to `false`.
#### Response
@@ -2161,6 +2187,7 @@ Deletes chunks by ID.
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"chunk_ids"`: `list[string]`
- `"delete_all"`: `boolean`
##### Request example
@@ -2175,6 +2202,16 @@ curl --request DELETE \
}'
```
```bash
curl --request DELETE \
--url http://{address}/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"delete_all": true
}'
```
##### Request parameters
- `dataset_id`: (*Path parameter*)
@@ -2185,6 +2222,8 @@ curl --request DELETE \
The IDs of the chunks to delete.
- If omitted, or set to `null` or an empty array, no chunks are deleted unless `"delete_all"` is `true`.
- If an array of IDs is provided, only the chunks matching those IDs are deleted.
- `"delete_all"`: (*Body parameter*), `boolean`
Whether to delete all chunks of the specified document when `"chunk_ids"` is omitted, or set to `null` or an empty array. Defaults to `false`.
#### Response
@@ -2938,6 +2977,7 @@ Deletes chat assistants by ID.
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"ids"`: `list[string]`
- `"delete_all"`: `boolean`
##### Request example
@@ -2952,12 +2992,24 @@ curl --request DELETE \
}'
```
```bash
curl --request DELETE \
--url http://{address}/api/v1/chats \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"delete_all": true
}'
```
##### Request parameters
- `"ids"`: (*Body parameter*), `list[string]`
The IDs of the chat assistants to delete.
- If omitted, or set to `null` or an empty array, no chat assistants are deleted unless `"delete_all"` is `true`.
- If an array of IDs is provided, only the chat assistants matching those IDs are deleted.
- `"delete_all"`: (*Body parameter*), `boolean`
Whether to delete all chat assistants owned by the current user when `"ids"` is omitted, or set to `null` or an empty array. Defaults to `false`.
#### Response
@@ -3316,6 +3368,7 @@ Deletes sessions of a chat assistant by ID.
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"ids"`: `list[string]`
- `"delete_all"`: `boolean`
##### Request example
@@ -3330,6 +3383,16 @@ curl --request DELETE \
}'
```
```bash
curl --request DELETE \
--url http://{address}/api/v1/chats/{chat_id}/sessions \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"delete_all": true
}'
```
##### Request Parameters
- `chat_id`: (*Path parameter*)
@@ -3338,6 +3401,8 @@ curl --request DELETE \
The IDs of the sessions to delete.
- If omitted, or set to `null` or an empty array, no sessions are deleted unless `"delete_all"` is `true`.
- If an array of IDs is provided, only the sessions matching those IDs are deleted.
- `"delete_all"`: (*Body parameter*), `boolean`
Whether to delete all sessions of the specified chat assistant when `"ids"` is omitted, or set to `null` or an empty array. Defaults to `false`.
#### Response
@@ -4682,6 +4747,7 @@ Deletes sessions of an agent by ID.
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"ids"`: `list[string]`
- `"delete_all"`: `boolean`
##### Request example
@@ -4696,6 +4762,16 @@ curl --request DELETE \
}'
```
```bash
curl --request DELETE \
--url http://{address}/api/v1/agents/{agent_id}/sessions \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"delete_all": true
}'
```
##### Request Parameters
- `agent_id`: (*Path parameter*)
@@ -4704,6 +4780,8 @@ curl --request DELETE \
The IDs of the sessions to delete.
- If omitted, or set to `null` or an empty array, no sessions are deleted unless `"delete_all"` is `true`.
- If an array of IDs is provided, only the sessions matching those IDs are deleted.
- `"delete_all"`: (*Body parameter*), `boolean`
Whether to delete all sessions of the specified agent when `"ids"` is omitted, or set to `null` or an empty array. Defaults to `false`.
#### Response

View File

@@ -230,20 +230,24 @@ dataset = rag_object.create_dataset(name="kb_1")
### Delete datasets
```python
RAGFlow.delete_datasets(ids: list[str] | None = None)
RAGFlow.delete_datasets(ids: list[str] | None = None, delete_all: bool = False)
```
Deletes datasets by ID.
#### Parameters
##### ids: `list[str]` or `None`, *Required*
##### ids: `list[str]` or `None`
The IDs of the datasets to delete. Defaults to `None`.
- If omitted, or set to `None` or an empty list, no datasets are deleted unless `delete_all` is `True`.
- If an array of IDs is provided, only the datasets matching those IDs are deleted.
##### delete_all: `bool`
Whether to delete all datasets owned by the current user when `ids` is omitted, or set to `None` or an empty list. Defaults to `False`.
#### Returns
- Success: No value is returned.
@@ -253,6 +257,7 @@ The IDs of the datasets to delete. Defaults to `None`.
```python
rag_object.delete_datasets(ids=["d94a8dc02c9711f0930f7fbc369eab6d","e94a8dc02c9711f0930f7fbc369eab6e"])
rag_object.delete_datasets(delete_all=True)
```
---
@@ -672,7 +677,7 @@ for doc in dataset.list_documents(keywords="rag", page=0, page_size=12):
### Delete documents
```python
DataSet.delete_documents(ids: list[str] = None)
DataSet.delete_documents(ids: list[str] | None = None, delete_all: bool = False)
```
Deletes documents by ID.
@@ -686,6 +691,10 @@ The IDs of the documents to delete. Defaults to `None`.
- If omitted, or set to `None` or an empty list, no documents are deleted unless `delete_all` is `True`.
- If an array of IDs is provided, only the documents matching those IDs are deleted.
##### delete_all: `bool`
Whether to delete all documents in the current dataset when `ids` is omitted, or set to `None` or an empty list. Defaults to `False`.
#### Returns
- Success: No value is returned.
@@ -700,6 +709,7 @@ rag_object = RAGFlow(api_key="<YOUR_API_KEY>", base_url="http://<YOUR_BASE_URL>:
dataset = rag_object.list_datasets(name="kb_1")
dataset = dataset[0]
dataset.delete_documents(ids=["id_1","id_2"])
dataset.delete_documents(delete_all=True)
```
---
@@ -943,20 +953,24 @@ for chunk in docs[0].list_chunks(keywords="rag", page=0, page_size=12):
### Delete chunks
```python
Document.delete_chunks(chunk_ids: list[str])
Document.delete_chunks(ids: list[str] | None = None, delete_all: bool = False)
```
Deletes chunks by ID.
#### Parameters
##### chunk_ids: `list[str]`
##### ids: `list[str]` or `None`
The IDs of the chunks to delete. Defaults to `None`.
- If omitted, or set to `None` or an empty list, no chunks are deleted unless `delete_all` is `True`.
- If an array of IDs is provided, only the chunks matching those IDs are deleted.
##### delete_all: `bool`
Whether to delete all chunks in the current document when `ids` is omitted, or set to `None` or an empty list. Defaults to `False`.
#### Returns
- Success: No value is returned.
@@ -974,6 +988,7 @@ doc = dataset.list_documents(id="wdfxb5t547d")
doc = doc[0]
chunk = doc.add_chunk(content="xxxxxxx")
doc.delete_chunks(["id_1","id_2"])
doc.delete_chunks(delete_all=True)
```
---
@@ -1249,20 +1264,24 @@ assistant.update({"name": "Stefan", "llm": {"temperature": 0.8}, "prompt": {"top
### Delete chat assistants
```python
RAGFlow.delete_chats(ids: list[str] = None)
RAGFlow.delete_chats(ids: list[str] | None = None, delete_all: bool = False)
```
Deletes chat assistants by ID.
#### Parameters
##### ids: `list[str]`
##### ids: `list[str]` or `None`
The IDs of the chat assistants to delete. Defaults to `None`.
- If omitted, or set to `None` or an empty list, no chat assistants are deleted unless `delete_all` is `True`.
- If an array of IDs is provided, only the chat assistants matching those IDs are deleted.
##### delete_all: `bool`
Whether to delete all chat assistants owned by the current user when `ids` is omitted, or set to `None` or an empty list. Defaults to `False`.
#### Returns
- Success: No value is returned.
@@ -1275,6 +1294,7 @@ from ragflow_sdk import RAGFlow
rag_object = RAGFlow(api_key="<YOUR_API_KEY>", base_url="http://<YOUR_BASE_URL>:9380")
rag_object.delete_chats(ids=["id_1","id_2"])
rag_object.delete_chats(delete_all=True)
```
---
@@ -1481,20 +1501,24 @@ for session in assistant.list_sessions():
### Delete chat assistant's sessions
```python
Chat.delete_sessions(ids:list[str] = None)
Chat.delete_sessions(ids: list[str] | None = None, delete_all: bool = False)
```
Deletes sessions of the current chat assistant by ID.
#### Parameters
##### ids: `list[str]`
##### ids: `list[str]` or `None`
The IDs of the sessions to delete. Defaults to `None`.
- If omitted, or set to `None` or an empty list, no sessions are deleted unless `delete_all` is `True`.
- If an array of IDs is provided, only the sessions matching those IDs are deleted.
##### delete_all: `bool`
Whether to delete all sessions of the current chat assistant when `ids` is omitted, or set to `None` or an empty list. Defaults to `False`.
#### Returns
- Success: No value is returned.
@@ -1509,6 +1533,7 @@ rag_object = RAGFlow(api_key="<YOUR_API_KEY>", base_url="http://<YOUR_BASE_URL>:
assistant = rag_object.list_chats(name="Miss R")
assistant = assistant[0]
assistant.delete_sessions(ids=["id_1","id_2"])
assistant.delete_sessions(delete_all=True)
```
---
@@ -1802,20 +1827,24 @@ for session in sessions:
### Delete agent's sessions
```python
Agent.delete_sessions(ids: list[str] = None)
Agent.delete_sessions(ids: list[str] | None = None, delete_all: bool = False)
```
Deletes sessions of an agent by ID.
#### Parameters
##### ids: `list[str]`
##### ids: `list[str]` or `None`
The IDs of the sessions to delete. Defaults to `None`.
- If omitted, or set to `null` or an empty array, no sessions are deleted.
- If omitted, or set to `None` or an empty list, no sessions are deleted unless `delete_all` is `True`.
- If an array of IDs is provided, only the sessions matching those IDs are deleted.
##### delete_all: `bool`
Whether to delete all sessions of the current agent when `ids` is omitted, or set to `None` or an empty list. Defaults to `False`.
#### Returns
- Success: No value is returned.
@@ -1830,6 +1859,7 @@ rag_object = RAGFlow(api_key="<YOUR_API_KEY>", base_url="http://<YOUR_BASE_URL>:
AGENT_id = "AGENT_ID"
agent = rag_object.list_agents(id = AGENT_id)[0]
agent.delete_sessions(ids=["id_1","id_2"])
agent.delete_sessions(delete_all=True)
```
---

View File

@@ -122,6 +122,7 @@ class InfinityConnection(InfinityConnectionBase):
index_names = index_names.split(",")
assert isinstance(index_names, list) and len(index_names) > 0
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
df_list = list()
table_list = list()
@@ -266,6 +267,7 @@ class InfinityConnection(InfinityConnectionBase):
total_hits_count += int(extra_result["total_hits_count"])
self.logger.debug(f"INFINITY search table: {str(table_name)}, result: {str(mem_res)}")
df_list.append(mem_res)
finally:
self.connPool.release_conn(inf_conn)
res = self.concat_dataframes(df_list, output)
if match_expressions:
@@ -281,6 +283,7 @@ class InfinityConnection(InfinityConnectionBase):
order_by.asc("forget_at_flt")
# query
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
table_name = f"{index_name}_{memory_id}"
table_instance = db_instance.get_table(table_name)
@@ -302,6 +305,7 @@ class InfinityConnection(InfinityConnectionBase):
mem_res, _ = builder.option({"total_hits_count": True}).to_df()
res = self.concat_dataframes(mem_res, output_fields)
res.head(limit)
finally:
self.connPool.release_conn(inf_conn)
return res
@@ -311,6 +315,7 @@ class InfinityConnection(InfinityConnectionBase):
order_by.asc("valid_at_flt")
# query
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
table_name = f"{index_name}_{memory_id}"
table_instance = db_instance.get_table(table_name)
@@ -332,11 +337,13 @@ class InfinityConnection(InfinityConnectionBase):
mem_res, _ = builder.option({"total_hits_count": True}).to_df()
res = self.concat_dataframes(mem_res, output_fields)
res.head(limit)
finally:
self.connPool.release_conn(inf_conn)
return res
def get(self, message_id: str, index_name: str, memory_ids: list[str]) -> dict | None:
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
df_list = list()
assert isinstance(memory_ids, list)
@@ -352,6 +359,7 @@ class InfinityConnection(InfinityConnectionBase):
mem_res, _ = table_instance.output(["*"]).filter(f"id = '{message_id}'").to_df()
self.logger.debug(f"INFINITY get table: {str(table_list)}, result: {str(mem_res)}")
df_list.append(mem_res)
finally:
self.connPool.release_conn(inf_conn)
res = self.concat_dataframes(df_list, ["id"])
fields = set(res.columns.tolist())
@@ -362,6 +370,7 @@ class InfinityConnection(InfinityConnectionBase):
if not documents:
return []
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
table_name = f"{index_name}_{memory_id}"
vector_size = int(len(documents[0]["content_embed"]))
@@ -421,12 +430,14 @@ class InfinityConnection(InfinityConnectionBase):
str_filter = f"id IN ({str_ids})"
table_instance.delete(str_filter)
table_instance.insert(docs)
finally:
self.connPool.release_conn(inf_conn)
self.logger.debug(f"INFINITY inserted into {table_name} {str_ids}.")
return []
def update(self, condition: dict, new_value: dict, index_name: str, memory_id: str) -> bool:
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
table_name = f"{index_name}_{memory_id}"
table_instance = db_instance.get_table(table_name)
@@ -457,6 +468,7 @@ class InfinityConnection(InfinityConnectionBase):
self.logger.debug(f"INFINITY update table {table_name}, filter {filter}, newValue {new_value}.")
table_instance.update(filter, update_dict)
finally:
self.connPool.release_conn(inf_conn)
return True

View File

@@ -110,6 +110,7 @@ class InfinityConnection(InfinityConnectionBase):
index_names = index_names.split(",")
assert isinstance(index_names, list) and len(index_names) > 0
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
df_list = list()
table_list = list()
@@ -270,7 +271,6 @@ class InfinityConnection(InfinityConnectionBase):
total_hits_count += int(extra_result["total_hits_count"])
self.logger.debug(f"INFINITY search table: {str(table_name)}, result: {str(kb_res)}")
df_list.append(kb_res)
self.connPool.release_conn(inf_conn)
res = self.concat_dataframes(df_list, output)
if match_expressions and score_column:
res["_score"] = res[score_column] + res[PAGERANK_FLD]
@@ -278,9 +278,12 @@ class InfinityConnection(InfinityConnectionBase):
res = res.head(limit)
self.logger.debug(f"INFINITY search final result: {str(res)}")
return res, total_hits_count
finally:
self.connPool.release_conn(inf_conn)
def get(self, chunk_id: str, index_name: str, knowledgebase_ids: list[str]) -> dict | None:
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
df_list = list()
assert isinstance(knowledgebase_ids, list)
@@ -300,6 +303,7 @@ class InfinityConnection(InfinityConnectionBase):
kb_res, _ = table_instance.output(["*"]).filter(f"id = '{chunk_id}'").to_df()
self.logger.debug(f"INFINITY get table: {str(table_list)}, result: {str(kb_res)}")
df_list.append(kb_res)
finally:
self.connPool.release_conn(inf_conn)
res = self.concat_dataframes(df_list, ["id"])
fields = set(res.columns.tolist())
@@ -312,6 +316,7 @@ class InfinityConnection(InfinityConnectionBase):
def insert(self, documents: list[dict], index_name: str, knowledgebase_id: str = None) -> list[str]:
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
if index_name.startswith("ragflow_doc_meta_"):
table_name = index_name
@@ -445,6 +450,7 @@ class InfinityConnection(InfinityConnectionBase):
# logger.info(f"insert position_int: {doc['position_int']}")
# logger.info(f"InfinityConnection.insert {json.dumps(documents)}")
table_instance.insert(docs)
finally:
self.connPool.release_conn(inf_conn)
self.logger.debug(f"INFINITY inserted into {table_name} {str_ids}.")
return []
@@ -453,6 +459,7 @@ class InfinityConnection(InfinityConnectionBase):
# if 'position_int' in newValue:
# logger.info(f"update position_int: {newValue['position_int']}")
inf_conn = self.connPool.get_conn()
try:
db_instance = inf_conn.get_database(self.dbName)
if index_name.startswith("ragflow_doc_meta_"):
table_name = index_name
@@ -566,6 +573,7 @@ class InfinityConnection(InfinityConnectionBase):
{k: "###".join(v)})
table_instance.update(filter, new_value)
finally:
self.connPool.release_conn(inf_conn)
return True

View File

@@ -87,8 +87,11 @@ class Agent(Base):
return result_list
raise Exception(res.get("message"))
def delete_sessions(self, ids: list[str] | None = None):
res = self.rm(f"/agents/{self.id}/sessions", {"ids": ids})
def delete_sessions(self, ids: list[str] | None = None, delete_all: bool = False):
payload = {"ids": ids}
if delete_all:
payload["delete_all"] = True
res = self.rm(f"/agents/{self.id}/sessions", payload)
res = res.json()
if res.get("code") != 0:
raise Exception(res.get("message"))

View File

@@ -88,8 +88,8 @@ class Chat(Base):
return result_list
raise Exception(res["message"])
def delete_sessions(self, ids: list[str] | None = None):
res = self.rm(f"/chats/{self.id}/sessions", {"ids": ids})
def delete_sessions(self, ids: list[str] | None = None, delete_all: bool = False):
res = self.rm(f"/chats/{self.id}/sessions", {"ids": ids, "delete_all": delete_all})
res = res.json()
if res.get("code") != 0:
raise Exception(res.get("message"))

View File

@@ -95,8 +95,8 @@ class DataSet(Base):
return documents
raise Exception(res["message"])
def delete_documents(self, ids: list[str] | None = None):
res = self.rm(f"/datasets/{self.id}/documents", {"ids": ids})
def delete_documents(self, ids: list[str] | None = None, delete_all: bool = False):
res = self.rm(f"/datasets/{self.id}/documents", {"ids": ids, "delete_all": delete_all})
res = res.json()
if res.get("code") != 0:
raise Exception(res["message"])

View File

@@ -94,8 +94,8 @@ class Document(Base):
return Chunk(self.rag, res["data"].get("chunk"))
raise Exception(res.get("message"))
def delete_chunks(self, ids: list[str] | None = None):
res = self.rm(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", {"chunk_ids": ids})
def delete_chunks(self, ids: list[str] | None = None, delete_all: bool = False):
res = self.rm(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", {"chunk_ids": ids, "delete_all": delete_all})
res = res.json()
if res.get("code") != 0:
raise Exception(res.get("message"))

View File

@@ -79,8 +79,8 @@ class RAGFlow:
return DataSet(self, res["data"])
raise Exception(res["message"])
def delete_datasets(self, ids: list[str] | None = None):
res = self.delete("/datasets", {"ids": ids})
def delete_datasets(self, ids: list[str] | None = None, delete_all: bool = False):
res = self.delete("/datasets", {"ids": ids, "delete_all": delete_all})
res = res.json()
if res.get("code") != 0:
raise Exception(res["message"])
@@ -185,8 +185,8 @@ class RAGFlow:
return Chat(self, res["data"])
raise Exception(res["message"])
def delete_chats(self, ids: list[str] | None = None):
res = self.delete("/chats", {"ids": ids})
def delete_chats(self, ids: list[str] | None = None, delete_all: bool = False):
res = self.delete("/chats", {"ids": ids, "delete_all": delete_all})
res = res.json()
if res.get("code") != 0:
raise Exception(res["message"])

View File

@@ -59,20 +59,7 @@ def delete_datasets(auth, payload=None, *, headers=HEADERS, data=None):
def delete_all_datasets(auth, *, page_size=1000):
# Dataset DELETE now treats null/empty ids as a no-op, so cleanup must enumerate explicit ids.
page = 1
dataset_ids = []
while True:
res = list_datasets(auth, {"page": page, "page_size": page_size})
data = res.get("data") or []
dataset_ids.extend(dataset["id"] for dataset in data)
if len(data) < page_size:
break
page += 1
if not dataset_ids:
return {"code": 0, "message": ""}
return delete_datasets(auth, {"ids": dataset_ids})
return delete_datasets(auth, {"ids": None, "delete_all": True})
def batch_create_datasets(auth, num):
@@ -146,20 +133,7 @@ def delete_documents(auth, dataset_id, payload=None):
def delete_all_documents(auth, dataset_id, *, page_size=1000):
# Document DELETE now treats missing/null/empty ids as a no-op, so cleanup must enumerate explicit ids.
page = 1
document_ids = []
while True:
res = list_documents(auth, dataset_id, {"page": page, "page_size": page_size})
docs = (res.get("data") or {}).get("docs") or []
document_ids.extend(doc["id"] for doc in docs)
if len(docs) < page_size:
break
page += 1
if not document_ids:
return {"code": 0, "message": ""}
return delete_documents(auth, dataset_id, {"ids": document_ids})
return delete_documents(auth, dataset_id, {"ids": None, "delete_all": True})
def parse_documents(auth, dataset_id, payload=None):
@@ -212,20 +186,7 @@ def delete_chunks(auth, dataset_id, document_id, payload=None):
def delete_all_chunks(auth, dataset_id, document_id, *, page_size=1000):
# Chunk DELETE now treats missing/null/empty ids as a no-op, so cleanup must enumerate explicit ids.
page = 1
chunk_ids = []
while True:
res = list_chunks(auth, dataset_id, document_id, {"page": page, "page_size": page_size})
chunks = (res.get("data") or {}).get("chunks") or []
chunk_ids.extend(chunk["id"] for chunk in chunks)
if len(chunks) < page_size:
break
page += 1
if not chunk_ids:
return {"code": 0, "message": ""}
return delete_chunks(auth, dataset_id, document_id, {"chunk_ids": chunk_ids})
return delete_chunks(auth, dataset_id, document_id, {"chunk_ids": None, "delete_all": True})
def retrieval_chunks(auth, payload=None):
@@ -268,20 +229,7 @@ def delete_chat_assistants(auth, payload=None):
def delete_all_chat_assistants(auth, *, page_size=1000):
# Chat DELETE now treats null/empty ids as a no-op, so cleanup must enumerate explicit ids.
page = 1
chat_ids = []
while True:
res = list_chat_assistants(auth, {"page": page, "page_size": page_size})
data = res.get("data") or []
chat_ids.extend(chat["id"] for chat in data)
if len(data) < page_size:
break
page += 1
if not chat_ids:
return {"code": 0, "message": ""}
return delete_chat_assistants(auth, {"ids": chat_ids})
return delete_chat_assistants(auth, {"ids": None, "delete_all": True})
def batch_create_chat_assistants(auth, num):
@@ -318,20 +266,7 @@ def delete_session_with_chat_assistants(auth, chat_assistant_id, payload=None):
def delete_all_sessions_with_chat_assistant(auth, chat_assistant_id, *, page_size=1000):
    """Delete every session of a chat assistant with one ``delete_all`` request.

    Args:
        auth: Passed through unchanged to
            ``delete_session_with_chat_assistants``.
        chat_assistant_id: ID of the chat assistant whose sessions are removed.
        page_size: Unused. Kept (keyword-only) so callers written for the
            former page-and-collect cleanup keep working.

    Returns:
        Whatever ``delete_session_with_chat_assistants`` returns for the
        request.
    """
    # Missing/null/empty ids alone are a no-op for session DELETE, so the
    # explicit delete_all flag is what asks for the bulk removal.
    return delete_session_with_chat_assistants(auth, chat_assistant_id, {"ids": None, "delete_all": True})
def batch_add_sessions_with_chat_assistant(auth, chat_assistant_id, num):
@@ -439,20 +374,7 @@ def delete_agent_sessions(auth, agent_id, payload=None):
def delete_all_agent_sessions(auth, agent_id, *, page_size=1000):
    """Delete every session of an agent with one ``delete_all`` request.

    Args:
        auth: Passed through unchanged to ``delete_agent_sessions``.
        agent_id: ID of the agent whose sessions are removed.
        page_size: Unused. Kept (keyword-only) so callers written for the
            former page-and-collect cleanup keep working.

    Returns:
        Whatever ``delete_agent_sessions`` returns for the request.
    """
    # Missing/null/empty ids alone are a no-op for agent session DELETE, so
    # the explicit delete_all flag is what asks for the bulk removal.
    return delete_agent_sessions(auth, agent_id, {"ids": None, "delete_all": True})
def agent_completions(auth, agent_id, payload=None):

View File

@@ -26,33 +26,11 @@ def batch_create_datasets(client: RAGFlow, num: int) -> list[DataSet]:
def delete_all_datasets(client: RAGFlow, *, page_size: int = 1000) -> None:
    """Delete every dataset visible to ``client`` in a single call.

    Args:
        client: SDK client whose ``delete_datasets`` method is invoked.
        page_size: Unused. Kept (keyword-only) so callers written for the
            former page-and-collect cleanup keep working.
    """
    # Null/empty ids alone are a no-op for dataset DELETE, so delete_all=True
    # is what asks for the bulk removal; no prior listing is needed.
    client.delete_datasets(delete_all=True)
def delete_all_chats(client: RAGFlow, *, page_size: int = 1000) -> None:
    """Delete every chat visible to ``client`` in a single call.

    Args:
        client: SDK client whose ``delete_chats`` method is invoked.
        page_size: Unused. Kept (keyword-only) so callers written for the
            former page-and-collect cleanup keep working.
    """
    # Null/empty ids alone are a no-op for chat DELETE, so delete_all=True is
    # what asks for the bulk removal; no prior listing is needed.
    client.delete_chats(delete_all=True)
# FILE MANAGEMENT WITHIN DATASET
@@ -68,48 +46,15 @@ def bulk_upload_documents(dataset: DataSet, num: int, tmp_path: Path) -> list[Do
def delete_all_documents(dataset: DataSet, *, page_size: int = 1000) -> None:
    """Delete every document in ``dataset`` in a single call.

    Args:
        dataset: Dataset object whose ``delete_documents`` method is invoked.
        page_size: Unused. Kept (keyword-only) so callers written for the
            former page-and-collect cleanup keep working.
    """
    # Missing/null/empty ids alone are a no-op for document DELETE, so
    # delete_all=True is what asks for the bulk removal.
    dataset.delete_documents(delete_all=True)
def delete_all_sessions(chat_assistant: Chat, *, page_size: int = 1000) -> None:
    """Delete every session of ``chat_assistant`` in a single call.

    Args:
        chat_assistant: Chat object whose ``delete_sessions`` method is
            invoked.
        page_size: Unused. Kept (keyword-only) so callers written for the
            former page-and-collect cleanup keep working.
    """
    # Missing/null/empty ids alone are a no-op for session DELETE, so
    # delete_all=True is what asks for the bulk removal.
    chat_assistant.delete_sessions(delete_all=True)
def delete_all_chunks(document: Document, *, page_size: int = 1000) -> None:
    """Delete every chunk of ``document`` in a single call.

    Args:
        document: Document object whose ``delete_chunks`` method is invoked.
        page_size: Unused. Kept (keyword-only) so callers written for the
            former page-and-collect cleanup keep working.
    """
    # Missing/null/empty ids alone are a no-op for chunk DELETE, so
    # delete_all=True is what asks for the bulk removal.
    document.delete_chunks(delete_all=True)
# CHUNK MANAGEMENT WITHIN DATASET