mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Implement Delete in GO and refactor functions (#13974)
### What problem does this PR solve?

Implement Delete in GO and refactor functions

### Type of change

- [x] Refactoring

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Added a remove_chunks command to delete specific or all chunks from a document.
  * Added new endpoints for chunk removal and chunk update.
* **Refactor**
  * Renamed index commands to dataset/metadata table terminology and updated REST routes accordingly.
  * Updated chunk update flow to a JSON POST style and improved metadata error messages.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
@@ -84,10 +84,10 @@ sql_command: login_user
|
||||
| list_user_chats
|
||||
| create_user_chat
|
||||
| drop_user_chat
|
||||
| create_index
|
||||
| drop_index
|
||||
| create_doc_meta_index
|
||||
| drop_doc_meta_index
|
||||
| create_dataset_table
|
||||
| drop_dataset_table
|
||||
| create_metadata_table
|
||||
| drop_metadata_table
|
||||
| list_user_model_providers
|
||||
| list_user_default_models
|
||||
| parse_dataset_docs
|
||||
@@ -102,6 +102,7 @@ sql_command: login_user
|
||||
| update_chunk
|
||||
| set_metadata
|
||||
| remove_tags
|
||||
| remove_chunks
|
||||
| create_chat_session
|
||||
| drop_chat_session
|
||||
| list_chat_sessions
|
||||
@@ -143,6 +144,7 @@ ALTER: "ALTER"i
|
||||
ACTIVE: "ACTIVE"i
|
||||
ADMIN: "ADMIN"i
|
||||
PASSWORD: "PASSWORD"i
|
||||
DATASET_TABLE: "DATASET TABLE"i
|
||||
DATASET: "DATASET"i
|
||||
DATASETS: "DATASETS"i
|
||||
OF: "OF"i
|
||||
@@ -187,7 +189,8 @@ IMPORT: "IMPORT"i
|
||||
INTO: "INTO"i
|
||||
IN: "IN"i
|
||||
WITH: "WITH"i
|
||||
VECTOR_SIZE: "VECTOR_SIZE"i
|
||||
VECTOR: "VECTOR"i
|
||||
SIZE: "SIZE"i
|
||||
PARSER: "PARSER"i
|
||||
PIPELINE: "PIPELINE"i
|
||||
SEARCH: "SEARCH"i
|
||||
@@ -210,13 +213,12 @@ LICENSE: "LICENSE"i
|
||||
CHECK: "CHECK"i
|
||||
CONFIG: "CONFIG"i
|
||||
INDEX: "INDEX"i
|
||||
DOC_META: "DOC_META"i
|
||||
TABLE: "TABLE"i
|
||||
CHUNK: "CHUNK"i
|
||||
CHUNKS: "CHUNKS"i
|
||||
GET: "GET"i
|
||||
INSERT: "INSERT"i
|
||||
PAGE: "PAGE"i
|
||||
SIZE: "SIZE"i
|
||||
KEYWORDS: "KEYWORDS"i
|
||||
AVAILABLE: "AVAILABLE"i
|
||||
FILE: "FILE"i
|
||||
@@ -345,10 +347,6 @@ list_user_agents: LIST AGENTS ";"
|
||||
list_user_chats: LIST CHATS ";"
|
||||
create_user_chat: CREATE CHAT quoted_string ";"
|
||||
drop_user_chat: DROP CHAT quoted_string ";"
|
||||
create_index: CREATE INDEX FOR DATASET quoted_string VECTOR_SIZE NUMBER ";"
|
||||
drop_index: DROP INDEX FOR DATASET quoted_string ";"
|
||||
create_doc_meta_index: CREATE INDEX DOC_META ";"
|
||||
drop_doc_meta_index: DROP INDEX DOC_META ";"
|
||||
create_chat_session: CREATE CHAT quoted_string SESSION ";"
|
||||
drop_chat_session: DROP CHAT quoted_string SESSION quoted_string ";"
|
||||
list_chat_sessions: LIST CHAT quoted_string SESSIONS ";"
|
||||
@@ -359,18 +357,23 @@ import_docs_into_dataset: IMPORT quoted_string INTO DATASET quoted_string ";"
|
||||
search_on_datasets: SEARCH quoted_string ON DATASETS quoted_string ";"
|
||||
get_chunk: GET CHUNK quoted_string ";"
|
||||
list_chunks: LIST CHUNKS OF DOCUMENT quoted_string ("PAGE" NUMBER)? ("SIZE" NUMBER)? ("KEYWORDS" quoted_string)? ("AVAILABLE" NUMBER)? ";"
|
||||
set_metadata: SET METADATA OF DOCUMENT quoted_string TO quoted_string ";"
|
||||
remove_tags: REMOVE TAGS quoted_string (COMMA quoted_string)* FROM DATASET quoted_string ";"
|
||||
remove_chunks: REMOVE CHUNKS quoted_string (COMMA quoted_string)* FROM DOCUMENT quoted_string ";"
|
||||
| REMOVE ALL CHUNKS FROM DOCUMENT quoted_string ";"
|
||||
|
||||
parse_dataset_docs: PARSE quoted_string OF DATASET quoted_string ";"
|
||||
parse_dataset_sync: PARSE DATASET quoted_string SYNC ";"
|
||||
parse_dataset_async: PARSE DATASET quoted_string ASYNC ";"
|
||||
|
||||
update_chunk: UPDATE CHUNK quoted_string OF DATASET quoted_string SET quoted_string ";"
|
||||
set_metadata: SET METADATA OF DOCUMENT quoted_string TO quoted_string ";"
|
||||
remove_tags: REMOVE TAGS quoted_string (COMMA quoted_string)* FROM DATASET quoted_string ";"
|
||||
|
||||
// Internal CLI for GO
|
||||
// Internal CLI only for GO
|
||||
create_dataset_table: CREATE DATASET TABLE quoted_string VECTOR SIZE NUMBER ";"
|
||||
drop_dataset_table: DROP DATASET TABLE quoted_string ";"
|
||||
create_metadata_table: CREATE METADATA TABLE ";"
|
||||
drop_metadata_table: DROP METADATA TABLE ";"
|
||||
insert_dataset_from_file: INSERT DATASET FROM FILE quoted_string ";"
|
||||
insert_metadata_from_file: INSERT METADATA FROM FILE quoted_string ";"
|
||||
update_chunk: UPDATE CHUNK quoted_string OF DATASET quoted_string SET quoted_string ";"
|
||||
|
||||
identifier_list: identifier (COMMA identifier)*
|
||||
|
||||
@@ -690,30 +693,29 @@ class RAGFlowCLITransformer(Transformer):
|
||||
chat_name = items[2].children[0].strip("'\"")
|
||||
return {"type": "drop_user_chat", "chat_name": chat_name}
|
||||
|
||||
def create_index(self, items):
|
||||
# items: CREATE, INDEX, FOR, DATASET, quoted_string, VECTOR_SIZE, NUMBER, ";"
|
||||
def create_dataset_table(self, items):
|
||||
dataset_name = None
|
||||
vector_size = None
|
||||
for i, item in enumerate(items):
|
||||
if hasattr(item, 'data') and item.data == 'quoted_string':
|
||||
dataset_name = item.children[0].strip("'\"")
|
||||
if hasattr(item, 'type') and item.type == 'NUMBER':
|
||||
if i > 0 and items[i-1].type == 'VECTOR_SIZE':
|
||||
if i > 0 and items[i-1].type == 'SIZE' and items[i-2].type == 'VECTOR':
|
||||
vector_size = int(item)
|
||||
return {"type": "create_index", "dataset_name": dataset_name, "vector_size": vector_size}
|
||||
return {"type": "create_dataset_table", "dataset_name": dataset_name, "vector_size": vector_size}
|
||||
|
||||
def drop_index(self, items):
|
||||
def drop_dataset_table(self, items):
|
||||
dataset_name = None
|
||||
for item in items:
|
||||
if hasattr(item, 'data') and item.data == 'quoted_string':
|
||||
dataset_name = item.children[0].strip("'\"")
|
||||
return {"type": "drop_index", "dataset_name": dataset_name}
|
||||
return {"type": "drop_dataset_table", "dataset_name": dataset_name}
|
||||
|
||||
def create_doc_meta_index(self, items):
|
||||
return {"type": "create_doc_meta_index"}
|
||||
def create_metadata_table(self, items):
|
||||
return {"type": "create_metadata_table"}
|
||||
|
||||
def drop_doc_meta_index(self, items):
|
||||
return {"type": "drop_doc_meta_index"}
|
||||
def drop_metadata_table(self, items):
|
||||
return {"type": "drop_metadata_table"}
|
||||
|
||||
def list_user_model_providers(self, items):
|
||||
return {"type": "list_user_model_providers"}
|
||||
@@ -825,6 +827,35 @@ class RAGFlowCLITransformer(Transformer):
|
||||
break
|
||||
return {"type": "remove_tags", "dataset_name": dataset_name, "tags": tags}
|
||||
|
||||
def remove_chunks(self, items):
    """Build the command dict for a ``REMOVE ... CHUNKS`` statement.

    Two grammar alternatives are handled:
      1. REMOVE CHUNKS quoted_string (COMMA quoted_string)* FROM DOCUMENT quoted_string ";"
      2. REMOVE ALL CHUNKS FROM DOCUMENT quoted_string ";"

    Args:
        items: Children of the matched rule. Keyword tokens are recognised
            by their ``.type`` attribute; quoted strings are subtrees whose
            ``.data`` is ``'quoted_string'`` and whose first child holds the
            quoted text.

    Returns:
        ``{"type": "remove_chunks", "doc_id": ..., "delete_all": True}`` for
        the ALL form, otherwise
        ``{"type": "remove_chunks", "doc_id": ..., "chunk_ids": [...]}``.
    """
    def is_quoted(node):
        return getattr(node, "data", None) == "quoted_string"

    def unquote(node):
        return node.children[0].strip("'\"")

    # Subtrees have no ``type`` attribute, so they show up as None here.
    token_types = [getattr(item, "type", None) for item in items]

    # The document id is the quoted string right after the DOCUMENT keyword.
    doc_pos = token_types.index("DOCUMENT") if "DOCUMENT" in token_types else None
    doc_id = None
    if doc_pos is not None and doc_pos + 1 < len(items) and is_quoted(items[doc_pos + 1]):
        doc_id = unquote(items[doc_pos + 1])

    if "ALL" in token_types:
        return {"type": "remove_chunks", "doc_id": doc_id, "delete_all": True}

    # Only quoted strings that precede DOCUMENT are chunk ids. Stopping at the
    # keyword keeps the document id out of the list, and selecting by node kind
    # (not by text) means a chunk id spelled like a keyword is still kept.
    candidates = items if doc_pos is None else items[:doc_pos]
    chunk_ids = [unquote(node) for node in candidates if is_quoted(node)]

    return {"type": "remove_chunks", "doc_id": doc_id, "chunk_ids": chunk_ids}
|
||||
|
||||
def list_chunks(self, items):
|
||||
doc_id = items[4].children[0].strip("'\"")
|
||||
result = {"type": "list_chunks", "doc_id": doc_id}
|
||||
|
||||
@@ -1029,7 +1029,7 @@ class RAGFlowClient:
|
||||
else:
|
||||
print(f"Fail to create chat {chat_name}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def create_index(self, command):
|
||||
def create_dataset_table(self, command):
|
||||
if self.server_type != "user":
|
||||
print("This command is only allowed in USER mode")
|
||||
return
|
||||
@@ -1045,15 +1045,15 @@ class RAGFlowClient:
|
||||
# Build payload
|
||||
payload = {"kb_id": dataset_id, "vector_size": vector_size}
|
||||
# Call API
|
||||
response = self.http_client.request("POST", "/kb/index", json_body=payload,
|
||||
response = self.http_client.request("POST", "/kb/doc_engine_table", json_body=payload,
|
||||
use_api_base=False, auth_kind="web")
|
||||
res_json = response.json()
|
||||
if response.status_code == 200 and res_json.get("code") == 0:
|
||||
print(f"Success to create index for dataset: {dataset_name}")
|
||||
print(f"Success to create table for dataset: {dataset_name}")
|
||||
else:
|
||||
print(f"Fail to create index for dataset {dataset_name}, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
print(f"Fail to create table for dataset {dataset_name}, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
|
||||
def drop_index(self, command):
|
||||
def drop_dataset_table(self, command):
|
||||
if self.server_type != "user":
|
||||
print("This command is only allowed in USER mode")
|
||||
return
|
||||
@@ -1062,41 +1062,41 @@ class RAGFlowClient:
|
||||
dataset_id = self._get_dataset_id(dataset_name)
|
||||
if dataset_id is None:
|
||||
return
|
||||
# Call API to delete index
|
||||
# Call API to delete table
|
||||
payload = {"kb_id": dataset_id}
|
||||
response = self.http_client.request("DELETE", "/kb/index", json_body=payload,
|
||||
response = self.http_client.request("DELETE", "/kb/doc_engine_table", json_body=payload,
|
||||
use_api_base=False, auth_kind="web")
|
||||
res_json = response.json()
|
||||
if response.status_code == 200 and res_json.get("code") == 0:
|
||||
print(f"Success to drop index for dataset: {dataset_name}")
|
||||
print(f"Success to drop table for dataset: {dataset_name}")
|
||||
else:
|
||||
print(f"Fail to drop index for dataset {dataset_name}, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
print(f"Fail to drop table for dataset {dataset_name}, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
|
||||
def create_doc_meta_index(self, command):
|
||||
def create_metadata_table(self, command):
|
||||
if self.server_type != "user":
|
||||
print("This command is only allowed in USER mode")
|
||||
return
|
||||
# Call API to create doc meta index
|
||||
response = self.http_client.request("POST", "/tenant/doc_meta_index",
|
||||
# Call API to create metadata table
|
||||
response = self.http_client.request("POST", "/tenant/doc_engine_metadata_table",
|
||||
use_api_base=False, auth_kind="web")
|
||||
res_json = response.json()
|
||||
if response.status_code == 200 and res_json.get("code") == 0:
|
||||
print("Success to create doc meta index")
|
||||
print("Success to create metadata table")
|
||||
else:
|
||||
print(f"Fail to create doc meta index, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
print(f"Fail to create metadata table, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
|
||||
def drop_doc_meta_index(self, command):
|
||||
def drop_metadata_table(self, command):
|
||||
if self.server_type != "user":
|
||||
print("This command is only allowed in USER mode")
|
||||
return
|
||||
# Call API to delete doc meta index
|
||||
response = self.http_client.request("DELETE", "/tenant/doc_meta_index",
|
||||
# Call API to delete metadata table
|
||||
response = self.http_client.request("DELETE", "/tenant/doc_engine_metadata_table",
|
||||
use_api_base=False, auth_kind="web")
|
||||
res_json = response.json()
|
||||
if response.status_code == 200 and res_json.get("code") == 0:
|
||||
print("Success to drop doc meta index")
|
||||
print("Success to drop metadata table")
|
||||
else:
|
||||
print(f"Fail to drop doc meta index, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
print(f"Fail to drop metadata table, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
|
||||
def drop_user_chat(self, command):
|
||||
if self.server_type != "user":
|
||||
@@ -1548,9 +1548,13 @@ class RAGFlowClient:
|
||||
print(f"Invalid JSON body: {e}")
|
||||
return
|
||||
|
||||
# Call PUT /datasets/{dataset_id}/documents/{doc_id}/chunks/{chunk_id}
|
||||
path = f"/datasets/{dataset_id}/documents/{doc_id}/chunks/{chunk_id}"
|
||||
response = self.http_client.request("PUT", path, json_body=payload, use_api_base=True, auth_kind="api")
|
||||
# Add IDs to payload
|
||||
payload["dataset_id"] = dataset_id
|
||||
payload["document_id"] = doc_id
|
||||
payload["chunk_id"] = chunk_id
|
||||
|
||||
# Call POST /v1/chunk/update
|
||||
response = self.http_client.request("POST", "/chunk/update", json_body=payload, use_api_base=False, auth_kind="web")
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
if res_json.get("code") == 0:
|
||||
@@ -1583,7 +1587,7 @@ class RAGFlowClient:
|
||||
else:
|
||||
print(f"Fail to set metadata, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
else:
|
||||
print(f"Fail to set metadata, HTTP {response.status_code}")
|
||||
print(f"Fail to set metadata, HTTP {response.status_code}: {res_json.get('message', 'no message')}")
|
||||
|
||||
def remove_tags(self, command_dict):
|
||||
if self.server_type != "user":
|
||||
@@ -1613,6 +1617,31 @@ class RAGFlowClient:
|
||||
else:
|
||||
print(f"Fail to remove tags, HTTP {response.status_code}")
|
||||
|
||||
def remove_chunks(self, command_dict):
    """Delete chunks of one document through ``POST /chunk/rm`` and print the outcome.

    Args:
        command_dict: Parsed command. ``doc_id`` is required. A truthy
            ``delete_all`` takes precedence; otherwise a non-empty
            ``chunk_ids`` list is forwarded as-is.

    Returns:
        None. Results and failures are reported with ``print``.
    """
    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    doc_id = command_dict["doc_id"]
    request_body = {"doc_id": doc_id}
    if command_dict.get("delete_all"):
        request_body["delete_all"] = True
    else:
        chunk_ids = command_dict.get("chunk_ids")
        if chunk_ids:
            request_body["chunk_ids"] = chunk_ids

    response = self.http_client.request(
        "POST",
        "/chunk/rm",
        json_body=request_body,
        use_api_base=False,
        auth_kind="web",
    )
    # The body is decoded before the status is inspected.
    body = response.json()

    if response.status_code != 200:
        print(f"Fail to remove chunks, HTTP {response.status_code}")
        return

    if body.get("code") != 0:
        print(f"Fail to remove chunks, code: {body.get('code')}, message: {body.get('message')}")
        return

    removed = body.get("data", 0)
    print(f"Success to remove chunks from document {doc_id}: {removed} chunks deleted")
|
||||
|
||||
def list_chunks(self, command_dict):
|
||||
if self.server_type != "user":
|
||||
print("This command is only allowed in USER mode")
|
||||
@@ -1976,14 +2005,14 @@ def run_command(client: RAGFlowClient, command_dict: dict):
|
||||
client.create_user_chat(command_dict)
|
||||
case "drop_user_chat":
|
||||
client.drop_user_chat(command_dict)
|
||||
case "create_index":
|
||||
client.create_index(command_dict)
|
||||
case "drop_index":
|
||||
client.drop_index(command_dict)
|
||||
case "create_doc_meta_index":
|
||||
client.create_doc_meta_index(command_dict)
|
||||
case "drop_doc_meta_index":
|
||||
client.drop_doc_meta_index(command_dict)
|
||||
case "create_dataset_table":
|
||||
client.create_dataset_table(command_dict)
|
||||
case "drop_dataset_table":
|
||||
client.drop_dataset_table(command_dict)
|
||||
case "create_metadata_table":
|
||||
client.create_metadata_table(command_dict)
|
||||
case "drop_metadata_table":
|
||||
client.drop_metadata_table(command_dict)
|
||||
case "create_chat_session":
|
||||
client.create_chat_session(command_dict)
|
||||
case "drop_chat_session":
|
||||
@@ -2016,6 +2045,8 @@ def run_command(client: RAGFlowClient, command_dict: dict):
|
||||
return client.set_metadata(command_dict)
|
||||
case "remove_tags":
|
||||
return client.remove_tags(command_dict)
|
||||
case "remove_chunks":
|
||||
return client.remove_chunks(command_dict)
|
||||
case "list_chunks":
|
||||
return client.list_chunks(command_dict)
|
||||
case "meta":
|
||||
@@ -2077,10 +2108,6 @@ LIST METADATA OF DATASETS <dataset>[, <dataset>]*
|
||||
LIST METADATA SUMMARY OF DATASET <dataset> DOCUMENTS <doc_id>[, <doc_id>]*
|
||||
GET CHUNK <chunk_id>
|
||||
LIST CHUNKS OF DOCUMENT <doc_id> [PAGE <page>] [SIZE <size>] [KEYWORDS <keywords>] [AVAILABLE <0|1>]
|
||||
CREATE INDEX FOR DATASET <dataset> VECTOR_SIZE <vector_size>
|
||||
DROP INDEX FOR DATASET <dataset>
|
||||
CREATE INDEX DOC_META
|
||||
DROP INDEX DOC_META
|
||||
|
||||
Meta Commands:
|
||||
\\?, \\h, \\help Show this help
|
||||
|
||||
Reference in New Issue
Block a user