mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Implement GetChunk() in Infinity in GO (#13758)
### What problem does this PR solve?

Implement GetChunk() in Infinity in Go.

Add CLI commands:

- GET CHUNK 'XXX';
- LIST CHUNKS OF DOCUMENT 'XXX';

### Type of change

- [x] Refactoring
This commit is contained in:
@@ -91,6 +91,8 @@ sql_command: login_user
|
||||
| parse_dataset_async
|
||||
| import_docs_into_dataset
|
||||
| search_on_datasets
|
||||
| get_chunk
|
||||
| list_chunks
|
||||
| create_chat_session
|
||||
| drop_chat_session
|
||||
| list_chat_sessions
|
||||
@@ -164,6 +166,7 @@ DEFAULT: "DEFAULT"i
|
||||
CHATS: "CHATS"i
|
||||
CHAT: "CHAT"i
|
||||
FILES: "FILES"i
|
||||
DOCUMENT: "DOCUMENT"i
|
||||
DOCUMENTS: "DOCUMENTS"i
|
||||
METADATA: "METADATA"i
|
||||
SUMMARY: "SUMMARY"i
|
||||
@@ -194,6 +197,13 @@ FINGERPRINT: "FINGERPRINT"i
|
||||
LICENSE: "LICENSE"i
|
||||
CHECK: "CHECK"i
|
||||
CONFIG: "CONFIG"i
|
||||
CHUNK: "CHUNK"i
|
||||
CHUNKS: "CHUNKS"i
|
||||
GET: "GET"i
|
||||
PAGE: "PAGE"i
|
||||
SIZE: "SIZE"i
|
||||
KEYWORDS: "KEYWORDS"i
|
||||
AVAILABLE: "AVAILABLE"i
|
||||
|
||||
login_user: LOGIN USER quoted_string ";"
|
||||
list_services: LIST SERVICES ";"
|
||||
@@ -321,6 +331,8 @@ list_user_model_providers: LIST MODEL PROVIDERS ";"
|
||||
list_user_default_models: LIST DEFAULT MODELS ";"
|
||||
import_docs_into_dataset: IMPORT quoted_string INTO DATASET quoted_string ";"
|
||||
search_on_datasets: SEARCH quoted_string ON DATASETS quoted_string ";"
|
||||
get_chunk: GET CHUNK quoted_string ";"
|
||||
// Optional clauses use the declared case-insensitive terminals so the keyword
// tokens are kept in the parse tree for the transformer to inspect.
list_chunks: LIST CHUNKS OF DOCUMENT quoted_string (PAGE NUMBER)? (SIZE NUMBER)? (KEYWORDS quoted_string)? (AVAILABLE NUMBER)? ";"
|
||||
|
||||
parse_dataset_docs: PARSE quoted_string OF DATASET quoted_string ";"
|
||||
parse_dataset_sync: PARSE DATASET quoted_string SYNC ";"
|
||||
@@ -698,6 +710,28 @@ class RAGFlowCLITransformer(Transformer):
|
||||
datasets = datasets.split(" ")
|
||||
return {"type": "search_on_datasets", "datasets": datasets, "question": question}
|
||||
|
||||
def get_chunk(self, items):
    """Build the command dict for ``GET CHUNK '<chunk_id>'``.

    ``items[2]`` is the quoted-string node that follows the GET and CHUNK
    tokens; its first child is the raw text including the quotes.
    """
    quoted_text = items[2].children[0]
    return {
        "type": "get_chunk",
        # Drop the surrounding single or double quotes.
        "chunk_id": quoted_text.strip("'\""),
    }
|
||||
|
||||
def list_chunks(self, items):
    """Build the command dict for ``LIST CHUNKS OF DOCUMENT '<doc_id>' ...``.

    ``items[4]`` is the quoted-string node holding the document id. Any
    optional ``PAGE <n>``, ``SIZE <n>``, ``KEYWORDS '<text>'`` and
    ``AVAILABLE <n>`` clauses follow it as keyword/value pairs.

    Returns:
        A dict with ``type`` and ``doc_id`` plus, only for the clauses that
        are present, ``page``, ``size``, ``keywords`` and ``available_int``.
    """
    doc_id = items[4].children[0].strip("'\"")
    result = {"type": "list_chunks", "doc_id": doc_id}

    # keyword -> (result key, converter applied to the item after the keyword)
    option_specs = {
        "PAGE": ("page", int),
        "SIZE": ("size", int),
        "KEYWORDS": ("keywords", lambda node: node.children[0].strip("'\"")),
        "AVAILABLE": ("available_int", int),
    }

    # Walk adjacent (item, next item) pairs after the document id. Pairing
    # with zip means a trailing keyword without a value is ignored instead of
    # raising IndexError.
    for keyword, value in zip(items[5:], items[6:]):
        if hasattr(keyword, "children"):
            # A quoted-string node is a value, never a keyword.
            continue
        # Grammar keywords are case-insensitive, so normalise before lookup.
        spec = option_specs.get(str(keyword).upper())
        if spec is None:
            continue
        result_key, convert = spec
        result[result_key] = convert(value)

    return result
|
||||
|
||||
def benchmark(self, items):
|
||||
concurrency: int = int(items[1])
|
||||
iterations: int = int(items[2])
|
||||
|
||||
@@ -1434,6 +1434,61 @@ class RAGFlowClient:
|
||||
print(
|
||||
f"Fail to search datasets: {dataset_names}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def get_chunk(self, command_dict):
    """Fetch one chunk by id and print its fields, one per line.

    Args:
        command_dict: Parsed command; must contain ``"chunk_id"``.

    Returns:
        None. Results and failures are printed to stdout. Outside USER mode
        only a notice is printed and no request is sent.
    """
    from urllib.parse import urlencode

    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    # Percent-encode the id so characters such as '&', '#' or spaces cannot
    # truncate the value or add extra query parameters.
    query = urlencode({"chunk_id": command_dict["chunk_id"]})
    response = self.http_client.request("GET", f"/chunk/get?{query}", use_api_base=False,
                                        auth_kind="web")
    try:
        res_json = response.json()
    except ValueError:
        # The body is not JSON, so there is no code/message to report.
        print(f"Fail to get chunk, HTTP status: {response.status_code}, response is not valid JSON")
        return

    if response.status_code == 200 and res_json.get("code") == 0:
        self._print_key_value(res_json.get("data"))
    else:
        print(f"Fail to get chunk, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
|
||||
def list_chunks(self, command_dict):
    """List the chunks of a document and print each chunk's fields.

    Args:
        command_dict: Parsed command; must contain ``"doc_id"`` and may
            contain ``"page"``, ``"size"``, ``"keywords"`` and
            ``"available_int"``.

    Returns:
        None. Results and failures are printed to stdout. Outside USER mode
        only a notice is printed and no request is sent.
    """
    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    payload = {"doc_id": command_dict["doc_id"]}
    # Forward optional parameters only when explicitly provided; an empty
    # keywords string is treated as "not provided".
    for key in ("page", "size", "keywords", "available_int"):
        if key not in command_dict:
            continue
        value = command_dict[key]
        if key == "keywords" and not value:
            continue
        payload[key] = value

    response = self.http_client.request("POST", "/chunk/list", json_body=payload, use_api_base=False,
                                        auth_kind="web")
    try:
        res_json = response.json()
    except ValueError:
        # The body is not JSON, so there is no code/message to report.
        print(f"Fail to list chunks, HTTP status: {response.status_code}, response is not valid JSON")
        return

    if response.status_code != 200 or res_json.get("code") != 0:
        print(f"Fail to list chunks, code: {res_json.get('code')}, message: {res_json.get('message')}")
        return

    # Tolerate a null or missing "data" object instead of raising.
    chunks = (res_json.get("data") or {}).get("chunks")
    if not chunks:
        print("No chunks found")
        return

    for number, chunk in enumerate(chunks, start=1):
        print(f"\n--- Chunk {number} ---")
        for key, value in chunk.items():
            print(f" {key}: {value}")
|
||||
|
||||
def show_version(self, command):
|
||||
if self.server_type == "admin":
|
||||
response = self.http_client.request("GET", "/admin/version", use_api_base=True, auth_kind="admin")
|
||||
@@ -1618,6 +1673,14 @@ class RAGFlowClient:
|
||||
|
||||
print(separator)
|
||||
|
||||
def _print_key_value(self, data: dict):
    """Write each entry of ``data`` to stdout as ``key: value``, one per line.

    When ``data`` is empty or None, a single "No data to print" notice is
    written instead.
    """
    if data:
        for name in data:
            print(f"{name}: {data[name]}")
    else:
        print("No data to print")
|
||||
|
||||
|
||||
def run_command(client: RAGFlowClient, command_dict: dict):
|
||||
command_type = command_dict["type"]
|
||||
@@ -1761,6 +1824,10 @@ def run_command(client: RAGFlowClient, command_dict: dict):
|
||||
client.import_docs_into_dataset(command_dict)
|
||||
case "search_on_datasets":
|
||||
return client.search_on_datasets(command_dict)
|
||||
case "get_chunk":
|
||||
return client.get_chunk(command_dict)
|
||||
case "list_chunks":
|
||||
return client.list_chunks(command_dict)
|
||||
case "meta":
|
||||
_handle_meta_command(command_dict)
|
||||
case _:
|
||||
@@ -1818,6 +1885,8 @@ LIST DOCUMENTS OF DATASET <dataset>
|
||||
SEARCH <query> ON DATASETS <dataset>
|
||||
LIST METADATA OF DATASETS <dataset>[, <dataset>]*
|
||||
LIST METADATA SUMMARY OF DATASET <dataset> DOCUMENTS <doc_id>[, <doc_id>]*
|
||||
GET CHUNK <chunk_id>
|
||||
LIST CHUNKS OF DOCUMENT <doc_id> [PAGE <page>] [SIZE <size>] [KEYWORDS <keywords>] [AVAILABLE <0|1>]
|
||||
|
||||
Meta Commands:
|
||||
\\?, \\h, \\help Show this help
|
||||
|
||||
Reference in New Issue
Block a user