mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 15:31:05 +08:00
Implement UpdateDataset and UpdateMetadata in GO (#13928)
### What problem does this PR solve?

Implement UpdateDataset and UpdateMetadata in Go, and add the following CLI commands:

- UPDATE CHUNK <chunk_id> OF DATASET <dataset_name> SET <update_fields>;
- REMOVE TAGS 'tag1', 'tag2' FROM DATASET 'dataset_name';
- SET METADATA OF DOCUMENT <doc_id> TO <meta>;

### Type of change

- [ ] Refactoring
This commit is contained in:
@@ -99,6 +99,9 @@ sql_command: login_user
|
||||
| list_chunks
|
||||
| insert_dataset_from_file
|
||||
| insert_metadata_from_file
|
||||
| update_chunk
|
||||
| set_metadata
|
||||
| remove_tags
|
||||
| create_chat_session
|
||||
| drop_chat_session
|
||||
| list_chat_sessions
|
||||
@@ -114,10 +117,12 @@ sql_command: login_user
|
||||
// meta command definition
|
||||
meta_command: "\\" meta_command_name [meta_args]
|
||||
|
||||
COMMA: ","
|
||||
|
||||
meta_command_name: /[a-zA-Z?]+/
|
||||
meta_args: (meta_arg)+
|
||||
|
||||
meta_arg: /[^\\s"']+/ | quoted_string
|
||||
meta_arg: /[^\s"',]+/ | quoted_string
|
||||
|
||||
// command definition
|
||||
|
||||
@@ -215,8 +220,11 @@ SIZE: "SIZE"i
|
||||
KEYWORDS: "KEYWORDS"i
|
||||
AVAILABLE: "AVAILABLE"i
|
||||
FILE: "FILE"i
|
||||
UPDATE: "UPDATE"i
|
||||
REMOVE: "REMOVE"i
|
||||
TAGS: "TAGS"i
|
||||
|
||||
login_user: LOGIN USER quoted_string ";"
|
||||
login_user: LOGIN USER quoted_string (PASSWORD quoted_string)? ";"
|
||||
list_services: LIST SERVICES ";"
|
||||
show_service: SHOW SERVICE NUMBER ";"
|
||||
startup_service: STARTUP SERVICE NUMBER ";"
|
||||
@@ -299,6 +307,9 @@ user_statement: ping_server
|
||||
| list_user_default_models
|
||||
| import_docs_into_dataset
|
||||
| search_on_datasets
|
||||
| update_chunk
|
||||
| set_metadata
|
||||
| remove_tags
|
||||
| create_chat_session
|
||||
| drop_chat_session
|
||||
| list_chat_sessions
|
||||
@@ -328,8 +339,8 @@ create_user_dataset_with_pipeline: CREATE DATASET quoted_string WITH EMBEDDING q
|
||||
drop_user_dataset: DROP DATASET quoted_string ";"
|
||||
list_user_dataset_files: LIST FILES OF DATASET quoted_string ";"
|
||||
list_user_dataset_documents: LIST DOCUMENTS OF DATASET quoted_string ";"
|
||||
list_user_datasets_metadata: LIST METADATA OF DATASETS quoted_string ("," quoted_string)* ";"
|
||||
list_user_documents_metadata_summary: LIST METADATA SUMMARY OF DATASET quoted_string (DOCUMENTS quoted_string ("," quoted_string)*)? ";"
|
||||
list_user_datasets_metadata: LIST METADATA OF DATASETS quoted_string (COMMA quoted_string)* ";"
|
||||
list_user_documents_metadata_summary: LIST METADATA SUMMARY OF DATASET quoted_string (DOCUMENTS quoted_string (COMMA quoted_string)*)? ";"
|
||||
list_user_agents: LIST AGENTS ";"
|
||||
list_user_chats: LIST CHATS ";"
|
||||
create_user_chat: CREATE CHAT quoted_string ";"
|
||||
@@ -353,11 +364,15 @@ parse_dataset_docs: PARSE quoted_string OF DATASET quoted_string ";"
|
||||
parse_dataset_sync: PARSE DATASET quoted_string SYNC ";"
|
||||
parse_dataset_async: PARSE DATASET quoted_string ASYNC ";"
|
||||
|
||||
update_chunk: UPDATE CHUNK quoted_string OF DATASET quoted_string SET quoted_string ";"
|
||||
set_metadata: SET METADATA OF DOCUMENT quoted_string TO quoted_string ";"
|
||||
remove_tags: REMOVE TAGS quoted_string (COMMA quoted_string)* FROM DATASET quoted_string ";"
|
||||
|
||||
// Internal CLI for GO
|
||||
insert_dataset_from_file: INSERT DATASET FROM FILE quoted_string ";"
|
||||
insert_metadata_from_file: INSERT METADATA FROM FILE quoted_string ";"
|
||||
|
||||
identifier_list: identifier ("," identifier)*
|
||||
identifier_list: identifier (COMMA identifier)*
|
||||
|
||||
identifier: WORD
|
||||
quoted_string: QUOTED_STRING
|
||||
@@ -381,7 +396,13 @@ class RAGFlowCLITransformer(Transformer):
|
||||
|
||||
def login_user(self, items):
    """Build the ``login_user`` command dict from a parsed LOGIN USER statement.

    ``items`` holds the rule's children in grammar order: LOGIN, USER, the
    e-mail ``quoted_string`` and, when a password was supplied inline,
    PASSWORD followed by a second ``quoted_string``.

    Returns a dict with ``type`` and ``email``; ``password`` is included only
    when the statement carried one.
    """
    email = items[2].children[0].strip("'\"")
    command = {"type": "login_user", "email": email}

    # Five children means the optional "PASSWORD <quoted_string>" part matched.
    # (A stale unconditional return used to sit above this check and made the
    # password branch unreachable.)
    if len(items) == 5:
        command["password"] = items[4].children[0].strip("'\"")
    return command
|
||||
|
||||
def ping_server(self, items):
|
||||
return {"type": "ping_server"}
|
||||
@@ -766,6 +787,44 @@ class RAGFlowCLITransformer(Transformer):
|
||||
file_path = items[4].children[0].strip("'\"")
|
||||
return {"type": "insert_metadata_from_file", "file_path": file_path}
|
||||
|
||||
def update_chunk(self, items):
    """Turn a parsed UPDATE CHUNK statement into an ``update_chunk`` command dict.

    Child positions follow the grammar rule
    ``UPDATE CHUNK <qs> OF DATASET <qs> SET <qs>``: the chunk id is child 2,
    the dataset name child 5 and the JSON body child 7.
    """

    def _unquote(node):
        # Tree-like nodes carry the raw token in children[0]; anything else
        # is stringified. Surrounding quote characters are stripped either way.
        kids = getattr(node, "children", None)
        raw = kids[0] if kids else str(node)
        return raw.strip("'\"")

    return {
        "type": "update_chunk",
        "chunk_id": _unquote(items[2]),
        "dataset_name": _unquote(items[5]),
        "json_body": _unquote(items[7]),
    }
|
||||
|
||||
def set_metadata(self, items):
    """Turn a parsed SET METADATA statement into a ``set_metadata`` command dict.

    Per the grammar rule ``SET METADATA OF DOCUMENT <qs> TO <qs>``, child 4 is
    the document id and child 6 the metadata string; both have their
    surrounding quote characters stripped.
    """
    doc_node, meta_node = items[4], items[6]
    return {
        "type": "set_metadata",
        "doc_id": doc_node.children[0].strip("'\""),
        "meta": meta_node.children[0].strip("'\""),
    }
|
||||
|
||||
def remove_tags(self, items):
    """Turn a parsed REMOVE TAGS statement into a ``remove_tags`` command dict.

    ``items`` follows the grammar rule
    ``REMOVE TAGS <qs> (COMMA <qs>)* FROM DATASET <qs>``: REMOVE, TAGS, then
    tag ``quoted_string`` nodes separated by COMMA tokens, then FROM, DATASET
    and the dataset-name ``quoted_string``.

    Returns ``{"type": "remove_tags", "dataset_name": ..., "tags": [...]}``;
    ``dataset_name`` is ``None`` if no DATASET token followed by a node is
    present.
    """
    quote_chars = "'\""

    # Tags are every node with children between TAGS and FROM; COMMA tokens
    # have no children and are skipped.
    tags = []
    for item in items[2:]:
        if getattr(item, "type", None) == "FROM":
            break
        children = getattr(item, "children", None)
        if children:
            tags.append(children[0].strip(quote_chars))

    # The dataset name is the node immediately after the DATASET token.
    # Pairing each item with its successor avoids indexing past the end.
    dataset_name = None
    for token, following in zip(items, items[1:]):
        if getattr(token, "type", None) == "DATASET":
            dataset_name = following.children[0].strip(quote_chars)
            break

    return {"type": "remove_tags", "dataset_name": dataset_name, "tags": tags}
|
||||
|
||||
def list_chunks(self, items):
|
||||
doc_id = items[4].children[0].strip("'\"")
|
||||
result = {"type": "list_chunks", "doc_id": doc_id}
|
||||
|
||||
@@ -18,6 +18,9 @@ import sys
|
||||
import argparse
|
||||
import base64
|
||||
import getpass
|
||||
import os
|
||||
import atexit
|
||||
import readline
|
||||
from cmd import Cmd
|
||||
from typing import Any, Dict, List
|
||||
|
||||
@@ -61,6 +64,12 @@ class RAGFlowCLI(Cmd):
|
||||
self.port: int = 0
|
||||
self.mode: str = "admin"
|
||||
self.ragflow_client = None
|
||||
# History file for readline persistence
|
||||
self.history_file = os.path.expanduser("~/.ragflow_cli_history")
|
||||
# Load existing history
|
||||
self._load_history()
|
||||
# Register cleanup to save history on exit
|
||||
atexit.register(self._save_history)
|
||||
|
||||
intro = r"""Type "\h" for help."""
|
||||
prompt = "ragflow> "
|
||||
@@ -99,6 +108,7 @@ class RAGFlowCLI(Cmd):
|
||||
return {"type": "empty"}
|
||||
|
||||
self.command_history.append(command_str)
|
||||
readline.add_history(command_str)
|
||||
|
||||
try:
|
||||
result = self.parser.parse(command_str)
|
||||
@@ -210,6 +220,21 @@ class RAGFlowCLI(Cmd):
|
||||
|
||||
print(separator)
|
||||
|
||||
def _load_history(self):
    """Load readline history from ``self.history_file`` when that file exists."""
    try:
        if not os.path.exists(self.history_file):
            return
        readline.read_history_file(self.history_file)
    except Exception:
        # Best effort: a missing or unreadable history file must not stop the CLI.
        return
|
||||
|
||||
def _save_history(self):
    """Write the current readline history to ``self.history_file``."""
    target = self.history_file
    try:
        readline.write_history_file(target)
    except Exception:
        # Best effort: failing to persist history must not break shutdown.
        return
|
||||
|
||||
def run_interactive(self, args):
|
||||
if self.verify_auth(args, single_command=False, auth=args["auth"]):
|
||||
print(r"""
|
||||
|
||||
@@ -24,7 +24,6 @@ from http_client import HttpClient
|
||||
from lark import Tree
|
||||
from user import encrypt_password, login_user
|
||||
|
||||
import getpass
|
||||
import base64
|
||||
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
|
||||
from Cryptodome.PublicKey import RSA
|
||||
@@ -63,10 +62,16 @@ class RAGFlowClient:
|
||||
return
|
||||
|
||||
email: str = command["email"]
|
||||
user_password = getpass.getpass(f"password for {email}: ").strip()
|
||||
user_password: str = command.get("password")
|
||||
if not user_password:
|
||||
import getpass
|
||||
user_password = getpass.getpass("Password: ")
|
||||
try:
|
||||
token = login_user(self.http_client, self.server_type, email, user_password)
|
||||
self.http_client.login_token = token
|
||||
# Also store as api_key for API endpoint authentication
|
||||
if self.server_type == "user":
|
||||
self.http_client.api_key = token
|
||||
print(f"Login user {email} successfully")
|
||||
except Exception as e:
|
||||
print(str(e))
|
||||
@@ -1506,6 +1511,108 @@ class RAGFlowClient:
|
||||
else:
|
||||
print(f"Fail to insert metadata from file, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def update_chunk(self, command_dict):
    """Update a single chunk of a dataset with a JSON body from the command.

    Reads ``chunk_id``, ``dataset_name`` and ``json_body`` from
    ``command_dict``, resolves the dataset id via ``self._get_dataset_id``,
    looks up the owning document with ``GET /chunk/get`` and then issues
    ``PUT /datasets/{dataset_id}/documents/{doc_id}/chunks/{chunk_id}``.
    Only allowed when ``self.server_type`` is ``"user"``. Outcomes are
    reported with ``print``; nothing is returned.
    """
    from urllib.parse import quote

    def _json_or_empty(resp):
        # NOTE(review): assumes a requests-style response whose json() raises
        # a ValueError subclass on a non-JSON body - confirm against HttpClient.
        try:
            body = resp.json()
        except ValueError:
            return {}
        return body if isinstance(body, dict) else {}

    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    chunk_id = command_dict["chunk_id"]
    dataset_name = command_dict["dataset_name"]

    # Validate the body first so a typo fails fast, before any network call.
    try:
        payload = json.loads(command_dict["json_body"])
    except json.JSONDecodeError as e:
        print(f"Invalid JSON body: {e}")
        return

    dataset_id = self._get_dataset_id(dataset_name)
    if dataset_id is None:
        return

    # chunk_id is user input: percent-encode it so characters such as "&",
    # "/" or spaces cannot alter the query string or the request path.
    chunk_ref = quote(str(chunk_id), safe="")

    # Resolve the owning document of the chunk.
    response = self.http_client.request(
        "GET", f"/chunk/get?chunk_id={chunk_ref}", use_api_base=False, auth_kind="web"
    )
    res_json = _json_or_empty(response)
    if response.status_code != 200:
        print(f"Fail to get chunk info, code: {res_json.get('code')}, message: {res_json.get('message')}")
        return

    doc_id = None
    if res_json.get("code") == 0 and res_json.get("data"):
        doc_id = res_json["data"].get("doc_id")
    if not doc_id:
        print(f"Could not find document_id for chunk {chunk_id}")
        return

    path = f"/datasets/{dataset_id}/documents/{doc_id}/chunks/{chunk_ref}"
    response = self.http_client.request("PUT", path, json_body=payload, use_api_base=True, auth_kind="api")
    if response.status_code != 200:
        print(f"Fail to update chunk, HTTP {response.status_code}")
        return

    res_json = _json_or_empty(response)
    if res_json.get("code") == 0:
        print(f"Success to update chunk: {chunk_id}")
    else:
        print(f"Fail to update chunk, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
|
||||
def set_metadata(self, command_dict):
    """Set the metadata of one document via ``POST /document/set_meta``.

    Reads ``doc_id`` and ``meta`` from ``command_dict`` and forwards ``meta``
    unchanged as a string. Only allowed when ``self.server_type`` is
    ``"user"``. Outcomes are reported with ``print``; nothing is returned.
    """
    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    doc_id = command_dict["doc_id"]
    # The metadata is sent as the raw string typed by the user, not re-encoded.
    payload = {
        "doc_id": doc_id,
        "meta": command_dict["meta"],
    }

    response = self.http_client.request(
        "POST", "/document/set_meta", json_body=payload, use_api_base=False, auth_kind="web"
    )

    # Check the status before decoding: an error response may not carry JSON,
    # and decoding it first would raise instead of reporting the HTTP failure.
    if response.status_code != 200:
        print(f"Fail to set metadata, HTTP {response.status_code}")
        return

    # NOTE(review): assumes json() raises a ValueError subclass on a non-JSON
    # body (requests-style) - confirm against HttpClient.
    try:
        res_json = response.json()
    except ValueError:
        res_json = {}

    if res_json.get("code") == 0:
        print(f"Success to set metadata for document: {doc_id}")
    else:
        print(f"Fail to set metadata, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
|
||||
def remove_tags(self, command_dict):
    """Remove tags from a dataset via ``POST /kb/{dataset_id}/rm_tags``.

    Reads ``dataset_name`` and ``tags`` from ``command_dict`` and resolves the
    dataset id with ``self._get_dataset_id``. Only allowed when
    ``self.server_type`` is ``"user"``. Outcomes are reported with ``print``;
    nothing is returned.
    """
    if self.server_type != "user":
        print("This command is only allowed in USER mode")
        return

    dataset_name = command_dict["dataset_name"]
    dataset_id = self._get_dataset_id(dataset_name)
    if dataset_id is None:
        print(f"Dataset not found: {dataset_name}")
        return

    payload = {
        "tags": command_dict["tags"],
    }

    response = self.http_client.request(
        "POST", f"/kb/{dataset_id}/rm_tags", json_body=payload, use_api_base=False, auth_kind="web"
    )

    # Check the status before decoding: an error response may not carry JSON,
    # and decoding it first would raise instead of reporting the HTTP failure.
    if response.status_code != 200:
        print(f"Fail to remove tags, HTTP {response.status_code}")
        return

    # NOTE(review): assumes json() raises a ValueError subclass on a non-JSON
    # body (requests-style) - confirm against HttpClient.
    try:
        res_json = response.json()
    except ValueError:
        res_json = {}

    if res_json.get("code") == 0:
        print(f"Success to remove tags from dataset: {dataset_name}")
    else:
        print(f"Fail to remove tags, code: {res_json.get('code')}, message: {res_json.get('message')}")
|
||||
|
||||
def list_chunks(self, command_dict):
|
||||
if self.server_type != "user":
|
||||
print("This command is only allowed in USER mode")
|
||||
@@ -1903,6 +2010,12 @@ def run_command(client: RAGFlowClient, command_dict: dict):
|
||||
return client.insert_dataset_from_file(command_dict)
|
||||
case "insert_metadata_from_file":
|
||||
return client.insert_metadata_from_file(command_dict)
|
||||
case "update_chunk":
|
||||
return client.update_chunk(command_dict)
|
||||
case "set_metadata":
|
||||
return client.set_metadata(command_dict)
|
||||
case "remove_tags":
|
||||
return client.remove_tags(command_dict)
|
||||
case "list_chunks":
|
||||
return client.list_chunks(command_dict)
|
||||
case "meta":
|
||||
|
||||
Reference in New Issue
Block a user