2025-01-14 11:49:43 +08:00
# Core distribution metadata: name, version, summary, author, licence file,
# README and the supported Python range.
[project]
2024-09-23 10:00:44 +08:00
name = "ragflow"
2025-11-12 14:54:28 +08:00
version = "0.22.0"
2024-09-23 10:00:44 +08:00
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
2025-05-14 21:23:29 -07:00
authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license-files = ["LICENSE"]
2024-09-23 10:00:44 +08:00
readme = "README.md"
2025-01-14 11:49:43 +08:00
# Python 3.10 up to, but not including, 3.13.
requires-python = ">=3.10,<3.13"
# Runtime dependencies, one requirement string per entry. Most entries are
# pinned exactly (`==`); the rest use bounded or lower-bound-only ranges.
# Entries containing `;` carry an environment marker that restricts the
# platforms on which the requirement applies.
dependencies = [
2025-10-23 23:02:27 +08:00
"datrie>=0.8.3,<0.9.0" ,
2025-01-20 11:17:59 +08:00
"akshare>=1.15.78,<2.0.0" ,
2025-01-14 11:49:43 +08:00
"azure-storage-blob==12.22.0" ,
"azure-identity==1.17.1" ,
"azure-storage-file-datalake==12.16.0" ,
"anthropic==0.34.1" ,
"arxiv==2.1.3" ,
2025-10-23 23:02:27 +08:00
# Marker: only required on x86_64 machines or on macOS running on arm64.
"aspose-slides>=25.10.0,<26.0.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')" ,
2025-11-03 19:59:18 +08:00
"atlassian-python-api==4.0.7" ,
2025-01-14 11:49:43 +08:00
"beartype>=0.18.5,<0.19.0" ,
"bio==1.7.1" ,
"blinker==1.7.0" ,
"boto3==1.34.140" ,
"botocore==1.34.140" ,
"cachetools==5.3.3" ,
"chardet==5.2.0" ,
"cn2an==0.5.22" ,
"cohere==5.6.2" ,
2025-08-12 10:59:20 +08:00
"Crawl4AI>=0.3.8" ,
2025-01-14 11:49:43 +08:00
"dashscope==1.20.11" ,
"deepl==1.18.0" ,
"demjson3==3.0.6" ,
"discord-py==2.3.2" ,
2025-11-03 19:59:18 +08:00
"dropbox==12.0.2" ,
2025-01-14 11:49:43 +08:00
"duckduckgo-search>=7.2.0,<8.0.0" ,
"editdistance==0.8.1" ,
"elastic-transport==8.12.0" ,
"elasticsearch==8.12.1" ,
"elasticsearch-dsl==8.12.0" ,
2025-10-09 12:36:19 +08:00
"extract-msg>=0.39.0" ,
2025-01-14 11:49:43 +08:00
"filelock==3.15.4" ,
"flask==3.0.3" ,
"flask-cors==5.0.0" ,
"flask-login==0.6.3" ,
"flask-session==0.8.0" ,
"google-search-results==2.4.2" ,
2025-11-10 19:15:02 +08:00
"google-auth-oauthlib>=1.2.0,<2.0.0" ,
2025-01-14 11:49:43 +08:00
"groq==0.9.0" ,
"hanziconv==0.3.2" ,
"html-text==0.6.2" ,
2025-10-15 08:54:20 +02:00
"httpx[socks]>=0.28.1,<0.29.0" ,
2025-01-14 11:49:43 +08:00
"huggingface-hub>=0.25.0,<0.26.0" ,
2025-11-12 13:33:33 +08:00
"infinity-sdk==0.6.5" ,
2025-01-14 11:49:43 +08:00
"infinity-emb>=0.0.66,<0.0.67" ,
"itsdangerous==2.1.2" ,
2025-01-22 19:43:14 +08:00
"json-repair==0.35.0" ,
2025-11-03 19:59:18 +08:00
"jira==3.10.5" ,
2025-01-14 11:49:43 +08:00
"markdown==3.6" ,
"markdown-to-json==2.1.1" ,
"minio==7.2.4" ,
"mistralai==0.4.2" ,
2025-11-03 19:59:18 +08:00
"mypy-boto3-s3==1.40.26" ,
2025-01-14 11:49:43 +08:00
"nltk==3.9.1" ,
"numpy>=1.26.0,<2.0.0" ,
2025-11-03 19:59:18 +08:00
"Office365-REST-Python-Client==2.6.2" ,
2025-10-15 08:54:20 +02:00
"ollama>=0.5.0" ,
2025-01-26 09:37:59 +08:00
# The two onnxruntime markers are logical complements, so exactly one
# variant applies: the GPU build on non-macOS x86_64, the CPU build elsewhere.
"onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'" ,
"onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'" ,
2025-08-12 10:59:20 +08:00
"openai>=1.45.0" ,
2025-01-14 11:49:43 +08:00
# NOTE(review): both the regular and the headless OpenCV distributions are
# pinned to the same version — confirm that requiring both is intentional.
"opencv-python==4.10.0.84" ,
"opencv-python-headless==4.10.0.84" ,
"openpyxl>=3.1.0,<4.0.0" ,
2025-06-12 11:37:42 +08:00
"opendal>=0.45.0,<0.46.0" ,
2025-01-14 11:49:43 +08:00
"ormsgpack==1.5.0" ,
"pandas>=2.2.0,<3.0.0" ,
"pdfplumber==0.10.4" ,
"peewee==3.17.1" ,
"pillow==10.4.0" ,
"protobuf==5.27.2" ,
"psycopg2-binary==2.9.9" ,
"pyclipper==1.3.0.post5" ,
"pycryptodomex==3.20.0" ,
2025-06-12 11:37:42 +08:00
"pymysql>=1.1.1,<2.0.0" ,
2025-08-14 13:45:19 +08:00
"pypdf==6.0.0" ,
2025-01-14 11:49:43 +08:00
"python-dotenv==1.0.1" ,
"python-dateutil==2.8.2" ,
"python-pptx>=1.0.2,<2.0.0" ,
"pywencai==0.12.2" ,
"qianfan==0.4.6" ,
"ranx==0.3.20" ,
"readability-lxml==0.8.1" ,
"valkey==6.0.2" ,
"requests==2.32.2" ,
"replicate==0.31.0" ,
"roman-numbers==1.0.2" ,
"ruamel-base==1.0.0" ,
2025-06-12 11:37:42 +08:00
"ruamel-yaml>=0.18.6,<0.19.0" ,
2025-01-14 11:49:43 +08:00
"scholarly==1.7.11" ,
"scikit-learn==1.5.0" ,
"selenium==4.22.0" ,
"selenium-wire==5.1.0" ,
"setuptools>=75.2.0,<76.0.0" ,
"shapely==2.0.5" ,
"six==1.16.0" ,
2025-11-03 19:59:18 +08:00
"slack-sdk==3.37.0" ,
2025-01-14 11:49:43 +08:00
"strenum==0.4.15" ,
"tabulate==0.9.0" ,
2025-02-26 10:21:04 +08:00
"tavily-python==0.5.1" ,
2025-10-27 15:14:58 +08:00
"tencentcloud-sdk-python==3.0.1478" ,
2025-01-14 11:49:43 +08:00
"tika==2.6.0" ,
"tiktoken==0.7.0" ,
"umap_learn==0.5.6" ,
2025-10-15 08:54:20 +02:00
"vertexai==1.70.0" ,
"google-genai>=1.41.0,<2.0.0" ,
2025-07-30 19:41:09 +08:00
"volcengine==1.0.194" ,
2025-01-14 11:49:43 +08:00
"voyageai==0.2.3" ,
"webdriver-manager==4.0.1" ,
"werkzeug==3.0.6" ,
"wikipedia==1.4.0" ,
"word2number==1.1" ,
2025-05-27 09:28:52 +08:00
"xgboost==1.6.0" ,
2025-01-14 11:49:43 +08:00
"xpinyin==0.7.6" ,
2025-07-30 19:41:09 +08:00
"yfinance==0.2.65" ,
2025-01-14 11:49:43 +08:00
"zhipuai==2.0.1" ,
2025-10-16 15:07:49 +08:00
"google-generativeai>=0.8.1,<0.9.0" , # Needed for cv_model and embedding_model
2025-01-14 11:49:43 +08:00
"python-docx>=1.1.2,<2.0.0" ,
"pypdf2>=3.0.1,<4.0.0" ,
"graspologic>=3.4.1,<4.0.0" ,
"mini-racer>=0.12.4,<0.13.0" ,
"pyodbc>=5.2.0,<6.0.0" ,
2025-09-30 19:15:01 +08:00
"pyicu>=2.15.3,<3.0.0" ,
2025-01-14 11:49:43 +08:00
"flasgger>=0.9.7.1,<0.10.0" ,
2025-03-03 18:59:49 +08:00
"xxhash>=3.5.0,<4.0.0" ,
"trio>=0.29.0" ,
2025-03-24 13:18:47 +08:00
"langfuse>=2.60.0" ,
2025-03-26 15:34:42 +08:00
"debugpy>=1.8.13" ,
2025-06-23 16:53:59 +08:00
"mcp>=1.9.4" ,
2025-05-16 16:32:19 +08:00
"opensearch-py==2.7.1" ,
"pluginlib==0.9.4" ,
2025-06-23 16:53:59 +08:00
"click>=8.1.8" ,
Add fallback to use 'calamine' parse engine in excel_parser.py (#9374)
### What problem does this PR solve?
Add a fallback to the `calamine` engine when a parse error is raised by the
default `openpyxl` / `xlrd` engine.
For example, the following error is fixed:
```
Traceback (most recent call last):
File "/ragflow/deepdoc/parser/excel_parser.py", line 53, in _load_excel_to_workbook
df = pd.read_excel(file_like_object)
File "/ragflow/.venv/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 495, in read_excel
io = ExcelFile(
File "/ragflow/.venv/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 1567, in __init__
self._reader = self._engines[engine](
File "/ragflow/.venv/lib/python3.10/site-packages/pandas/io/excel/_xlrd.py", line 46, in __init__
super().__init__(
File "/ragflow/.venv/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 573, in __init__
self.book = self.load_workbook(self.handles.handle, engine_kwargs)
File "/ragflow/.venv/lib/python3.10/site-packages/pandas/io/excel/_xlrd.py", line 63, in load_workbook
return open_workbook(file_contents=data, **engine_kwargs)
File "/ragflow/.venv/lib/python3.10/site-packages/xlrd/__init__.py", line 172, in open_workbook
bk = open_workbook_xls(
File "/ragflow/.venv/lib/python3.10/site-packages/xlrd/book.py", line 68, in open_workbook_xls
bk.biff2_8_load(
File "/ragflow/.venv/lib/python3.10/site-packages/xlrd/book.py", line 641, in biff2_8_load
cd.locate_named_stream(UNICODE_LITERAL(qname))
File "/ragflow/.venv/lib/python3.10/site-packages/xlrd/compdoc.py", line 398, in locate_named_stream
result = self._locate_stream(
File "/ragflow/.venv/lib/python3.10/site-packages/xlrd/compdoc.py", line 429, in _locate_stream
raise CompDocError("%s corruption: seen[%d] == %d" % (qname, s, self.seen[s]))
xlrd.compdoc.CompDocError: Workbook corruption: seen[2] == 4
```
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
2025-08-12 12:41:33 +08:00
# Backs the `calamine` fallback parse engine in excel_parser.py, used when the
# default openpyxl / xlrd engine raises a parse error.
"python-calamine>=0.4.0" ,
2025-08-12 10:59:20 +08:00
"litellm>=1.74.15.post1" ,
2025-08-15 18:12:20 +08:00
"flask-mail>=0.10.0" ,
Feat: add admin CLI and admin service (#10186)
### What problem does this PR solve?
Introduce new feature: RAGFlow system admin service and CLI
### Introduction
Admin Service is a dedicated management component designed to monitor,
maintain, and administrate the RAGFlow system. It provides comprehensive
tools for ensuring system stability, performing operational tasks, and
managing users and permissions efficiently.
The service offers monitoring of critical components, including the
RAGFlow server, Task Executor processes, and dependent services such as
MySQL, Infinity / Elasticsearch, Redis, and MinIO. It automatically
checks their health status, resource usage, and uptime, and performs
restarts in case of failures to minimize downtime.
For user and system management, it supports listing, creating,
modifying, and deleting users and their associated resources like
knowledge bases and Agents.
Built with scalability and reliability in mind, the Admin Service
ensures smooth system operation and simplifies maintenance workflows.
It consists of a server-side Service and a command-line client (CLI),
both implemented in Python. User commands are parsed using the Lark
parsing toolkit.
- **Admin Service**: A backend service that interfaces with the RAGFlow
system to execute administrative operations and monitor its status.
- **Admin CLI**: A command-line interface that allows users to connect
to the Admin Service and issue commands for system management.
### Starting the Admin Service
1. Before starting the Admin Service, make sure the RAGFlow system is
already running.
2. Run the service script:
```bash
python admin/admin_server.py
```
The service will start and listen for incoming connections from the CLI
on the configured port.
### Using the Admin CLI
1. Ensure the Admin Service is running.
2. Launch the CLI client:
```bash
python admin/admin_client.py -h 0.0.0.0 -p 9381
```
## Supported Commands
Commands are case-insensitive and must be terminated with a semicolon
(`;`).
### Service Management Commands
- [x] `LIST SERVICES;`
- Lists all available services within the RAGFlow system.
- [ ] `SHOW SERVICE <id>;`
- Shows detailed status information for the service identified by
`<id>`.
- [ ] `STARTUP SERVICE <id>;`
- Attempts to start the service identified by `<id>`.
- [ ] `SHUTDOWN SERVICE <id>;`
- Attempts to gracefully shut down the service identified by `<id>`.
- [ ] `RESTART SERVICE <id>;`
- Attempts to restart the service identified by `<id>`.
### User Management Commands
- [x] `LIST USERS;`
- Lists all users known to the system.
- [ ] `SHOW USER '<username>';`
- Shows details and permissions for the specified user. The username
must be enclosed in single or double quotes.
- [ ] `DROP USER '<username>';`
- Removes the specified user from the system. Use with caution.
- [ ] `ALTER USER PASSWORD '<username>' '<new_password>';`
- Changes the password for the specified user.
### Data and Agent Commands
- [ ] `LIST DATASETS OF '<username>';`
- Lists the datasets associated with the specified user.
- [ ] `LIST AGENTS OF '<username>';`
- Lists the agents associated with the specified user.
### Meta-Commands
Meta-commands are prefixed with a backslash (`\`).
- `\?` or `\help`
- Shows help information for the available commands.
- `\q` or `\quit`
- Exits the CLI application.
## Examples
```commandline
admin> list users;
+-------------------------------+------------------------+-----------+-------------+
| create_date | email | is_active | nickname |
+-------------------------------+------------------------+-----------+-------------+
| Fri, 22 Nov 2024 16:03:41 GMT | jeffery@infiniflow.org | 1 | Jeffery |
| Fri, 22 Nov 2024 16:10:55 GMT | aya@infiniflow.org | 1 | Waterdancer |
+-------------------------------+------------------------+-----------+-------------+
admin> list services;
+-------------------------------------------------------------------------------------------+-----------+----+---------------+-------+----------------+
| extra | host | id | name | port | service_type |
+-------------------------------------------------------------------------------------------+-----------+----+---------------+-------+----------------+
| {} | 0.0.0.0 | 0 | ragflow_0 | 9380 | ragflow_server |
| {'meta_type': 'mysql', 'password': 'infini_rag_flow', 'username': 'root'} | localhost | 1 | mysql | 5455 | meta_data |
| {'password': 'infini_rag_flow', 'store_type': 'minio', 'user': 'rag_flow'} | localhost | 2 | minio | 9000 | file_store |
| {'password': 'infini_rag_flow', 'retrieval_type': 'elasticsearch', 'username': 'elastic'} | localhost | 3 | elasticsearch | 1200 | retrieval |
| {'db_name': 'default_db', 'retrieval_type': 'infinity'} | localhost | 4 | infinity | 23817 | retrieval |
| {'database': 1, 'mq_type': 'redis', 'password': 'infini_rag_flow'} | localhost | 5 | redis | 6379 | message_queue |
+-------------------------------------------------------------------------------------------+-----------+----+---------------+-------+----------------+
```
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
Signed-off-by: jinhai <haijin.chn@gmail.com>
2025-09-22 10:37:49 +08:00
# Lark is the parsing toolkit used to parse user commands for the admin
# service / admin CLI.
"lark>=1.2.2" ,
2025-10-10 09:39:15 +08:00
"mammoth>=1.11.0" ,
"markdownify>=1.2.0" ,
2025-10-16 15:07:49 +08:00
"captcha>=0.7.1" ,
2025-10-23 23:02:27 +08:00
"pip>=25.2" ,
2025-11-14 19:52:11 +08:00
"pypandoc>=1.16" ,
2025-03-03 18:59:49 +08:00
# End of the runtime dependency list.
]
2025-06-03 15:21:06 +08:00
# Dependency group `test`: requirement strings with lower bounds only.
[dependency-groups]
test = [
    "hypothesis>=6.132.0",
    "openpyxl>=3.1.5",
    "pillow>=10.4.0",
    "pytest>=8.3.5",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "reportlab>=4.4.1",
    "requests>=2.32.2",
    "requests-toolbelt>=1.0.0",
]
2025-09-26 14:55:19 +08:00
# Package index entry for uv (an element of the `tool.uv.index` array of
# tables). The URL points at the pypi.tuna.tsinghua.edu.cn mirror.
# The array-of-tables header must be written with adjacent brackets: `[[` and
# `]]` may not contain whitespace between the two bracket characters.
[[tool.uv.index]]
2025-10-10 12:41:45 +08:00
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
2025-09-26 14:55:19 +08:00
2025-03-21 18:44:49 +08:00
# setuptools configuration: the explicit list of packages to include.
# NOTE(review): the spelling "intergrations" presumably mirrors the on-disk
# directory name — confirm against the repository layout before renaming.
[tool.setuptools]
2025-06-04 13:16:17 +08:00
packages = [
    "agent",
    "agentic_reasoning",
    "api",
    "deepdoc",
    "graphrag",
    "intergrations.chatgpt-on-wechat.plugins",
    "mcp.server",
    "rag",
    "sdk.python.ragflow_sdk",
]
2025-03-21 18:44:49 +08:00
2025-03-20 22:31:18 +08:00
# Ruff settings: maximum line length and paths left out of checking.
[tool.ruff]
line-length = 200
2025-06-04 13:16:17 +08:00
exclude = [".venv", "rag/svr/discord_svr.py"]
2025-03-20 22:31:18 +08:00
# Ruff lint rule selection.
[tool.ruff.lint]
# Adds the ASYNC rule prefixes on top of the default selection.
# NOTE(review): "ASYNC1" looks like a sub-prefix of "ASYNC" — confirm whether
# listing both is still needed.
extend-select = ["ASYNC", "ASYNC1"]
# E402: module-level import not at top of file.
ignore = ["E402"]
2025-06-04 13:16:17 +08:00
# pytest configuration: registers the priority markers p1, p2 and p3.
# Each entry has the form "<marker>: <description>".
[tool.pytest.ini_options]
markers = [
    "p1: high priority test cases",
    "p2: medium priority test cases",
    "p3: low priority test cases",
]