# Files
# ragflow/pyproject.toml
#
# 194 lines
# 5.3 KiB
# TOML
# Raw Normal View History

# Core project metadata.
[project]
name = "ragflow"
version = "0.22.0"
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license-files = ["LICENSE"]
readme = "README.md"
requires-python = ">=3.10,<3.13"
# Runtime requirements, one requirement string per line. Text after a `;`
# is an environment marker that restricts the entry to matching platforms.
dependencies = [
    "datrie>=0.8.3,<0.9.0",
    "akshare>=1.15.78,<2.0.0",
    "azure-storage-blob==12.22.0",
    "azure-identity==1.17.1",
    "azure-storage-file-datalake==12.16.0",
    "anthropic==0.34.1",
    "arxiv==2.1.3",
    "aspose-slides>=25.10.0,<26.0.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')",
    "atlassian-python-api==4.0.7",
    "beartype>=0.18.5,<0.19.0",
    "bio==1.7.1",
    "blinker==1.7.0",
    "boto3==1.34.140",
    "botocore==1.34.140",
    "cachetools==5.3.3",
    "chardet==5.2.0",
    "cn2an==0.5.22",
    "cohere==5.6.2",
    "Crawl4AI>=0.3.8",
    "dashscope==1.20.11",
    "deepl==1.18.0",
    "demjson3==3.0.6",
    "discord-py==2.3.2",
    "dropbox==12.0.2",
    "duckduckgo-search>=7.2.0,<8.0.0",
    "editdistance==0.8.1",
    "elastic-transport==8.12.0",
    "elasticsearch==8.12.1",
    "elasticsearch-dsl==8.12.0",
    "extract-msg>=0.39.0",
    "filelock==3.15.4",
    "flask==3.0.3",
    "flask-cors==5.0.0",
    "flask-login==0.6.3",
    "flask-session==0.8.0",
    "google-search-results==2.4.2",
    "google-auth-oauthlib>=1.2.0,<2.0.0",
    "groq==0.9.0",
    "hanziconv==0.3.2",
    "html-text==0.6.2",
    "httpx[socks]>=0.28.1,<0.29.0",
    "huggingface-hub>=0.25.0,<0.26.0",
    "infinity-sdk==0.6.5",
    "infinity-emb>=0.0.66,<0.0.67",
    "itsdangerous==2.1.2",
    "json-repair==0.35.0",
    "jira==3.10.5",
    "markdown==3.6",
    "markdown-to-json==2.1.1",
    "minio==7.2.4",
    "mistralai==0.4.2",
    "mypy-boto3-s3==1.40.26",
    "nltk==3.9.1",
    "numpy>=1.26.0,<2.0.0",
    "Office365-REST-Python-Client==2.6.2",
    "ollama>=0.5.0",
    # The two markers below are complementary: every platform matches exactly
    # one of onnxruntime / onnxruntime-gpu.
    "onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
    "onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
    "openai>=1.45.0",
    "opencv-python==4.10.0.84",
    "opencv-python-headless==4.10.0.84",
    "openpyxl>=3.1.0,<4.0.0",
    "opendal>=0.45.0,<0.46.0",
    "ormsgpack==1.5.0",
    "pandas>=2.2.0,<3.0.0",
    "pdfplumber==0.10.4",
    "peewee==3.17.1",
    "pillow==10.4.0",
    "protobuf==5.27.2",
    "psycopg2-binary==2.9.9",
    "pyclipper==1.3.0.post5",
    "pycryptodomex==3.20.0",
    "pymysql>=1.1.1,<2.0.0",
    "pypdf==6.0.0",
    "python-dotenv==1.0.1",
    "python-dateutil==2.8.2",
    "python-pptx>=1.0.2,<2.0.0",
    "pywencai==0.12.2",
    "qianfan==0.4.6",
    "ranx==0.3.20",
    "readability-lxml==0.8.1",
    "valkey==6.0.2",
    "requests==2.32.2",
    "replicate==0.31.0",
    "roman-numbers==1.0.2",
    "ruamel-base==1.0.0",
    "ruamel-yaml>=0.18.6,<0.19.0",
    "scholarly==1.7.11",
    "scikit-learn==1.5.0",
    "selenium==4.22.0",
    "selenium-wire==5.1.0",
    "setuptools>=75.2.0,<76.0.0",
    "shapely==2.0.5",
    "six==1.16.0",
    "slack-sdk==3.37.0",
    "strenum==0.4.15",
    "tabulate==0.9.0",
    "tavily-python==0.5.1",
    "tencentcloud-sdk-python==3.0.1478",
    "tika==2.6.0",
    "tiktoken==0.7.0",
    "umap_learn==0.5.6",
    "vertexai==1.70.0",
    "google-genai>=1.41.0,<2.0.0",
    "volcengine==1.0.194",
    "voyageai==0.2.3",
    "webdriver-manager==4.0.1",
    "werkzeug==3.0.6",
    "wikipedia==1.4.0",
    "word2number==1.1",
    "xgboost==1.6.0",
    "xpinyin==0.7.6",
    "yfinance==0.2.65",
    "zhipuai==2.0.1",
    "google-generativeai>=0.8.1,<0.9.0", # Needed for cv_model and embedding_model
    "python-docx>=1.1.2,<2.0.0",
    "pypdf2>=3.0.1,<4.0.0",
    "graspologic>=3.4.1,<4.0.0",
    "mini-racer>=0.12.4,<0.13.0",
    "pyodbc>=5.2.0,<6.0.0",
    "pyicu>=2.15.3,<3.0.0",
    "flasgger>=0.9.7.1,<0.10.0",
    "xxhash>=3.5.0,<4.0.0",
    "trio>=0.29.0",
    "langfuse>=2.60.0",
    "debugpy>=1.8.13",
    "mcp>=1.9.4",
    "opensearch-py==2.7.1",
    "pluginlib==0.9.4",
    "click>=8.1.8",
    "python-calamine>=0.4.0",
    "litellm>=1.74.15.post1",
    "flask-mail>=0.10.0",
    "lark>=1.2.2",
    "mammoth>=1.11.0",
    "markdownify>=1.2.0",
    "captcha>=0.7.1",
    "pip>=25.2",
    "pypandoc>=1.16",
]
# Optional dependency groups. `test` lists the extra packages declared for
# the test suite; they are not part of the runtime `dependencies` array.
[dependency-groups]
test = [
    "hypothesis>=6.132.0",
    "openpyxl>=3.1.5",
    "pillow>=10.4.0",
    "pytest>=8.3.5",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "reportlab>=4.4.1",
    "requests>=2.32.2",
    "requests-toolbelt>=1.0.0",
]
# Package index entry for uv. The URL's host (pypi.tuna.tsinghua.edu.cn) is
# the Tsinghua University TUNA mirror of PyPI.
# NOTE(review): no `default = true` is set here, so presumably PyPI itself
# stays available as the fallback index — confirm against the uv docs.
[[tool.uv.index]]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
# Explicit package list handed to setuptools.
# NOTE(review): "intergrations" is spelled as in the original entry; it
# presumably mirrors the on-disk directory name, so verify before renaming.
[tool.setuptools]
packages = [
    "agent",
    "agentic_reasoning",
    "api",
    "deepdoc",
    "graphrag",
    "intergrations.chatgpt-on-wechat.plugins",
    "mcp.server",
    "rag",
    "sdk.python.ragflow_sdk",
]
# Ruff: general settings. Lines may be up to 200 columns wide; the virtual
# environment and the Discord server script are excluded from checking.
[tool.ruff]
line-length = 200
exclude = [
    ".venv",
    "rag/svr/discord_svr.py",
]
# Ruff: lint rule selection.
[tool.ruff.lint]
# Rule selectors enabled in addition to ruff's default set.
# NOTE(review): "ASYNC1" looks like a sub-prefix of "ASYNC" and may be
# redundant — confirm against the ruff rule-selection docs before removing.
extend-select = [
    "ASYNC",
    "ASYNC1",
]
# E402 = module-level import not at top of file.
ignore = [
    "E402",
]
# Pytest: custom markers in "name: description" form, used to tag test
# cases by priority.
[tool.pytest.ini_options]
markers = [
    "p1: high priority test cases",
    "p2: medium priority test cases",
    "p3: low priority test cases",
]