# File: ragflow/pyproject.toml

# Core package metadata (PEP 621).
[project]
name = "ragflow"
version = "0.22.1"
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license-files = ["LICENSE"]
readme = "README.md"
requires-python = ">=3.11,<3.15"
# Runtime requirements as PEP 508 strings, one per line. Most entries are
# pinned exactly (`==`); the rest are bounded or lower-bounded ranges.
dependencies = [
    "datrie>=0.8.3,<0.9.0",
    "akshare>=1.15.78,<2.0.0",
    "azure-storage-blob==12.22.0",
    "azure-identity==1.17.1",
    "azure-storage-file-datalake==12.16.0",
    "anthropic==0.34.1",
    "arxiv==2.1.3",
    # Marker restricts this to x86_64 machines and arm64 macOS.
    "aspose-slides>=25.10.0,<26.0.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')",
    "atlassian-python-api==4.0.7",
    "beartype>=0.20.0,<1.0.0",
    "bio==1.7.1",
    "blinker==1.7.0",
    "boto3==1.34.140",
    "botocore==1.34.140",
    "cachetools==5.3.3",
    "chardet==5.2.0",
    "cn2an==0.5.22",
    "cohere==5.6.2",
    "Crawl4AI>=0.3.8",
    "dashscope==1.20.11",
    "deepl==1.18.0",
    "demjson3==3.0.6",
    "discord-py==2.3.2",
    "dropbox==12.0.2",
    "duckduckgo-search>=7.2.0,<8.0.0",
    "editdistance==0.8.1",
    "elastic-transport==8.12.0",
    "elasticsearch==8.12.1",
    "elasticsearch-dsl==8.12.0",
    "extract-msg>=0.39.0",
    "filelock==3.15.4",
    "flask==3.0.3",
    "flask-cors==5.0.0",
    "flask-login==0.6.3",
    "flask-session==0.8.0",
    "google-search-results==2.4.2",
    "google-auth-oauthlib>=1.2.0,<2.0.0",
    "groq==0.9.0",
    "hanziconv==0.3.2",
    "html-text==0.6.2",
    "httpx[socks]>=0.28.1,<0.29.0",
    "huggingface-hub>=0.25.0,<0.26.0",
    "infinity-sdk==0.6.11",
    "infinity-emb>=0.0.66,<0.0.67",
    "itsdangerous==2.1.2",
    "json-repair==0.35.0",
    "jira==3.10.5",
    "markdown==3.6",
    "markdown-to-json==2.1.1",
    "minio==7.2.4",
    "mistralai==0.4.2",
    "mypy-boto3-s3==1.40.26",
    "nltk==3.9.1",
    "numpy>=1.26.0,<2.0.0",
    "Office365-REST-Python-Client==2.6.2",
    "ollama>=0.5.0",
    # The two ONNX Runtime markers are complementary: the GPU build is chosen
    # on non-macOS x86_64, the plain build on every other platform.
    "onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
    "onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
    "openai>=1.45.0",
    "opencv-python==4.10.0.84",
    "opencv-python-headless==4.10.0.84",
    "openpyxl>=3.1.0,<4.0.0",
    "opendal>=0.45.0,<0.46.0",
    "ormsgpack==1.5.0",
    "pandas>=2.2.0,<3.0.0",
    "pdfplumber==0.10.4",
    "peewee==3.17.1",
    "pillow==10.4.0",
    "protobuf==5.27.2",
    "psycopg2-binary==2.9.9",
    "pyclipper==1.3.0.post5",
    "pycryptodomex==3.20.0",
    "pymysql>=1.1.1,<2.0.0",
    "pypdf==6.4.0",
    "python-dotenv==1.0.1",
    "python-dateutil==2.8.2",
    "python-pptx>=1.0.2,<2.0.0",
    "pywencai==0.12.2",
    "qianfan==0.4.6",
    "quart-auth==0.11.0",
    "quart-cors==0.8.0",
    "Quart==0.20.0",
    "ranx==0.3.20",
    "readability-lxml==0.8.1",
    "valkey==6.0.2",
    "requests>=2.32.3,<3.0.0",
    "replicate==0.31.0",
    "roman-numbers==1.0.2",
    "ruamel-base==1.0.0",
    "ruamel-yaml>=0.18.6,<0.19.0",
    "scholarly==1.7.11",
    "scikit-learn==1.5.0",
    "selenium==4.22.0",
    "selenium-wire==5.1.0",
    "setuptools>=78.1.1,<81.0.0",
    "shapely==2.0.5",
    "six==1.16.0",
    "slack-sdk==3.37.0",
    "strenum==0.4.15",
    "tabulate==0.9.0",
    "tavily-python==0.5.1",
    "tencentcloud-sdk-python==3.0.1478",
    "tika==2.6.0",
    "tiktoken==0.7.0",
    "umap_learn==0.5.6",
    "vertexai==1.70.0",
    "google-genai>=1.41.0,<2.0.0",
    "volcengine==1.0.194",
    "voyageai==0.2.3",
    "webdav4>=0.10.0,<0.11.0",
    "webdriver-manager==4.0.1",
    "werkzeug==3.0.6",
    "wikipedia==1.4.0",
    "word2number==1.1",
    "xgboost==1.6.0",
    "xpinyin==0.7.6",
    "yfinance==0.2.65",
    "zhipuai==2.0.1",
    "google-generativeai>=0.8.1,<0.9.0", # Needed for cv_model and embedding_model
    "python-docx>=1.1.2,<2.0.0",
    "pypdf2>=3.0.1,<4.0.0",
    # Direct reference: installed from a fork, pinned to an exact commit hash.
    "graspologic @ git+https://github.com/yuzhichang/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
    "mini-racer>=0.12.4,<0.13.0",
    "pyodbc>=5.2.0,<6.0.0",
    "flasgger>=0.9.7.1,<0.10.0",
    "xxhash>=3.5.0,<4.0.0",
    "trio>=0.17.0,<0.29.0",
    "langfuse>=2.60.0",
    "debugpy>=1.8.13",
    "mcp>=1.9.4",
    "opensearch-py==2.7.1",
    "pluginlib==0.9.4",
    "click>=8.1.8",
    "python-calamine>=0.4.0",
    "litellm>=1.74.15.post1",
    "flask-mail>=0.10.0",
    "lark>=1.2.2",
    "mammoth>=1.11.0",
    "markdownify>=1.2.0",
    "captcha>=0.7.1",
    "pip>=25.2",
    "moodlepy>=0.23.0",
    "pypandoc>=1.16",
    "pyobvector==0.2.18",
    "exceptiongroup>=1.3.0,<2.0.0",
    "ffmpeg-python>=0.2.0",
    "imageio-ffmpeg>=0.6.0",
]
# Dependency groups (PEP 735): requirement sets kept separate from the
# runtime `dependencies` list.
[dependency-groups]
# Packages belonging to the `test` group.
test = [
    "hypothesis>=6.132.0",
    "openpyxl>=3.1.5",
    "pillow>=10.4.0",
    "pytest>=8.3.5",
    "pytest-asyncio>=1.3.0",
    "pytest-xdist>=3.8.0",
    "pytest-cov>=7.0.0",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "reportlab>=4.4.1",
    "requests>=2.32.2",
    "requests-toolbelt>=1.0.0",
]
# Package index entry for uv. The URL points at the TUNA (Tsinghua
# University) PyPI mirror.
# NOTE(review): presumably chosen for download speed in mainland China — confirm.
[[tool.uv.index]]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
# setuptools packaging: explicit list of packages to include in the build
# (no automatic discovery).
[tool.setuptools]
# NOTE(review): "intergrations" is presumably the literal on-disk directory
# name — verify before "correcting" the spelling.
packages = [
    "agent",
    "agentic_reasoning",
    "api",
    "deepdoc",
    "graphrag",
    "intergrations.chatgpt-on-wechat.plugins",
    "mcp.server",
    "rag",
    "sdk.python.ragflow_sdk",
]
# Ruff settings shared by all of its sub-commands.
[tool.ruff]
line-length = 200
# Paths Ruff skips entirely.
exclude = [
    ".venv",
    "rag/svr/discord_svr.py",
]

# Lint rule selection, applied on top of Ruff's default rule set.
[tool.ruff.lint]
# "ASYNC" enables the flake8-async rule family.
# NOTE(review): "ASYNC1" looks like a prefix already covered by "ASYNC" — confirm it is redundant.
extend-select = [
    "ASYNC",
    "ASYNC1",
]
ignore = ["E402"]  # module-level import not at top of file
# pytest configuration.
[tool.pytest.ini_options]
# Added to sys.path so tests can import from the directory holding this file.
pythonpath = ["."]
# Discovery: where to look and which names count as tests.
testpaths = ["test"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
# Registered priority markers. `--strict-markers` (see addopts) makes any
# marker not listed here an error.
markers = [
    "p1: high priority test cases",
    "p2: medium priority test cases",
    "p3: low priority test cases",
]
# Warning filters, in order: promote every warning to an error, then carve
# out DeprecationWarning, which is ignored instead.
filterwarnings = [
    "error",
    "ignore::DeprecationWarning",
]
# Options prepended to every pytest invocation.
addopts = [
    "-v",                  # verbose output
    "--strict-markers",    # unknown markers are errors
    "--tb=short",          # shorter tracebacks
    "--disable-warnings",  # hide the warnings summary
    "--color=yes",         # always colorize output
]
# coverage.py: measurement settings.
[tool.coverage.run]
# Directories whose code is measured.
# NOTE(review): the path climbs two levels, so coverage is presumably started
# from a directory two levels below the repository root — confirm.
source = [
    "../../common",
    # Candidates that are currently switched off:
    # "../../api/db/services",
    # "../../utils",
]
# File patterns left out of measurement: tests, caches, virtualenvs,
# installed packages, build output, migrations and setup.py.
omit = [
    "*/tests/*",
    "*/test_*",
    "*/__pycache__/*",
    "*/.pytest_cache/*",
    "*/venv/*",
    "*/.venv/*",
    "*/env/*",
    "*/site-packages/*",
    "*/dist/*",
    "*/build/*",
    "*/migrations/*",
    "setup.py",
]
# coverage.py: report settings.
[tool.coverage.report]
precision = 2  # decimal places shown for percentages
show_missing = true
skip_covered = false
fail_under = 0  # minimum total coverage (0-100); 0 never fails the run
# Regular expressions; a source line matching any of them is excluded from
# the report. Setting this key replaces coverage.py's built-in list, so
# "pragma: no cover" is NOT honoured unless it is re-enabled below.
exclude_lines = [
    # "pragma: no cover",
    # "def __repr__",
    # "raise AssertionError",
    # "raise NotImplementedError",
    # "if __name__ == .__main__.:",
    # "if TYPE_CHECKING:",
    # Anchored so that only `pass` statements match. A bare "pass" pattern
    # would also exclude lines that merely contain the substring, such as
    # `password = ...` or `passed += 1`.
    '^[ \t]*pass\b',
]
# coverage.py: HTML report output.
[tool.coverage.html]
title = "Test Coverage Report"
directory = "htmlcov"  # output folder for the generated report
# Optional stylesheet override, currently unused:
# extra_css = "custom.css"