2025-01-14 11:49:43 +08:00
# Core package metadata (the PEP 621 `project` table).
[ project ]
2024-09-23 10:00:44 +08:00
name = "ragflow"
2025-11-19 09:50:23 +08:00
version = "0.22.1"
2024-09-23 10:00:44 +08:00
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
2025-05-14 21:23:29 -07:00
authors = [ { name = "Zhichang Yu" , email = "yuzhichang@gmail.com" } ]
license-files = [ "LICENSE" ]
2024-09-23 10:00:44 +08:00
readme = "README.md"
2025-12-09 19:55:25 +08:00
# Supported interpreters: Python 3.12 up to, but not including, 3.15.
requires-python = ">=3.12,<3.15"
2025-01-14 11:49:43 +08:00
dependencies = [
2025-10-23 23:02:27 +08:00
"datrie>=0.8.3,<0.9.0" ,
2025-01-20 11:17:59 +08:00
"akshare>=1.15.78,<2.0.0" ,
2025-01-14 11:49:43 +08:00
"azure-storage-blob==12.22.0" ,
"azure-identity==1.17.1" ,
"azure-storage-file-datalake==12.16.0" ,
"anthropic==0.34.1" ,
"arxiv==2.1.3" ,
2025-12-12 20:16:18 +08:00
"aspose-slides==24.7.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')" ,
2025-11-03 19:59:18 +08:00
"atlassian-python-api==4.0.7" ,
2025-11-22 11:56:43 +08:00
"beartype>=0.20.0,<1.0.0" ,
2025-01-14 11:49:43 +08:00
"bio==1.7.1" ,
"blinker==1.7.0" ,
"boto3==1.34.140" ,
"botocore==1.34.140" ,
"cachetools==5.3.3" ,
"chardet==5.2.0" ,
"cn2an==0.5.22" ,
"cohere==5.6.2" ,
2025-12-09 19:55:25 +08:00
"Crawl4AI>=0.4.0,<1.0.0" ,
2025-01-14 11:49:43 +08:00
"dashscope==1.20.11" ,
"deepl==1.18.0" ,
"demjson3==3.0.6" ,
"discord-py==2.3.2" ,
2025-11-03 19:59:18 +08:00
"dropbox==12.0.2" ,
2025-01-14 11:49:43 +08:00
"duckduckgo-search>=7.2.0,<8.0.0" ,
"editdistance==0.8.1" ,
"elastic-transport==8.12.0" ,
"elasticsearch==8.12.1" ,
"elasticsearch-dsl==8.12.0" ,
2025-10-09 12:36:19 +08:00
"extract-msg>=0.39.0" ,
2025-01-14 11:49:43 +08:00
"filelock==3.15.4" ,
"flask==3.0.3" ,
"flask-cors==5.0.0" ,
"flask-login==0.6.3" ,
"flask-session==0.8.0" ,
"google-search-results==2.4.2" ,
2025-11-10 19:15:02 +08:00
"google-auth-oauthlib>=1.2.0,<2.0.0" ,
2025-01-14 11:49:43 +08:00
"groq==0.9.0" ,
"hanziconv==0.3.2" ,
"html-text==0.6.2" ,
2025-10-15 08:54:20 +02:00
"httpx[socks]>=0.28.1,<0.29.0" ,
2025-01-14 11:49:43 +08:00
"huggingface-hub>=0.25.0,<0.26.0" ,
2025-12-09 16:23:37 +08:00
"infinity-sdk==0.6.11" ,
2025-01-14 11:49:43 +08:00
"infinity-emb>=0.0.66,<0.0.67" ,
"itsdangerous==2.1.2" ,
2025-01-22 19:43:14 +08:00
"json-repair==0.35.0" ,
2025-11-03 19:59:18 +08:00
"jira==3.10.5" ,
2025-01-14 11:49:43 +08:00
"markdown==3.6" ,
"markdown-to-json==2.1.1" ,
"minio==7.2.4" ,
"mistralai==0.4.2" ,
2025-11-03 19:59:18 +08:00
"mypy-boto3-s3==1.40.26" ,
2025-01-14 11:49:43 +08:00
"nltk==3.9.1" ,
"numpy>=1.26.0,<2.0.0" ,
2025-11-03 19:59:18 +08:00
"Office365-REST-Python-Client==2.6.2" ,
2025-10-15 08:54:20 +02:00
"ollama>=0.5.0" ,
2025-12-09 19:55:25 +08:00
"onnxruntime==1.23.2; sys_platform == 'darwin' or platform_machine != 'x86_64'" ,
"onnxruntime-gpu==1.23.2; sys_platform != 'darwin' and platform_machine == 'x86_64'" ,
2025-08-12 10:59:20 +08:00
"openai>=1.45.0" ,
2025-01-14 11:49:43 +08:00
# NOTE(review): both the regular and the headless OpenCV wheels are pinned;
# presumably they install the same `cv2` package — confirm both are required.
"opencv-python==4.10.0.84" ,
"opencv-python-headless==4.10.0.84" ,
"openpyxl>=3.1.0,<4.0.0" ,
2025-06-12 11:37:42 +08:00
"opendal>=0.45.0,<0.46.0" ,
2025-01-14 11:49:43 +08:00
"ormsgpack==1.5.0" ,
"pandas>=2.2.0,<3.0.0" ,
"pdfplumber==0.10.4" ,
"peewee==3.17.1" ,
2025-12-09 19:55:25 +08:00
"pillow>=10.4.0,<13.0.0" ,
2025-01-14 11:49:43 +08:00
"protobuf==5.27.2" ,
2025-12-09 19:55:25 +08:00
"psycopg2-binary>=2.9.11,<3.0.0" ,
"pyclipper>=1.4.0,<2.0.0" ,
2025-01-14 11:49:43 +08:00
"pycryptodomex==3.20.0" ,
2025-06-12 11:37:42 +08:00
"pymysql>=1.1.1,<2.0.0" ,
Chore(deps): Bump pypdf from 6.0.0 to 6.4.0 (#11505)
Bumps [pypdf](https://github.com/py-pdf/pypdf) from 6.0.0 to 6.4.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/py-pdf/pypdf/releases">pypdf's
releases</a>.</em></p>
<blockquote>
<h2>Version 6.4.0, 2025-11-23</h2>
<h2>What's new</h2>
<h3>Security (SEC)</h3>
<ul>
<li>Reduce default limit for LZW decoding by <a
href="https://github.com/stefan6419846"><code>@stefan6419846</code></a></li>
</ul>
<h3>New Features (ENH)</h3>
<ul>
<li>Parse and format comb fields in text widget annotations (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3519">#3519</a>)
by <a href="https://github.com/PJBrs"><code>@PJBrs</code></a></li>
</ul>
<h3>Robustness (ROB)</h3>
<ul>
<li>Silently ignore Adobe Ascii85 whitespace for suffix detection (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3528">#3528</a>)
by <a href="https://github.com/mbierma"><code>@mbierma</code></a></li>
</ul>
<p><a href="https://github.com/py-pdf/pypdf/compare/6.3.0...6.4.0">Full
Changelog</a></p>
<h2>Version 6.3.0, 2025-11-16</h2>
<h2>What's new</h2>
<h3>New Features (ENH)</h3>
<ul>
<li>Wrap and align text in flattened PDF forms (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3465">#3465</a>)
by <a href="https://github.com/PJBrs"><code>@PJBrs</code></a></li>
</ul>
<h3>Bug Fixes (BUG)</h3>
<ul>
<li>Fix missing "PreventGC" when cloning (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3520">#3520</a>)
by <a
href="https://github.com/patrick91"><code>@patrick91</code></a></li>
<li>Preserve JPEG image quality by default (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3516">#3516</a>)
by <a href="https://github.com/Lucas-C"><code>@Lucas-C</code></a></li>
</ul>
<p><a href="https://github.com/py-pdf/pypdf/compare/6.2.0...6.3.0">Full
Changelog</a></p>
<h2>Version 6.2.0, 2025-11-09</h2>
<h2>What's new</h2>
<h3>New Features (ENH)</h3>
<ul>
<li>Add 'strict' parameter to PDFWriter (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3503">#3503</a>)
by <a
href="https://github.com/Arya-A-Nair"><code>@Arya-A-Nair</code></a></li>
</ul>
<h3>Bug Fixes (BUG)</h3>
<ul>
<li>PdfWriter.append fails when there are articles being None (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3509">#3509</a>)
by <a
href="https://github.com/Noah-Houghton"><code>@Noah-Houghton</code></a></li>
</ul>
<h3>Documentation (DOC)</h3>
<ul>
<li>Execute docs examples in CI (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3507">#3507</a>)
by <a
href="https://github.com/ievgen-kapinos"><code>@ievgen-kapinos</code></a></li>
</ul>
<p><a href="https://github.com/py-pdf/pypdf/compare/6.1.3...6.2.0">Full
Changelog</a></p>
<h2>Version 6.1.3, 2025-10-22</h2>
<h2>What's new</h2>
<h3>Security (SEC)</h3>
<ul>
<li>Allow limiting size of LZWDecode streams (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3502">#3502</a>)
by <a
href="https://github.com/stefan6419846"><code>@stefan6419846</code></a></li>
<li>Avoid infinite loop when reading broken DCT-based inline images (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3501">#3501</a>)
by <a
href="https://github.com/stefan6419846"><code>@stefan6419846</code></a></li>
</ul>
<h3>Bug Fixes (BUG)</h3>
<ul>
<li>PageObject.scale() scales media box incorrectly (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3489">#3489</a>)
by <a href="https://github.com/Nid01"><code>@Nid01</code></a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/py-pdf/pypdf/blob/main/CHANGELOG.md">pypdf's
changelog</a>.</em></p>
<blockquote>
<h2>Version 6.4.0, 2025-11-23</h2>
<h3>Security (SEC)</h3>
<ul>
<li>Reduce default limit for LZW decoding</li>
</ul>
<h3>New Features (ENH)</h3>
<ul>
<li>Parse and format comb fields in text widget annotations (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3519">#3519</a>)</li>
</ul>
<h3>Robustness (ROB)</h3>
<ul>
<li>Silently ignore Adobe Ascii85 whitespace for suffix detection (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3528">#3528</a>)</li>
</ul>
<p><a href="https://github.com/py-pdf/pypdf/compare/6.3.0...6.4.0">Full
Changelog</a></p>
<h2>Version 6.3.0, 2025-11-16</h2>
<h3>New Features (ENH)</h3>
<ul>
<li>Wrap and align text in flattened PDF forms (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3465">#3465</a>)</li>
</ul>
<h3>Bug Fixes (BUG)</h3>
<ul>
<li>Fix missing "PreventGC" when cloning (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3520">#3520</a>)</li>
<li>Preserve JPEG image quality by default (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3516">#3516</a>)</li>
</ul>
<p><a href="https://github.com/py-pdf/pypdf/compare/6.2.0...6.3.0">Full
Changelog</a></p>
<h2>Version 6.2.0, 2025-11-09</h2>
<h3>New Features (ENH)</h3>
<ul>
<li>Add 'strict' parameter to PDFWriter (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3503">#3503</a>)</li>
</ul>
<h3>Bug Fixes (BUG)</h3>
<ul>
<li>PdfWriter.append fails when there are articles being None (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3509">#3509</a>)</li>
</ul>
<h3>Documentation (DOC)</h3>
<ul>
<li>Execute docs examples in CI (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3507">#3507</a>)</li>
</ul>
<p><a href="https://github.com/py-pdf/pypdf/compare/6.1.3...6.2.0">Full
Changelog</a></p>
<h2>Version 6.1.3, 2025-10-22</h2>
<h3>Security (SEC)</h3>
<ul>
<li>Allow limiting size of LZWDecode streams (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3502">#3502</a>)</li>
<li>Avoid infinite loop when reading broken DCT-based inline images (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3501">#3501</a>)</li>
</ul>
<h3>Bug Fixes (BUG)</h3>
<ul>
<li>PageObject.scale() scales media box incorrectly (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3489">#3489</a>)</li>
</ul>
<h3>Robustness (ROB)</h3>
<ul>
<li>Fail with explicit exception when image mode is an empty array (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3500">#3500</a>)</li>
</ul>
<p><a href="https://github.com/py-pdf/pypdf/compare/6.1.2...6.1.3">Full
Changelog</a></p>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/py-pdf/pypdf/commit/310e571f2be1bc406a20b738e870d1b556d3c3a5"><code>310e571</code></a>
REL: 6.4.0</li>
<li><a
href="https://github.com/py-pdf/pypdf/commit/96186725e5e6f237129a58a97cd19204a9ce40b2"><code>9618672</code></a>
Merge commit from fork</li>
<li><a
href="https://github.com/py-pdf/pypdf/commit/41e2e55c15ac523bfe504ebd2cfe83b777faeaac"><code>41e2e55</code></a>
MAINT: Disable automated tagging on release</li>
<li><a
href="https://github.com/py-pdf/pypdf/commit/82faf984c0345d89ea757712665a950e28115eae"><code>82faf98</code></a>
ROB: Silently ignore Adobe Ascii85 whitespace for suffix detection (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3528">#3528</a>)</li>
<li><a
href="https://github.com/py-pdf/pypdf/commit/cd172d91dacbb4ac6629157cfbe302256e7f86d3"><code>cd172d9</code></a>
DEV: Bump actions/checkout from 5 to 6 (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3531">#3531</a>)</li>
<li><a
href="https://github.com/py-pdf/pypdf/commit/ff561f447308f6c3e915ff22b47867734c4d5263"><code>ff561f4</code></a>
STY: Tweak PdfWriter (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3337">#3337</a>)</li>
<li><a
href="https://github.com/py-pdf/pypdf/commit/e9e3735f12f7668075e85c2a97293020db337b67"><code>e9e3735</code></a>
MAINT: Update comments, check for warning message (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3521">#3521</a>)</li>
<li><a
href="https://github.com/py-pdf/pypdf/commit/905745a12c8d8a2cf667282bc6ae34c5c5422673"><code>905745a</code></a>
TST: Add test for retrieving P image with alpha mask (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3525">#3525</a>)</li>
<li><a
href="https://github.com/py-pdf/pypdf/commit/bd433f7ae0ac105c12aa27396330bf1212bf4b7f"><code>bd433f7</code></a>
ENH: Parse and format comb fields in text widget annotations (<a
href="https://redirect.github.com/py-pdf/pypdf/issues/3519">#3519</a>)</li>
<li><a
href="https://github.com/py-pdf/pypdf/commit/c0caa5d2c8a00ce8d9eaef6d4aa02bdb79c2ce7b"><code>c0caa5d</code></a>
REL: 6.3.0</li>
<li>Additional commits viewable in <a
href="https://github.com/py-pdf/pypdf/compare/6.0.0...6.4.0">compare
view</a></li>
</ul>
</details>
<br />
[](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.
[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)
---
<details>
<summary>Dependabot commands and options</summary>
<br />
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the
[Security Alerts
page](https://github.com/infiniflow/ragflow/network/alerts).
</details>
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-25 14:26:43 +08:00
"pypdf==6.4.0" ,
2025-01-14 11:49:43 +08:00
"python-dotenv==1.0.1" ,
"python-dateutil==2.8.2" ,
"python-pptx>=1.0.2,<2.0.0" ,
2025-12-09 19:55:25 +08:00
"pywencai>=0.13.1,<1.0.0" ,
2025-01-14 11:49:43 +08:00
"qianfan==0.4.6" ,
2025-11-18 17:05:16 +08:00
"quart-auth==0.11.0" ,
"quart-cors==0.8.0" ,
"Quart==0.20.0" ,
2025-01-14 11:49:43 +08:00
"ranx==0.3.20" ,
2025-12-09 19:55:25 +08:00
"readability-lxml>=0.8.4,<1.0.0" ,
2025-01-14 11:49:43 +08:00
"valkey==6.0.2" ,
2025-12-09 16:23:37 +08:00
"requests>=2.32.3,<3.0.0" ,
2025-01-14 11:49:43 +08:00
"replicate==0.31.0" ,
"roman-numbers==1.0.2" ,
"ruamel-base==1.0.0" ,
2025-06-12 11:37:42 +08:00
"ruamel-yaml>=0.18.6,<0.19.0" ,
2025-01-14 11:49:43 +08:00
"scholarly==1.7.11" ,
"scikit-learn==1.5.0" ,
"selenium==4.22.0" ,
"selenium-wire==5.1.0" ,
2025-12-09 16:23:37 +08:00
"setuptools>=78.1.1,<81.0.0" ,
2025-01-14 11:49:43 +08:00
"shapely==2.0.5" ,
"six==1.16.0" ,
2025-11-03 19:59:18 +08:00
"slack-sdk==3.37.0" ,
2025-01-14 11:49:43 +08:00
"strenum==0.4.15" ,
"tabulate==0.9.0" ,
2025-02-26 10:21:04 +08:00
"tavily-python==0.5.1" ,
2025-10-27 15:14:58 +08:00
"tencentcloud-sdk-python==3.0.1478" ,
2025-01-14 11:49:43 +08:00
"tika==2.6.0" ,
"tiktoken==0.7.0" ,
"umap_learn==0.5.6" ,
2025-10-15 08:54:20 +02:00
"vertexai==1.70.0" ,
"google-genai>=1.41.0,<2.0.0" ,
2025-07-30 19:41:09 +08:00
"volcengine==1.0.194" ,
2025-01-14 11:49:43 +08:00
"voyageai==0.2.3" ,
2025-11-26 07:14:42 +01:00
"webdav4>=0.10.0,<0.11.0" ,
2025-01-14 11:49:43 +08:00
"webdriver-manager==4.0.1" ,
"werkzeug==3.0.6" ,
"wikipedia==1.4.0" ,
"word2number==1.1" ,
2025-05-27 09:28:52 +08:00
"xgboost==1.6.0" ,
2025-01-14 11:49:43 +08:00
"xpinyin==0.7.6" ,
2025-07-30 19:41:09 +08:00
"yfinance==0.2.65" ,
2025-01-14 11:49:43 +08:00
"zhipuai==2.0.1" ,
2025-10-16 15:07:49 +08:00
"google-generativeai>=0.8.1,<0.9.0" , # Needed for cv_model and embedding_model
2025-01-14 11:49:43 +08:00
"python-docx>=1.1.2,<2.0.0" ,
"pypdf2>=3.0.1,<4.0.0" ,
2025-11-22 11:56:43 +08:00
"graspologic @ git+https://github.com/yuzhichang/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd" ,
2025-01-14 11:49:43 +08:00
"mini-racer>=0.12.4,<0.13.0" ,
"pyodbc>=5.2.0,<6.0.0" ,
"flasgger>=0.9.7.1,<0.10.0" ,
2025-03-03 18:59:49 +08:00
"xxhash>=3.5.0,<4.0.0" ,
2025-11-21 12:58:49 +01:00
"trio>=0.17.0,<0.29.0" ,
2025-03-24 13:18:47 +08:00
"langfuse>=2.60.0" ,
2025-03-26 15:34:42 +08:00
"debugpy>=1.8.13" ,
2025-06-23 16:53:59 +08:00
"mcp>=1.9.4" ,
2025-05-16 16:32:19 +08:00
"opensearch-py==2.7.1" ,
"pluginlib==0.9.4" ,
2025-06-23 16:53:59 +08:00
"click>=8.1.8" ,
Add fallback to use 'calamine' parse engine in excel_parser.py (#9374)
### What problem does this PR solve?
add fallback to `calamine` engine when parse error raised using the
default `openpyxl` / `xlrd` engine.
e.g. the following error can be fixed:
```
Traceback (most recent call last):
File "/ragflow/deepdoc/parser/excel_parser.py", line 53, in _load_excel_to_workbook
df = pd.read_excel(file_like_object)
File "/ragflow/.venv/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 495, in read_excel
io = ExcelFile(
File "/ragflow/.venv/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 1567, in __init__
self._reader = self._engines[engine](
File "/ragflow/.venv/lib/python3.10/site-packages/pandas/io/excel/_xlrd.py", line 46, in __init__
super().__init__(
File "/ragflow/.venv/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 573, in __init__
self.book = self.load_workbook(self.handles.handle, engine_kwargs)
File "/ragflow/.venv/lib/python3.10/site-packages/pandas/io/excel/_xlrd.py", line 63, in load_workbook
return open_workbook(file_contents=data, **engine_kwargs)
File "/ragflow/.venv/lib/python3.10/site-packages/xlrd/__init__.py", line 172, in open_workbook
bk = open_workbook_xls(
File "/ragflow/.venv/lib/python3.10/site-packages/xlrd/book.py", line 68, in open_workbook_xls
bk.biff2_8_load(
File "/ragflow/.venv/lib/python3.10/site-packages/xlrd/book.py", line 641, in biff2_8_load
cd.locate_named_stream(UNICODE_LITERAL(qname))
File "/ragflow/.venv/lib/python3.10/site-packages/xlrd/compdoc.py", line 398, in locate_named_stream
result = self._locate_stream(
File "/ragflow/.venv/lib/python3.10/site-packages/xlrd/compdoc.py", line 429, in _locate_stream
raise CompDocError("%s corruption: seen[%d] == %d" % (qname, s, self.seen[s]))
xlrd.compdoc.CompDocError: Workbook corruption: seen[2] == 4
```
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
2025-08-12 12:41:33 +08:00
"python-calamine>=0.4.0" ,
2025-08-12 10:59:20 +08:00
"litellm>=1.74.15.post1" ,
2025-08-15 18:12:20 +08:00
"flask-mail>=0.10.0" ,
Feat: add admin CLI and admin service (#10186)
### What problem does this PR solve?
Introduce new feature: RAGFlow system admin service and CLI
### Introduction
Admin Service is a dedicated management component designed to monitor,
maintain, and administrate the RAGFlow system. It provides comprehensive
tools for ensuring system stability, performing operational tasks, and
managing users and permissions efficiently.
The service offers monitoring of critical components, including the
RAGFlow server, Task Executor processes, and dependent services such as
MySQL, Infinity / Elasticsearch, Redis, and MinIO. It automatically
checks their health status, resource usage, and uptime, and performs
restarts in case of failures to minimize downtime.
For user and system management, it supports listing, creating,
modifying, and deleting users and their associated resources like
knowledge bases and Agents.
Built with scalability and reliability in mind, the Admin Service
ensures smooth system operation and simplifies maintenance workflows.
It consists of a server-side Service and a command-line client (CLI),
both implemented in Python. User commands are parsed using the Lark
parsing toolkit.
- **Admin Service**: A backend service that interfaces with the RAGFlow
system to execute administrative operations and monitor its status.
- **Admin CLI**: A command-line interface that allows users to connect
to the Admin Service and issue commands for system management.
### Starting the Admin Service
1. Before starting the Admin Service, make sure the RAGFlow system is
already running.
2. Run the service script:
```bash
python admin/admin_server.py
```
The service will start and listen for incoming connections from the CLI
on the configured port.
### Using the Admin CLI
1. Ensure the Admin Service is running.
2. Launch the CLI client:
```bash
python admin/admin_client.py -h 0.0.0.0 -p 9381
```
## Supported Commands
Commands are case-insensitive and must be terminated with a semicolon
(`;`).
### Service Management Commands
- [x] `LIST SERVICES;`
- Lists all available services within the RAGFlow system.
- [ ] `SHOW SERVICE <id>;`
- Shows detailed status information for the service identified by
`<id>`.
- [ ] `STARTUP SERVICE <id>;`
- Attempts to start the service identified by `<id>`.
- [ ] `SHUTDOWN SERVICE <id>;`
- Attempts to gracefully shut down the service identified by `<id>`.
- [ ] `RESTART SERVICE <id>;`
- Attempts to restart the service identified by `<id>`.
### User Management Commands
- [x] `LIST USERS;`
- Lists all users known to the system.
- [ ] `SHOW USER '<username>';`
- Shows details and permissions for the specified user. The username
must be enclosed in single or double quotes.
- [ ] `DROP USER '<username>';`
- Removes the specified user from the system. Use with caution.
- [ ] `ALTER USER PASSWORD '<username>' '<new_password>';`
- Changes the password for the specified user.
### Data and Agent Commands
- [ ] `LIST DATASETS OF '<username>';`
- Lists the datasets associated with the specified user.
- [ ] `LIST AGENTS OF '<username>';`
- Lists the agents associated with the specified user.
### Meta-Commands
Meta-commands are prefixed with a backslash (`\`).
- `\?` or `\help`
- Shows help information for the available commands.
- `\q` or `\quit`
- Exits the CLI application.
## Examples
```commandline
admin> list users;
+-------------------------------+------------------------+-----------+-------------+
| create_date | email | is_active | nickname |
+-------------------------------+------------------------+-----------+-------------+
| Fri, 22 Nov 2024 16:03:41 GMT | jeffery@infiniflow.org | 1 | Jeffery |
| Fri, 22 Nov 2024 16:10:55 GMT | aya@infiniflow.org | 1 | Waterdancer |
+-------------------------------+------------------------+-----------+-------------+
admin> list services;
+-------------------------------------------------------------------------------------------+-----------+----+---------------+-------+----------------+
| extra | host | id | name | port | service_type |
+-------------------------------------------------------------------------------------------+-----------+----+---------------+-------+----------------+
| {} | 0.0.0.0 | 0 | ragflow_0 | 9380 | ragflow_server |
| {'meta_type': 'mysql', 'password': 'infini_rag_flow', 'username': 'root'} | localhost | 1 | mysql | 5455 | meta_data |
| {'password': 'infini_rag_flow', 'store_type': 'minio', 'user': 'rag_flow'} | localhost | 2 | minio | 9000 | file_store |
| {'password': 'infini_rag_flow', 'retrieval_type': 'elasticsearch', 'username': 'elastic'} | localhost | 3 | elasticsearch | 1200 | retrieval |
| {'db_name': 'default_db', 'retrieval_type': 'infinity'} | localhost | 4 | infinity | 23817 | retrieval |
| {'database': 1, 'mq_type': 'redis', 'password': 'infini_rag_flow'} | localhost | 5 | redis | 6379 | message_queue |
+-------------------------------------------------------------------------------------------+-----------+----+---------------+-------+----------------+
```
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
Signed-off-by: jinhai <haijin.chn@gmail.com>
2025-09-22 10:37:49 +08:00
"lark>=1.2.2" ,
2025-10-10 09:39:15 +08:00
"mammoth>=1.11.0" ,
"markdownify>=1.2.0" ,
2025-10-16 15:07:49 +08:00
"captcha>=0.7.1" ,
2025-10-23 23:02:27 +08:00
"pip>=25.2" ,
2025-11-21 12:58:49 +01:00
"moodlepy>=0.23.0" ,
2025-11-14 19:52:11 +08:00
"pypandoc>=1.16" ,
2025-11-20 10:00:14 +08:00
"pyobvector==0.2.18" ,
2025-12-02 11:17:31 +08:00
"exceptiongroup>=1.3.0,<2.0.0" ,
"ffmpeg-python>=0.2.0" ,
"imageio-ffmpeg>=0.6.0" ,
2025-12-15 09:45:18 +08:00
"cryptography==46.0.3" ,
Feature/docs generator (#11858)
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### What problem does this PR solve?
This PR introduces a new Docs Generator agent component for producing
downloadable PDF, DOCX, or TXT files from Markdown content generated
within a RAGFlow workflow.
### **Key Features**
**Backend**
- New component: DocsGenerator (agent/component/docs_generator.py)
-
- Markdown → PDF/DOCX/TXT conversion
-
- Supports tables, lists, code blocks, headings, and rich formatting
-
- Configurable document style (fonts, margins, colors, page size,
orientation)
-
- Optional header logo and footer with page numbers/timestamps
-
**Frontend**
- New configuration UI for the Docs Generator
-
- Download button integrated into the chat interface
-
- Output wired to the Message component
-
- Full i18n support
**Documentation**
Added component guide:
docs/guides/agent/agent_component_reference/docs_generator.md
**Usage**
Add the Docs Generator to a workflow, connect Markdown output from an
upstream component, configure metadata/style, and feed its output into
the Message component. Users will see a document download button
directly in the chat.
**Contributor Note**
We have been following RAGFlow for more than a year and a half now and
have worked extensively on personalizing the framework and integrating
it into several of our internal systems. Over the past year and a half,
we have built multiple platforms that rely on RAGFlow as a core
component, which has given us a strong appreciation for how flexible and
powerful the project is.
We also previously contributed the full Italian translation, and we were
glad to see it accepted. This new Docs Generator component was created
for our own production needs, and we believe that it may be useful for
many others in the community as well.
We want to sincerely thank the entire RAGFlow team for the remarkable
work you have done and continue to do. If there are opportunities to
contribute further, we would be glad to help whenever we have time
available. It would be a pleasure to support the project in any way we
can.
If appropriate, we would be glad to be listed among the project’s
contributors, but in any case we look forward to continuing to support
and contribute to the project.
PentaFrame Development Team
---------
Co-authored-by: PentaFrame <info@pentaframe.it>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2025-12-12 07:59:43 +01:00
"reportlab>=4.4.1" ,
"jinja2>=3.1.0" ,
2025-12-12 10:23:40 +08:00
"boxsdk>=10.1.0" ,
Feature/docs generator (#11858)
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
### What problem does this PR solve?
This PR introduces a new Docs Generator agent component for producing
downloadable PDF, DOCX, or TXT files from Markdown content generated
within a RAGFlow workflow.
### **Key Features**
**Backend**
- New component: DocsGenerator (agent/component/docs_generator.py)
-
- Markdown → PDF/DOCX/TXT conversion
-
- Supports tables, lists, code blocks, headings, and rich formatting
-
- Configurable document style (fonts, margins, colors, page size,
orientation)
-
- Optional header logo and footer with page numbers/timestamps
-
**Frontend**
- New configuration UI for the Docs Generator
-
- Download button integrated into the chat interface
-
- Output wired to the Message component
-
- Full i18n support
**Documentation**
Added component guide:
docs/guides/agent/agent_component_reference/docs_generator.md
**Usage**
Add the Docs Generator to a workflow, connect Markdown output from an
upstream component, configure metadata/style, and feed its output into
the Message component. Users will see a document download button
directly in the chat.
**Contributor Note**
We have been following RAGFlow for more than a year and a half now and
have worked extensively on personalizing the framework and integrating
it into several of our internal systems. Over the past year and a half,
we have built multiple platforms that rely on RAGFlow as a core
component, which has given us a strong appreciation for how flexible and
powerful the project is.
We also previously contributed the full Italian translation, and we were
glad to see it accepted. This new Docs Generator component was created
for our own production needs, and we believe that it may be useful for
many others in the community as well.
We want to sincerely thank the entire RAGFlow team for the remarkable
work you have done and continue to do. If there are opportunities to
contribute further, we would be glad to help whenever we have time
available. It would be a pleasure to support the project in any way we
can.
If appropriate, we would be glad to be listed among the project’s
contributors, but in any case we look forward to continuing to support
and contribute to the project.
PentaFrame Development Team
---------
Co-authored-by: PentaFrame <info@pentaframe.it>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2025-12-12 07:59:43 +01:00
"aiosmtplib>=5.0.0"
2025-03-03 18:59:49 +08:00
]
2025-06-03 15:21:06 +08:00
# Dependency groups (PEP 735): requirement sets that are not part of the
# published package metadata.
[ dependency-groups ]
# `test`: packages used when running the test suite. Several entries repeat
# runtime dependencies from the `project` table with looser bounds.
test = [
"hypothesis>=6.132.0" ,
"openpyxl>=3.1.5" ,
2025-12-09 19:55:25 +08:00
"pillow>=10.4.0,<13.0.0" ,
2025-06-03 15:21:06 +08:00
"pytest>=8.3.5" ,
2025-12-05 11:40:16 +08:00
"pytest-asyncio>=1.3.0" ,
"pytest-xdist>=3.8.0" ,
"pytest-cov>=7.0.0" ,
2025-06-03 15:21:06 +08:00
"python-docx>=1.1.2" ,
"python-pptx>=1.0.2" ,
"reportlab>=4.4.1" ,
"requests>=2.32.2" ,
"requests-toolbelt>=1.0.0" ,
]
2025-09-26 14:55:19 +08:00
# Package index entry for uv. An array-of-tables header must open with `[[`
# and close with `]]`, with no whitespace between the paired brackets.
[[tool.uv.index]]
2025-10-10 12:41:45 +08:00
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
2025-09-26 14:55:19 +08:00
2025-03-21 18:44:49 +08:00
# setuptools: explicit list of the packages to include in the distribution.
[ tool . setuptools ]
2025-06-04 13:16:17 +08:00
packages = [
'agent' ,
'agentic_reasoning' ,
'api' ,
'deepdoc' ,
'graphrag' ,
# NOTE(review): "intergrations" looks like a misspelling of "integrations";
# presumably it mirrors the on-disk directory name — verify before changing.
'intergrations.chatgpt-on-wechat.plugins' ,
'mcp.server' ,
'rag' ,
'sdk.python.ragflow_sdk' ,
]
2025-03-21 18:44:49 +08:00
2025-03-20 22:31:18 +08:00
# Ruff: allow lines of up to 200 characters, and skip the virtualenv directory
# plus one explicitly listed source file.
[ tool . ruff ]
line-length = 200
2025-06-04 13:16:17 +08:00
exclude = [ ".venv" , "rag/svr/discord_svr.py" ]
2025-03-20 22:31:18 +08:00
# Ruff lint rules: enable the ASYNC rule family on top of the selected rule
# set, and silence E402 (module-level import not at top of file).
[tool.ruff.lint]
extend-select = ["ASYNC", "ASYNC1"]
ignore = ["E402"]
2025-06-04 13:16:17 +08:00
# pytest settings: where tests live, how they are discovered, and default flags.
[ tool . pytest . ini_options ]
2025-12-05 11:40:16 +08:00
# Directories added to sys.path so tests can import from the repository root.
pythonpath = [
"."
]
# Discovery: search `test/` for test_*.py files, Test* classes, test_* functions.
testpaths = [ "test" ]
python_files = [ "test_*.py" ]
python_classes = [ "Test*" ]
python_functions = [ "test_*" ]
2025-06-04 13:16:17 +08:00
# Custom markers; they must be declared here because --strict-markers is set below.
markers = [
"p1: high priority test cases" ,
"p2: medium priority test cases" ,
"p3: low priority test cases" ,
]
2025-12-05 11:40:16 +08:00
# Warning filters. When several filters match, the last one listed wins.
filterwarnings = [
"error" , # Turn every warning into a test error...
"ignore::DeprecationWarning" , # ...except DeprecationWarning, which is ignored
]
# Options added to every pytest invocation
addopts = [
"-v" , # Verbose output
"--strict-markers" , # Fail on markers that are not declared in `markers`
"--tb=short" , # Shorter traceback format
"--disable-warnings" , # Hide the warnings summary in the report
"--color=yes" # Always colorize output
]
# coverage.py: measurement settings
[tool.coverage.run]
# Directories to measure. The path is relative; adjust it to match the
# project structure.
source = [
    # "../../api/db/services",
    # Add more directories if needed:
    "../../common",
    # "../../utils",
]
# Path patterns excluded from measurement.
omit = [
    "*/tests/*",
    "*/test_*",
    "*/__pycache__/*",
    "*/.pytest_cache/*",
    "*/venv/*",
    "*/.venv/*",
    "*/env/*",
    "*/site-packages/*",
    "*/dist/*",
    "*/build/*",
    "*/migrations/*",
    "setup.py",
]
# coverage.py: report settings
[tool.coverage.report]
precision = 2  # decimal places shown for percentages
show_missing = true
skip_covered = false
fail_under = 0  # Minimum coverage requirement (0-100)
# Regexes for source lines to leave out of the report. Each entry is searched
# for anywhere in a line, so it has to be anchored: a bare "pass" would also
# exclude lines such as `password = ...` or `def passthrough():`.
# NOTE(review): setting exclude_lines replaces coverage.py's default
# exclusions (including "pragma: no cover") — confirm that is intended.
exclude_lines = [
    # "pragma: no cover",
    # "def __repr__",
    # "raise AssertionError",
    # "raise NotImplementedError",
    # "if __name__ == .__main__.:",
    # "if TYPE_CHECKING:",
    # Standalone `pass` statements only.
    '^[ \t]*pass[ \t]*$',
]
# coverage.py: HTML report output
[tool.coverage.html]
directory = "htmlcov"
title = "Test Coverage Report"
# extra_css = "custom.css"  # optional custom stylesheet