Files
ragflow/pyproject.toml

330 lines
9.8 KiB
TOML
Raw Normal View History

[project]
# Distribution identity.
name = "ragflow"
version = "0.25.5"

# One-line summary. Written as a folded multi-line basic string: each trailing
# backslash joins the next line, so the parsed value is a single line of text.
description = """\
[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) \
engine based on deep document understanding. \
It offers a streamlined RAG workflow for businesses of any scale, \
combining LLM (Large Language Models) to provide truthful question-answering capabilities, \
backed by well-founded citations from various complex formatted data.\
"""

# Long description and licence text are read from files next to this one.
readme = "README.md"
license-files = ["LICENSE"]

authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
feat: bump Python minimum from 3.12 to 3.13, drop strenum backport (#14767) Closes #14753 ## What changed | File | Change | |---|---| | `pyproject.toml` | `requires-python` → `>=3.13,<3.15`; remove `strenum==0.4.15` | | `Dockerfile` | `uv python install 3.13`, `uv sync --python 3.13` | | `.github/workflows/tests.yml` | `uv sync --python 3.13` on both matrix legs | | `CLAUDE.md` | dev setup command + requirements note updated | | `deepdoc/parser/mineru_parser.py` | `from strenum import StrEnum` → `from enum import StrEnum` | | `agent/tools/code_exec.py` | same | `StrEnum` has been in the stdlib since Python 3.11 — the `strenum` backport package is no longer needed once the floor is 3.13. ## Why uv.lock is not regenerated `uv lock --python 3.13` fails because: 1. The infiniflow/graspologic fork pins `numpy>=1.26.4,<2.0.0` 2. `tensorflow-cpu>=2.20.0` (the first release with cp313 wheels) depends on `ml-dtypes>=0.5.1`, which requires `numpy>=2.1.0` 3. These two constraints are irreconcilable on Python 3.13 The lockfile regeneration requires loosening the `numpy` upper bound in the `infiniflow/graspologic` fork. Once that fork commit is updated and the SHA in `pyproject.toml:49` is bumped, `uv lock --python 3.13` will succeed. ## RFC corrections Two claims in the original RFC (#14753) did not hold up under code review: - **"graspologic hard-blocks 3.13"** — the infiniflow fork at the pinned commit has no `<3.13` Python constraint. The blocker is the transitive `numpy<2.0.0` conflict with tensorflow-cpu's test dependency, not a direct Python version cap. - **"free-threading throughput gains for I/O-bound workload"** — Python 3.13 free-threading requires a special `--disable-gil` build and provides no benefit for async I/O code (the GIL is already released during I/O). The real motivation is forward compatibility and improved error messages.
2026-05-15 08:40:53 +02:00
# Supported Python versions: 3.13 and 3.14 (the upper bound excludes 3.15).
requires-python = ">=3.13,<3.15"
dependencies = [
feat: bump Python minimum from 3.12 to 3.13, drop strenum backport (#14767) Closes #14753 ## What changed | File | Change | |---|---| | `pyproject.toml` | `requires-python` → `>=3.13,<3.15`; remove `strenum==0.4.15` | | `Dockerfile` | `uv python install 3.13`, `uv sync --python 3.13` | | `.github/workflows/tests.yml` | `uv sync --python 3.13` on both matrix legs | | `CLAUDE.md` | dev setup command + requirements note updated | | `deepdoc/parser/mineru_parser.py` | `from strenum import StrEnum` → `from enum import StrEnum` | | `agent/tools/code_exec.py` | same | `StrEnum` has been in the stdlib since Python 3.11 — the `strenum` backport package is no longer needed once the floor is 3.13. ## Why uv.lock is not regenerated `uv lock --python 3.13` fails because: 1. The infiniflow/graspologic fork pins `numpy>=1.26.4,<2.0.0` 2. `tensorflow-cpu>=2.20.0` (the first release with cp313 wheels) depends on `ml-dtypes>=0.5.1`, which requires `numpy>=2.1.0` 3. These two constraints are irreconcilable on Python 3.13 The lockfile regeneration requires loosening the `numpy` upper bound in the `infiniflow/graspologic` fork. Once that fork commit is updated and the SHA in `pyproject.toml:49` is bumped, `uv lock --python 3.13` will succeed. ## RFC corrections Two claims in the original RFC (#14753) did not hold up under code review: - **"graspologic hard-blocks 3.13"** — the infiniflow fork at the pinned commit has no `<3.13` Python constraint. The blocker is the transitive `numpy<2.0.0` conflict with tensorflow-cpu's test dependency, not a direct Python version cap. - **"free-threading throughput gains for I/O-bound workload"** — Python 3.13 free-threading requires a special `--disable-gil` build and provides no benefit for async I/O code (the GIL is already released during I/O). The real motivation is forward compatibility and improved error messages.
2026-05-15 08:40:53 +02:00
# discord-py==2.3.2 unconditionally imports audioop in discord/player.py at
# module-load time. audioop was removed from the CPython stdlib in Python 3.13
# (PEP 594), so any import of the discord package raises ImportError on
# Python 3.13 — even in tests that never use voice features. audioop-lts
# provides the module as a backport.
"audioop-lts>=0.2.1",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"aiosmtplib>=5.0.0",
"akshare>=1.15.78,<2.0.0",
"anthropic==0.76.0",
"arxiv==2.1.3",
"atlassian-python-api==4.0.7",
"azure-identity>=1.25.3",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"azure-storage-file-datalake==12.16.0",
"beartype>=0.20.0,<1.0.0",
"bio==1.7.1",
"browser-use>=0.11.1,<0.12.0",
Feat: add BedrockCV for vision/image2text inference via LiteLLM (#14705) ## Summary - `CvModel["Bedrock"]` was absent from `rag/llm/cv_model.py`, causing `model_instance()` to return `None` when a Bedrock model was used as a PDF parser — even after correct model resolution. - This PR adds `BedrockCV`, enabling Bedrock vision models (e.g. `amazon.nova-pro-v1:0`, `anthropic.claude-3-5-sonnet`) to be used as PDF parsers. ## What problem does this PR solve? When a Bedrock model is selected as the PDF parser in a knowledge base, ingestion failed with: ``` 'LiteLLMBase' object has no attribute 'describe_with_prompt' ``` The root cause: `LiteLLMBase` (the Bedrock chat implementation) was the only registered handler for the Bedrock factory. It does not implement `describe_with_prompt`. `CvModel` had no Bedrock entry, so `model_instance()` returned `None` for `image2text` requests. ## Type of change - [x] New Feature (non-breaking change which adds functionality) ## Changes **`rag/llm/cv_model.py`** Adds `BedrockCV(Base)` with `_FACTORY_NAME = "Bedrock"`: - Uses `litellm.completion` with the `bedrock/` prefix (consistent with `LiteLLMBase`) - Parses AWS credentials from the JSON key assembled by `add_llm` (`auth_mode`, `bedrock_ak`, `bedrock_sk`, `bedrock_region`, `aws_role_arn`) - Supports three auth modes: `access_key_secret`, `iam_role` (via STS `assume_role`), and default credential chain (IRSA, instance profile) - Implements `describe_with_prompt` and `describe` ## Test plan - [ ] Configure a Bedrock vision model (e.g. `amazon.nova-pro-v1:0`) with valid AWS credentials - [ ] Select it as PDF parser in a knowledge base - [ ] Verify ingestion of a PDF document completes without errors - [ ] Verify `CvModel["Bedrock"]` resolves to `BedrockCV` 🤖 Generated with [Claude Code](https://claude.ai/claude-code) --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-11 04:29:58 +02:00
"boto3>=1.28.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"boxsdk>=10.1.0",
"captcha>=0.7.1",
"chardet>=5.2.0,<6.0.0",
"cn2an==0.5.22",
"cohere==5.6.2",
"Crawl4AI>=0.4.0,<1.0.0",
"dashscope==1.25.11",
"deepl==1.18.0",
"debugpy>=1.8.13",
"demjson3==3.0.6",
"discord-py==2.3.2",
"dropbox==12.0.2",
"duckduckgo-search>=7.2.0,<8.0.0",
"editdistance==0.8.1",
"elasticsearch-dsl==8.12.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"exceptiongroup>=1.3.0,<2.0.0",
"feedparser>=6.0.11,<7.0.0",
Feat: Use data pipeline to visualize the parsing configuration of the knowledge base (#10423) ### What problem does this PR solve? #9869 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: jinhai <haijin.chn@gmail.com> Signed-off-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: chanx <1243304602@qq.com> Co-authored-by: balibabu <cike8899@users.noreply.github.com> Co-authored-by: Lynn <lynn_inf@hotmail.com> Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com> Co-authored-by: huangzl <huangzl@shinemo.com> Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com> Co-authored-by: Wilmer <33392318@qq.com> Co-authored-by: Adrian Weidig <adrianweidig@gmx.net> Co-authored-by: Zhichang Yu <yuzhichang@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Yongteng Lei <yongtengrey@outlook.com> Co-authored-by: Liu An <asiro@qq.com> Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com> Co-authored-by: BadwomanCraZY <511528396@qq.com> Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com> Co-authored-by: Russell Valentine <russ@coldstonelabs.org> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Billy Bao <newyorkupperbay@gmail.com> Co-authored-by: Zhedong Cen <cenzhedong2@126.com> Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com> Co-authored-by: TensorNull <tensor.null@gmail.com> Co-authored-by: TeslaZY <TeslaZY@outlook.com> Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com> Co-authored-by: AB <aj@Ajays-MacBook-Air.local> Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com> Co-authored-by: He Wang <wanghechn@qq.com> Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com> Co-authored-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com> 
Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box> Co-authored-by: Stephen Hu <stephenhu@seismic.com> Co-authored-by: Shaun Zhang <zhangwfjh@users.noreply.github.com> Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com> Co-authored-by: mxc <mxc@example.com> Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com> Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com> Co-authored-by: mcoder6425 <mcoder64@gmail.com> Co-authored-by: lemsn <lemsn@msn.com> Co-authored-by: lemsn <lemsn@126.com> Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com> Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com> Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>
2025-10-09 12:36:19 +08:00
"extract-msg>=0.39.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"ffmpeg-python>=0.2.0",
"flasgger>=0.9.7.1,<0.10.0",
"flask-cors==6.0.2",
"flask-login==0.6.3",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"flask-mail>=0.10.0",
"flask-session==0.8.0",
"google-api-python-client>=2.190.0,<3.0.0",
"google-auth-oauthlib>=1.2.0,<2.0.0",
"google-cloud-storage>=2.19.0,<3.0.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"google-genai>=1.41.0,<2.0.0",
"google-search-results==2.4.2",
"graspologic @ git+https://gitee.com/infiniflow/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
"groq>=0.30.0,<1.0.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"grpcio-status==1.67.1",
"html-text==0.6.2",
"infinity-sdk==0.7.0",
"infinity-emb>=0.0.66,<0.0.67",
"jira==3.10.5",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"json-repair==0.35.0",
"langfuse>=4.0.1",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"mammoth>=1.11.0",
"markdown==3.6",
"markdown-to-json==2.1.1",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"markdownify>=1.2.0",
"mcp>=1.19.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"mini-racer>=0.12.4,<0.13.0",
"minio==7.2.4",
"mistralai==0.4.2",
feat/add MySQL and PostgreSQL data source connectors (#12817) ### What problem does this PR solve? This PR adds MySQL and PostgreSQL as data source connectors, allowing users to import data directly from relational databases into RAGFlow for RAG workflows. Many users store their knowledge in databases (product catalogs, documentation, FAQs, etc.) and currently have no way to sync this data into RAGFlow without exporting to files first. This feature lets them connect directly to their databases, run SQL queries, and automatically create documents from the results. Closes #763 Closes #11560 ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe): ### What this PR does **New capabilities:** - Connect to MySQL and PostgreSQL databases - Run custom SQL queries to extract data - Map database columns to document content (vectorized) and metadata (searchable) - Support incremental sync using a timestamp column - Full frontend UI with connection form and tooltips **Files changed:** Backend: - `common/constants.py` - Added MYSQL/POSTGRESQL to FileSource enum - `common/data_source/config.py` - Added to DocumentSource enum - `common/data_source/rdbms_connector.py` - New connector (368 lines) - `common/data_source/__init__.py` - Exported the connector - `rag/svr/sync_data_source.py` - Added MySQL and PostgreSQL sync classes - `pyproject.toml` - Added mysql-connector-python dependency Frontend: - `web/src/pages/user-setting/data-source/constant/index.tsx` - Form fields - `web/src/locales/en.ts` - English translations - `web/src/assets/svg/data-source/mysql.svg` - MySQL icon - `web/src/assets/svg/data-source/postgresql.svg` - PostgreSQL icon ### Testing done Tested with MySQL 8.0 and PostgreSQL 16: - Connection validation works correctly - Full sync imports all query results as documents - 
Incremental sync only fetches rows updated since last sync - Custom SQL queries filter data as expected - Invalid credentials show clear error messages - Lint checks pass (`ruff check` returns no errors) --------- Co-authored-by: mkdev11 <YOUR_GITHUB_ID+MkDev11@users.noreply.github.com>
2026-02-03 23:14:32 -03:00
"mysql-connector-python>=9.0.0,<10.0.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"moodlepy>=0.23.0",
"mypy-boto3-s3==1.40.26",
"Office365-REST-Python-Client==2.6.2",
"ollama>=0.5.0",
"onnxruntime==1.23.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
"onnxruntime-gpu==1.23.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
"opencv-python==4.10.0.84",
"opencv-python-headless==4.10.0.84",
"opendal>=0.45.0,<0.46.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"opensearch-py==2.7.1",
"ormsgpack>=1.6.0",
"pdfplumber==0.10.4",
"pluginlib>=0.10.0",
"psycopg2-binary>=2.9.11,<3.0.0",
"pyclipper>=1.4.0,<2.0.0",
feat: Implement pluggable multi-provider sandbox architecture (#12820) ## Summary Implement a flexible sandbox provider system supporting both self-managed (Docker) and SaaS (Aliyun Code Interpreter) backends for secure code execution in agent workflows. **Key Changes:** - ✅ Aliyun Code Interpreter provider using official `agentrun-sdk>=0.0.16` - ✅ Self-managed provider with gVisor (runsc) security - ✅ Arguments parameter support for dynamic code execution - ✅ Database-only configuration (removed fallback logic) - ✅ Configuration scripts for quick setup Issue #12479 ## Features ### 🔌 Provider Abstraction Layer **1. Self-Managed Provider** (`agent/sandbox/providers/self_managed.py`) - Wraps existing executor_manager HTTP API - gVisor (runsc) for secure container isolation - Configurable pool size, timeout, retry logic - Languages: Python, Node.js, JavaScript - ⚠️ **Requires**: gVisor installation, Docker, base images **2. Aliyun Code Interpreter** (`agent/sandbox/providers/aliyun_codeinterpreter.py`) - SaaS integration using official agentrun-sdk - Serverless microVM execution with auto-authentication - Hard timeout: 30 seconds max - Credentials: `AGENTRUN_ACCESS_KEY_ID`, `AGENTRUN_ACCESS_KEY_SECRET`, `AGENTRUN_ACCOUNT_ID`, `AGENTRUN_REGION` - Automatically wraps code to call `main()` function **3. E2B Provider** (`agent/sandbox/providers/e2b.py`) - Placeholder for future integration ### ⚙️ Configuration System - `conf/system_settings.json`: Default provider = `aliyun_codeinterpreter` - `agent/sandbox/client.py`: Enforces database-only configuration - Admin UI: `/admin/sandbox-settings` - Configuration validation via `validate_config()` method - Health checks for all providers ### 🎯 Key Capabilities **Arguments Parameter Support:** All providers support passing arguments to `main()` function: ```python # User code def main(name: str, count: int) -> dict: return {"message": f"Hello {name}!" 
* count} # Executed with: arguments={"name": "World", "count": 3} # Result: {"message": "Hello World!Hello World!Hello World!"} ``` **Self-Describing Providers:** Each provider implements `get_config_schema()` returning form configuration for Admin UI **Error Handling:** Structured `ExecutionResult` with stdout, stderr, exit_code, execution_time ## Configuration Scripts Two scripts for quick Aliyun sandbox setup: **Shell Script (requires jq):** ```bash source scripts/configure_aliyun_sandbox.sh ``` **Python Script (interactive):** ```bash python3 scripts/configure_aliyun_sandbox.py ``` ## Testing ```bash # Unit tests uv run pytest agent/sandbox/tests/test_providers.py -v # Aliyun provider tests uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter.py -v # Integration tests (requires credentials) uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py -v # Quick SDK validation python3 agent/sandbox/tests/verify_sdk.py ``` **Test Coverage:** - 30 unit tests for provider abstraction - Provider-specific tests for Aliyun - Integration tests with real API - Security tests for executor_manager ## Documentation - `docs/develop/sandbox_spec.md` - Complete architecture specification - `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration from legacy sandbox - `agent/sandbox/tests/QUICKSTART.md` - Quick start guide - `agent/sandbox/tests/README.md` - Testing documentation ## Breaking Changes ⚠️ **Migration Required:** 1. **Directory Move**: `sandbox/` → `agent/sandbox/` - Update imports: `from sandbox.` → `from agent.sandbox.` 2. **Mandatory Configuration**: - SystemSettings must have `sandbox.provider_type` configured - Removed fallback default values - Configuration must exist in database (from `conf/system_settings.json`) 3. **Aliyun Credentials**: - Requires `AGENTRUN_*` environment variables (not `ALIYUN_*`) - `AGENTRUN_ACCOUNT_ID` is now required (Aliyun primary account ID) 4. 
**Self-Managed Provider**: - gVisor (runsc) must be installed for security - Install: `go install gvisor.dev/gvisor/runsc@latest` ## Database Schema Changes ```python # SystemSettings.value: CharField → TextField api/db/db_models.py: Changed for unlimited config length # SystemSettingsService.get_by_name(): Fixed query precision api/db/services/system_settings_service.py: startswith → exact match ``` ## Files Changed ### Backend (Python) - `agent/sandbox/providers/base.py` - SandboxProvider ABC interface - `agent/sandbox/providers/manager.py` - ProviderManager - `agent/sandbox/providers/self_managed.py` - Self-managed provider - `agent/sandbox/providers/aliyun_codeinterpreter.py` - Aliyun provider - `agent/sandbox/providers/e2b.py` - E2B provider (placeholder) - `agent/sandbox/client.py` - Unified client (enforces DB-only config) - `agent/tools/code_exec.py` - Updated to use provider system - `admin/server/services.py` - SandboxMgr with registry & validation - `admin/server/routes.py` - 5 sandbox API endpoints - `conf/system_settings.json` - Default: aliyun_codeinterpreter - `api/db/db_models.py` - TextField for SystemSettings.value - `api/db/services/system_settings_service.py` - Exact match query ### Frontend (TypeScript/React) - `web/src/pages/admin/sandbox-settings.tsx` - Settings UI - `web/src/services/admin-service.ts` - Sandbox service functions - `web/src/services/admin.service.d.ts` - Type definitions - `web/src/utils/api.ts` - Sandbox API endpoints ### Documentation - `docs/develop/sandbox_spec.md` - Architecture spec - `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration guide - `agent/sandbox/tests/QUICKSTART.md` - Quick start - `agent/sandbox/tests/README.md` - Testing guide ### Configuration Scripts - `scripts/configure_aliyun_sandbox.sh` - Shell script (jq) - `scripts/configure_aliyun_sandbox.py` - Python script ### Tests - `agent/sandbox/tests/test_providers.py` - 30 unit tests - `agent/sandbox/tests/test_aliyun_codeinterpreter.py` - Provider tests 
- `agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py` - Integration tests - `agent/sandbox/tests/verify_sdk.py` - SDK validation ## Architecture ``` Admin UI → Admin API → SandboxMgr → ProviderManager → [SelfManaged|Aliyun|E2B] ↓ SystemSettings ``` ## Usage ### 1. Configure Provider **Via Admin UI:** 1. Navigate to `/admin/sandbox-settings` 2. Select provider (Aliyun Code Interpreter / Self-Managed) 3. Fill in configuration 4. Click "Test Connection" to verify 5. Click "Save" to apply **Via Configuration Scripts:** ```bash # Aliyun provider export AGENTRUN_ACCESS_KEY_ID="xxx" export AGENTRUN_ACCESS_KEY_SECRET="yyy" export AGENTRUN_ACCOUNT_ID="zzz" export AGENTRUN_REGION="cn-shanghai" source scripts/configure_aliyun_sandbox.sh ``` ### 2. Restart Service ```bash cd docker docker compose restart ragflow-server ``` ### 3. Execute Code in Agent ```python from agent.sandbox.client import execute_code result = execute_code( code='def main(name: str) -> dict: return {"message": f"Hello {name}!"}', language="python", timeout=30, arguments={"name": "World"} ) print(result.stdout) # {"message": "Hello World!"} ``` ## Troubleshooting ### "Container pool is busy" (Self-Managed) - **Cause**: Pool exhausted (default: 1 container in `.env`) - **Fix**: Increase `SANDBOX_EXECUTOR_MANAGER_POOL_SIZE` to 5+ ### "Sandbox provider type not configured" - **Cause**: Database missing configuration - **Fix**: Run config script or set via Admin UI ### "gVisor not found" - **Cause**: runsc not installed - **Fix**: `go install gvisor.dev/gvisor/runsc@latest && sudo cp ~/go/bin/runsc /usr/local/bin/` ### Aliyun authentication errors - **Cause**: Wrong environment variable names - **Fix**: Use `AGENTRUN_*` prefix (not `ALIYUN_*`) ## Checklist - [x] All tests passing (30 unit tests + integration tests) - [x] Documentation updated (spec, migration guide, quickstart) - [x] Type definitions added (TypeScript) - [x] Admin UI implemented - [x] Configuration validation - [x] Health checks 
implemented - [x] Error handling with structured results - [x] Breaking changes documented - [x] Configuration scripts created - [x] gVisor requirements documented Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-28 13:28:21 +08:00
# "pywencai>=0.13.1,<1.0.0", # Temporarily disabled: pywencai conflicts with agentrun-sdk (pydash>=8). pywencai is needed by agent/tools/wencai.py; re-enable once the conflict is resolved.
"pycryptodomex==3.20.0",
"pyobvector==0.2.22",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"pyodbc>=5.2.0,<6.0.0",
"pypandoc>=1.16",
Build(deps): Bump pypdf from 6.9.2 to 6.10.2 (#14184) Bumps [pypdf](https://github.com/py-pdf/pypdf) from 6.9.2 to 6.10.2. <details> <summary>Release notes</summary> <p><em>Sourced from <a href="https://github.com/py-pdf/pypdf/releases">pypdf's releases</a>.</em></p> <blockquote> <h2>Version 6.10.2, 2026-04-15</h2> <h2>What's new</h2> <h3>Security (SEC)</h3> <ul> <li>Do not rely on possibly invalid /Size for incremental cloning (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3735">#3735</a>) by <a href="https://github.com/stefan6419846"><code>@​stefan6419846</code></a></li> <li>Introduce limits for FlateDecode parameters and image decoding (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3734">#3734</a>) by <a href="https://github.com/stefan6419846"><code>@​stefan6419846</code></a></li> </ul> <p><a href="https://github.com/py-pdf/pypdf/compare/6.10.1...6.10.2">Full Changelog</a></p> <h2>Version 6.10.1, 2026-04-14</h2> <h2>What's new</h2> <h3>Security (SEC)</h3> <ul> <li>Limit the allowed size of xref and object streams (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3733">#3733</a>) by <a href="https://github.com/stefan6419846"><code>@​stefan6419846</code></a></li> </ul> <h3>Robustness (ROB)</h3> <ul> <li>Consider strict mode setting for decryption errors (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3731">#3731</a>) by <a href="https://github.com/stefan6419846"><code>@​stefan6419846</code></a></li> </ul> <h3>Documentation (DOC)</h3> <ul> <li>Use new parameter names for compress_identical_objects by <a href="https://github.com/stefan6419846"><code>@​stefan6419846</code></a></li> </ul> <p><a href="https://github.com/py-pdf/pypdf/compare/6.10.0...6.10.1">Full Changelog</a></p> <h2>Version 6.10.0, 2026-04-10</h2> <h2>What's new</h2> <h3>Security (SEC)</h3> <ul> <li>Disallow custom XML entity declarations for XMP metadata (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3724">#3724</a>) by <a 
href="https://github.com/stefan6419846"><code>@​stefan6419846</code></a></li> </ul> <h3>New Features (ENH)</h3> <ul> <li>Skip MD5 key derivation for AES-256 encrypted PDFs (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3694">#3694</a>) by <a href="https://github.com/Ygnas"><code>@​Ygnas</code></a></li> </ul> <h3>Bug Fixes (BUG)</h3> <ul> <li>Use remove_orphans in compress_identical_objects (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3310">#3310</a>) by <a href="https://github.com/j-t-1"><code>@​j-t-1</code></a></li> <li>Fix PdfReadError when xref table contains comments before trailer (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3710">#3710</a>) by <a href="https://github.com/rassie"><code>@​rassie</code></a></li> <li>Correctly verify AES padding during decryption (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3699">#3699</a>) by <a href="https://github.com/stefan6419846"><code>@​stefan6419846</code></a></li> <li>Fix stale object cache from non-authoritative object streams (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3698">#3698</a>) by <a href="https://github.com/astahlman"><code>@​astahlman</code></a></li> <li>Fix extract_links pairing when annotations include non-links (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3687">#3687</a>) by <a href="https://github.com/ReinerBRO"><code>@​ReinerBRO</code></a></li> </ul> <h3>Documentation (DOC)</h3> <ul> <li>Add AI policy (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3717">#3717</a>) by <a href="https://github.com/stefan6419846"><code>@​stefan6419846</code></a></li> </ul> <p><a href="https://github.com/py-pdf/pypdf/compare/6.9.2...6.10.0">Full Changelog</a></p> </blockquote> </details> <details> <summary>Changelog</summary> <p><em>Sourced from <a href="https://github.com/py-pdf/pypdf/blob/main/CHANGELOG.md">pypdf's changelog</a>.</em></p> <blockquote> <h2>Version 6.10.2, 2026-04-15</h2> <h3>Security (SEC)</h3> <ul> <li>Do not rely on 
possibly invalid /Size for incremental cloning (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3735">#3735</a>)</li> <li>Introduce limits for FlateDecode parameters and image decoding (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3734">#3734</a>)</li> </ul> <p><a href="https://github.com/py-pdf/pypdf/compare/6.10.1...6.10.2">Full Changelog</a></p> <h2>Version 6.10.1, 2026-04-14</h2> <h3>Security (SEC)</h3> <ul> <li>Limit the allowed size of xref and object streams (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3733">#3733</a>)</li> </ul> <h3>Robustness (ROB)</h3> <ul> <li>Consider strict mode setting for decryption errors (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3731">#3731</a>)</li> </ul> <h3>Documentation (DOC)</h3> <ul> <li>Use new parameter names for compress_identical_objects</li> </ul> <p><a href="https://github.com/py-pdf/pypdf/compare/6.10.0...6.10.1">Full Changelog</a></p> <h2>Version 6.10.0, 2026-04-10</h2> <h3>Security (SEC)</h3> <ul> <li>Disallow custom XML entity declarations for XMP metadata (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3724">#3724</a>)</li> </ul> <h3>New Features (ENH)</h3> <ul> <li>Skip MD5 key derivation for AES-256 encrypted PDFs (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3694">#3694</a>)</li> </ul> <h3>Bug Fixes (BUG)</h3> <ul> <li>Use remove_orphans in compress_identical_objects (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3310">#3310</a>)</li> <li>Fix PdfReadError when xref table contains comments before trailer (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3710">#3710</a>)</li> <li>Correctly verify AES padding during decryption (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3699">#3699</a>)</li> <li>Fix stale object cache from non-authoritative object streams (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3698">#3698</a>)</li> <li>Fix extract_links pairing when annotations include non-links (<a 
href="https://redirect.github.com/py-pdf/pypdf/issues/3687">#3687</a>)</li> </ul> <h3>Documentation (DOC)</h3> <ul> <li>Add AI policy (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3717">#3717</a>)</li> </ul> <p><a href="https://github.com/py-pdf/pypdf/compare/6.9.2...6.10.0">Full Changelog</a></p> </blockquote> </details> <details> <summary>Commits</summary> <ul> <li><a href="https://github.com/py-pdf/pypdf/commit/c476b4f293c8ef4cac07dfb755e5582d838fcdc0"><code>c476b4f</code></a> REL: 6.10.2</li> <li><a href="https://github.com/py-pdf/pypdf/commit/c50a0104cf083356f7c7f5d61410466a57f5c88a"><code>c50a010</code></a> SEC: Do not rely on possibly invalid /Size for incremental cloning (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3735">#3735</a>)</li> <li><a href="https://github.com/py-pdf/pypdf/commit/ac734dab4eef92bcce50d503949b4d9887d89f11"><code>ac734da</code></a> SEC: Introduce limits for FlateDecode parameters and image decoding (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3734">#3734</a>)</li> <li><a href="https://github.com/py-pdf/pypdf/commit/b49e7eb45422c19b68ac59c51b7699409e74d44e"><code>b49e7eb</code></a> REL: 6.10.1</li> <li><a href="https://github.com/py-pdf/pypdf/commit/62338e9d36419cf193ccec7331784f45df1d70b3"><code>62338e9</code></a> SEC: Limit the allowed size of xref and object streams (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3733">#3733</a>)</li> <li><a href="https://github.com/py-pdf/pypdf/commit/5dcc0aebaa2c732028ea8def2eb9982e324b7c11"><code>5dcc0ae</code></a> DEV: Update pytest-benchmark to 5.2.3</li> <li><a href="https://github.com/py-pdf/pypdf/commit/b42e4aa98ae5c7fdd02558d165d39fe639fdf97d"><code>b42e4aa</code></a> DEV: Update pinned pillow and pytest where possible (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3732">#3732</a>)</li> <li><a href="https://github.com/py-pdf/pypdf/commit/717446b1218a3eb236cb47d1bae2b68451ccb6c0"><code>717446b</code></a> ROB: Consider strict mode 
setting for decryption errors (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3731">#3731</a>)</li> <li><a href="https://github.com/py-pdf/pypdf/commit/9e461d361b9004da68fc8e6acc4308cce68aa304"><code>9e461d3</code></a> DEV: Bump softprops/action-gh-release from 2 to 3 (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3730">#3730</a>)</li> <li><a href="https://github.com/py-pdf/pypdf/commit/500d09d92fa80a6f1fcdfa46656893efd05e91ff"><code>500d09d</code></a> TST: Update <code>test_embedded_file__basic</code> to use <code>tmp_path</code> fixture (<a href="https://redirect.github.com/py-pdf/pypdf/issues/3726">#3726</a>)</li> <li>Additional commits viewable in <a href="https://github.com/py-pdf/pypdf/compare/6.9.2...6.10.2">compare view</a></li> </ul> </details> <br /> [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=pypdf&package-manager=uv&previous-version=6.9.2&new-version=6.10.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. 
[//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) --- <details> <summary>Dependabot commands and options</summary> <br /> You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/infiniflow/ragflow/network/alerts). </details> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-04-17 18:43:19 +08:00
"pypdf>=6.10.2",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"python-calamine>=0.4.0",
"python-docx>=1.1.2,<2.0.0",
"paramiko>=3.5.1",
"python-pptx>=1.0.2,<2.0.0",
feat: Implement pluggable multi-provider sandbox architecture (#12820) ## Summary Implement a flexible sandbox provider system supporting both self-managed (Docker) and SaaS (Aliyun Code Interpreter) backends for secure code execution in agent workflows. **Key Changes:** - ✅ Aliyun Code Interpreter provider using official `agentrun-sdk>=0.0.16` - ✅ Self-managed provider with gVisor (runsc) security - ✅ Arguments parameter support for dynamic code execution - ✅ Database-only configuration (removed fallback logic) - ✅ Configuration scripts for quick setup Issue #12479 ## Features ### 🔌 Provider Abstraction Layer **1. Self-Managed Provider** (`agent/sandbox/providers/self_managed.py`) - Wraps existing executor_manager HTTP API - gVisor (runsc) for secure container isolation - Configurable pool size, timeout, retry logic - Languages: Python, Node.js, JavaScript - ⚠️ **Requires**: gVisor installation, Docker, base images **2. Aliyun Code Interpreter** (`agent/sandbox/providers/aliyun_codeinterpreter.py`) - SaaS integration using official agentrun-sdk - Serverless microVM execution with auto-authentication - Hard timeout: 30 seconds max - Credentials: `AGENTRUN_ACCESS_KEY_ID`, `AGENTRUN_ACCESS_KEY_SECRET`, `AGENTRUN_ACCOUNT_ID`, `AGENTRUN_REGION` - Automatically wraps code to call `main()` function **3. E2B Provider** (`agent/sandbox/providers/e2b.py`) - Placeholder for future integration ### ⚙️ Configuration System - `conf/system_settings.json`: Default provider = `aliyun_codeinterpreter` - `agent/sandbox/client.py`: Enforces database-only configuration - Admin UI: `/admin/sandbox-settings` - Configuration validation via `validate_config()` method - Health checks for all providers ### 🎯 Key Capabilities **Arguments Parameter Support:** All providers support passing arguments to `main()` function: ```python # User code def main(name: str, count: int) -> dict: return {"message": f"Hello {name}!" 
* count} # Executed with: arguments={"name": "World", "count": 3} # Result: {"message": "Hello World!Hello World!Hello World!"} ``` **Self-Describing Providers:** Each provider implements `get_config_schema()` returning form configuration for Admin UI **Error Handling:** Structured `ExecutionResult` with stdout, stderr, exit_code, execution_time ## Configuration Scripts Two scripts for quick Aliyun sandbox setup: **Shell Script (requires jq):** ```bash source scripts/configure_aliyun_sandbox.sh ``` **Python Script (interactive):** ```bash python3 scripts/configure_aliyun_sandbox.py ``` ## Testing ```bash # Unit tests uv run pytest agent/sandbox/tests/test_providers.py -v # Aliyun provider tests uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter.py -v # Integration tests (requires credentials) uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py -v # Quick SDK validation python3 agent/sandbox/tests/verify_sdk.py ``` **Test Coverage:** - 30 unit tests for provider abstraction - Provider-specific tests for Aliyun - Integration tests with real API - Security tests for executor_manager ## Documentation - `docs/develop/sandbox_spec.md` - Complete architecture specification - `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration from legacy sandbox - `agent/sandbox/tests/QUICKSTART.md` - Quick start guide - `agent/sandbox/tests/README.md` - Testing documentation ## Breaking Changes ⚠️ **Migration Required:** 1. **Directory Move**: `sandbox/` → `agent/sandbox/` - Update imports: `from sandbox.` → `from agent.sandbox.` 2. **Mandatory Configuration**: - SystemSettings must have `sandbox.provider_type` configured - Removed fallback default values - Configuration must exist in database (from `conf/system_settings.json`) 3. **Aliyun Credentials**: - Requires `AGENTRUN_*` environment variables (not `ALIYUN_*`) - `AGENTRUN_ACCOUNT_ID` is now required (Aliyun primary account ID) 4. 
**Self-Managed Provider**: - gVisor (runsc) must be installed for security - Install: `go install gvisor.dev/gvisor/runsc@latest` ## Database Schema Changes ```python # SystemSettings.value: CharField → TextField api/db/db_models.py: Changed for unlimited config length # SystemSettingsService.get_by_name(): Fixed query precision api/db/services/system_settings_service.py: startswith → exact match ``` ## Files Changed ### Backend (Python) - `agent/sandbox/providers/base.py` - SandboxProvider ABC interface - `agent/sandbox/providers/manager.py` - ProviderManager - `agent/sandbox/providers/self_managed.py` - Self-managed provider - `agent/sandbox/providers/aliyun_codeinterpreter.py` - Aliyun provider - `agent/sandbox/providers/e2b.py` - E2B provider (placeholder) - `agent/sandbox/client.py` - Unified client (enforces DB-only config) - `agent/tools/code_exec.py` - Updated to use provider system - `admin/server/services.py` - SandboxMgr with registry & validation - `admin/server/routes.py` - 5 sandbox API endpoints - `conf/system_settings.json` - Default: aliyun_codeinterpreter - `api/db/db_models.py` - TextField for SystemSettings.value - `api/db/services/system_settings_service.py` - Exact match query ### Frontend (TypeScript/React) - `web/src/pages/admin/sandbox-settings.tsx` - Settings UI - `web/src/services/admin-service.ts` - Sandbox service functions - `web/src/services/admin.service.d.ts` - Type definitions - `web/src/utils/api.ts` - Sandbox API endpoints ### Documentation - `docs/develop/sandbox_spec.md` - Architecture spec - `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration guide - `agent/sandbox/tests/QUICKSTART.md` - Quick start - `agent/sandbox/tests/README.md` - Testing guide ### Configuration Scripts - `scripts/configure_aliyun_sandbox.sh` - Shell script (jq) - `scripts/configure_aliyun_sandbox.py` - Python script ### Tests - `agent/sandbox/tests/test_providers.py` - 30 unit tests - `agent/sandbox/tests/test_aliyun_codeinterpreter.py` - Provider tests 
- `agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py` - Integration tests - `agent/sandbox/tests/verify_sdk.py` - SDK validation ## Architecture ``` Admin UI → Admin API → SandboxMgr → ProviderManager → [SelfManaged|Aliyun|E2B] ↓ SystemSettings ``` ## Usage ### 1. Configure Provider **Via Admin UI:** 1. Navigate to `/admin/sandbox-settings` 2. Select provider (Aliyun Code Interpreter / Self-Managed) 3. Fill in configuration 4. Click "Test Connection" to verify 5. Click "Save" to apply **Via Configuration Scripts:** ```bash # Aliyun provider export AGENTRUN_ACCESS_KEY_ID="xxx" export AGENTRUN_ACCESS_KEY_SECRET="yyy" export AGENTRUN_ACCOUNT_ID="zzz" export AGENTRUN_REGION="cn-shanghai" source scripts/configure_aliyun_sandbox.sh ``` ### 2. Restart Service ```bash cd docker docker compose restart ragflow-server ``` ### 3. Execute Code in Agent ```python from agent.sandbox.client import execute_code result = execute_code( code='def main(name: str) -> dict: return {"message": f"Hello {name}!"}', language="python", timeout=30, arguments={"name": "World"} ) print(result.stdout) # {"message": "Hello World!"} ``` ## Troubleshooting ### "Container pool is busy" (Self-Managed) - **Cause**: Pool exhausted (default: 1 container in `.env`) - **Fix**: Increase `SANDBOX_EXECUTOR_MANAGER_POOL_SIZE` to 5+ ### "Sandbox provider type not configured" - **Cause**: Database missing configuration - **Fix**: Run config script or set via Admin UI ### "gVisor not found" - **Cause**: runsc not installed - **Fix**: `go install gvisor.dev/gvisor/runsc@latest && sudo cp ~/go/bin/runsc /usr/local/bin/` ### Aliyun authentication errors - **Cause**: Wrong environment variable names - **Fix**: Use `AGENTRUN_*` prefix (not `ALIYUN_*`) ## Checklist - [x] All tests passing (30 unit tests + integration tests) - [x] Documentation updated (spec, migration guide, quickstart) - [x] Type definitions added (TypeScript) - [x] Admin UI implemented - [x] Configuration validation - [x] Health checks 
implemented - [x] Error handling with structured results - [x] Breaking changes documented - [x] Configuration scripts created - [x] gVisor requirements documented Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-28 13:28:21 +08:00
# "pywencai>=0.13.1,<1.0.0", # Temporarily disabled: conflicts with agentrun-sdk (pydash>=8), needed for agent/tools/wencai.py
"qianfan==0.4.6",
"quart-auth==0.11.0",
"quart-cors==0.8.0",
"ranx==0.3.20",
"readability-lxml>=0.8.4,<1.0.0",
"replicate==0.31.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"reportlab>=4.4.1",
"roman-numbers==1.0.2",
"ruamel-base==1.0.0",
"ruamel-yaml>=0.18.6,<0.19.0",
"scholarly==1.7.11",
"selenium-wire==5.1.0",
"spacy==3.8.14",
"en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
"slack-sdk==3.37.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"socksio==1.0.0",
feat: Implement pluggable multi-provider sandbox architecture (#12820) ## Summary Implement a flexible sandbox provider system supporting both self-managed (Docker) and SaaS (Aliyun Code Interpreter) backends for secure code execution in agent workflows. **Key Changes:** - ✅ Aliyun Code Interpreter provider using official `agentrun-sdk>=0.0.16` - ✅ Self-managed provider with gVisor (runsc) security - ✅ Arguments parameter support for dynamic code execution - ✅ Database-only configuration (removed fallback logic) - ✅ Configuration scripts for quick setup Issue #12479 ## Features ### 🔌 Provider Abstraction Layer **1. Self-Managed Provider** (`agent/sandbox/providers/self_managed.py`) - Wraps existing executor_manager HTTP API - gVisor (runsc) for secure container isolation - Configurable pool size, timeout, retry logic - Languages: Python, Node.js, JavaScript - ⚠️ **Requires**: gVisor installation, Docker, base images **2. Aliyun Code Interpreter** (`agent/sandbox/providers/aliyun_codeinterpreter.py`) - SaaS integration using official agentrun-sdk - Serverless microVM execution with auto-authentication - Hard timeout: 30 seconds max - Credentials: `AGENTRUN_ACCESS_KEY_ID`, `AGENTRUN_ACCESS_KEY_SECRET`, `AGENTRUN_ACCOUNT_ID`, `AGENTRUN_REGION` - Automatically wraps code to call `main()` function **3. E2B Provider** (`agent/sandbox/providers/e2b.py`) - Placeholder for future integration ### ⚙️ Configuration System - `conf/system_settings.json`: Default provider = `aliyun_codeinterpreter` - `agent/sandbox/client.py`: Enforces database-only configuration - Admin UI: `/admin/sandbox-settings` - Configuration validation via `validate_config()` method - Health checks for all providers ### 🎯 Key Capabilities **Arguments Parameter Support:** All providers support passing arguments to `main()` function: ```python # User code def main(name: str, count: int) -> dict: return {"message": f"Hello {name}!" 
* count} # Executed with: arguments={"name": "World", "count": 3} # Result: {"message": "Hello World!Hello World!Hello World!"} ``` **Self-Describing Providers:** Each provider implements `get_config_schema()` returning form configuration for Admin UI **Error Handling:** Structured `ExecutionResult` with stdout, stderr, exit_code, execution_time ## Configuration Scripts Two scripts for quick Aliyun sandbox setup: **Shell Script (requires jq):** ```bash source scripts/configure_aliyun_sandbox.sh ``` **Python Script (interactive):** ```bash python3 scripts/configure_aliyun_sandbox.py ``` ## Testing ```bash # Unit tests uv run pytest agent/sandbox/tests/test_providers.py -v # Aliyun provider tests uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter.py -v # Integration tests (requires credentials) uv run pytest agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py -v # Quick SDK validation python3 agent/sandbox/tests/verify_sdk.py ``` **Test Coverage:** - 30 unit tests for provider abstraction - Provider-specific tests for Aliyun - Integration tests with real API - Security tests for executor_manager ## Documentation - `docs/develop/sandbox_spec.md` - Complete architecture specification - `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration from legacy sandbox - `agent/sandbox/tests/QUICKSTART.md` - Quick start guide - `agent/sandbox/tests/README.md` - Testing documentation ## Breaking Changes ⚠️ **Migration Required:** 1. **Directory Move**: `sandbox/` → `agent/sandbox/` - Update imports: `from sandbox.` → `from agent.sandbox.` 2. **Mandatory Configuration**: - SystemSettings must have `sandbox.provider_type` configured - Removed fallback default values - Configuration must exist in database (from `conf/system_settings.json`) 3. **Aliyun Credentials**: - Requires `AGENTRUN_*` environment variables (not `ALIYUN_*`) - `AGENTRUN_ACCOUNT_ID` is now required (Aliyun primary account ID) 4. 
**Self-Managed Provider**: - gVisor (runsc) must be installed for security - Install: `go install gvisor.dev/gvisor/runsc@latest` ## Database Schema Changes ```python # SystemSettings.value: CharField → TextField api/db/db_models.py: Changed for unlimited config length # SystemSettingsService.get_by_name(): Fixed query precision api/db/services/system_settings_service.py: startswith → exact match ``` ## Files Changed ### Backend (Python) - `agent/sandbox/providers/base.py` - SandboxProvider ABC interface - `agent/sandbox/providers/manager.py` - ProviderManager - `agent/sandbox/providers/self_managed.py` - Self-managed provider - `agent/sandbox/providers/aliyun_codeinterpreter.py` - Aliyun provider - `agent/sandbox/providers/e2b.py` - E2B provider (placeholder) - `agent/sandbox/client.py` - Unified client (enforces DB-only config) - `agent/tools/code_exec.py` - Updated to use provider system - `admin/server/services.py` - SandboxMgr with registry & validation - `admin/server/routes.py` - 5 sandbox API endpoints - `conf/system_settings.json` - Default: aliyun_codeinterpreter - `api/db/db_models.py` - TextField for SystemSettings.value - `api/db/services/system_settings_service.py` - Exact match query ### Frontend (TypeScript/React) - `web/src/pages/admin/sandbox-settings.tsx` - Settings UI - `web/src/services/admin-service.ts` - Sandbox service functions - `web/src/services/admin.service.d.ts` - Type definitions - `web/src/utils/api.ts` - Sandbox API endpoints ### Documentation - `docs/develop/sandbox_spec.md` - Architecture spec - `agent/sandbox/tests/MIGRATION_GUIDE.md` - Migration guide - `agent/sandbox/tests/QUICKSTART.md` - Quick start - `agent/sandbox/tests/README.md` - Testing guide ### Configuration Scripts - `scripts/configure_aliyun_sandbox.sh` - Shell script (jq) - `scripts/configure_aliyun_sandbox.py` - Python script ### Tests - `agent/sandbox/tests/test_providers.py` - 30 unit tests - `agent/sandbox/tests/test_aliyun_codeinterpreter.py` - Provider tests 
- `agent/sandbox/tests/test_aliyun_codeinterpreter_integration.py` - Integration tests - `agent/sandbox/tests/verify_sdk.py` - SDK validation ## Architecture ``` Admin UI → Admin API → SandboxMgr → ProviderManager → [SelfManaged|Aliyun|E2B] ↓ SystemSettings ``` ## Usage ### 1. Configure Provider **Via Admin UI:** 1. Navigate to `/admin/sandbox-settings` 2. Select provider (Aliyun Code Interpreter / Self-Managed) 3. Fill in configuration 4. Click "Test Connection" to verify 5. Click "Save" to apply **Via Configuration Scripts:** ```bash # Aliyun provider export AGENTRUN_ACCESS_KEY_ID="xxx" export AGENTRUN_ACCESS_KEY_SECRET="yyy" export AGENTRUN_ACCOUNT_ID="zzz" export AGENTRUN_REGION="cn-shanghai" source scripts/configure_aliyun_sandbox.sh ``` ### 2. Restart Service ```bash cd docker docker compose restart ragflow-server ``` ### 3. Execute Code in Agent ```python from agent.sandbox.client import execute_code result = execute_code( code='def main(name: str) -> dict: return {"message": f"Hello {name}!"}', language="python", timeout=30, arguments={"name": "World"} ) print(result.stdout) # {"message": "Hello World!"} ``` ## Troubleshooting ### "Container pool is busy" (Self-Managed) - **Cause**: Pool exhausted (default: 1 container in `.env`) - **Fix**: Increase `SANDBOX_EXECUTOR_MANAGER_POOL_SIZE` to 5+ ### "Sandbox provider type not configured" - **Cause**: Database missing configuration - **Fix**: Run config script or set via Admin UI ### "gVisor not found" - **Cause**: runsc not installed - **Fix**: `go install gvisor.dev/gvisor/runsc@latest && sudo cp ~/go/bin/runsc /usr/local/bin/` ### Aliyun authentication errors - **Cause**: Wrong environment variable names - **Fix**: Use `AGENTRUN_*` prefix (not `ALIYUN_*`) ## Checklist - [x] All tests passing (30 unit tests + integration tests) - [x] Documentation updated (spec, migration guide, quickstart) - [x] Type definitions added (TypeScript) - [x] Admin UI implemented - [x] Configuration validation - [x] Health checks 
implemented - [x] Error handling with structured results - [x] Breaking changes documented - [x] Configuration scripts created - [x] gVisor requirements documented Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-28 13:28:21 +08:00
"agentrun-sdk>=0.0.16,<1.0.0",
"nest-asyncio>=1.6.0,<2.0.0", # Needed for agent/component/message.py
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"sqlglotrs==0.9.0",
"tavily-python==0.5.1",
"tencentcloud-sdk-python==3.0.1478",
"tika==2.6.0",
Remove unused py module dependencies (#11964) ### What problem does this PR solve? ``` pip list --not-required Package Version ---------------------------- --------------------- aiosmtplib 5.0.0 akshare 1.17.94 anthropic 0.34.1 arxiv 2.1.3 Aspose.Slides 24.7.0 atlassian-python-api 4.0.7 azure-identity 1.17.1 azure-storage-file-datalake 12.16.0 bio 1.7.1 boxsdk 10.2.0 captcha 0.7.1 cn2an 0.5.22 cohere 5.6.2 Crawl4AI 0.4.247 dashscope 1.20.11 deepl 1.18.0 demjson3 3.0.6 discord.py 2.3.2 dropbox 12.0.2 duckduckgo_search 7.5.5 editdistance 0.8.1 elasticsearch-dsl 8.12.0 exceptiongroup 1.3.1 extract-msg 0.55.0 ffmpeg-python 0.2.0 flasgger 0.9.7.1 Flask-Cors 5.0.0 Flask-Login 0.6.3 Flask-Mail 0.10.0 Flask-Session 0.8.0 google-auth-oauthlib 1.2.3 google-genai 1.55.0 google-generativeai 0.8.5 google_search_results 2.4.2 graspologic 0.1.dev847+g38e680cab groq 0.9.0 grpcio-status 1.67.1 html_text 0.6.2 imageio-ffmpeg 0.6.0 infinity_emb 0.0.66 infinity-sdk 0.6.11 jira 3.10.5 json_repair 0.35.0 langfuse 3.10.5 mammoth 1.11.0 Markdown 3.6 markdown_to_json 2.1.1 markdownify 1.2.2 mcp 1.19.0 mini-racer 0.12.4 minio 7.2.4 mistralai 0.4.2 moodlepy 0.24.1 mypy-boto3-s3 1.40.26 Office365-REST-Python-Client 2.6.2 ollama 0.6.1 onnxruntime-gpu 1.23.2 opencv-python 4.10.0.84 opencv-python-headless 4.10.0.84 opendal 0.45.20 opensearch-py 2.7.1 ormsgpack 1.5.0 pdfplumber 0.10.4 pip 25.3 pluginlib 0.9.4 psycopg2-binary 2.9.11 pyclipper 1.4.0 pycryptodomex 3.20.0 pyobvector 0.2.18 pyodbc 5.3.0 pypandoc 1.16.2 pypdf 6.4.0 PyPDF2 3.0.1 python-calamine 0.6.1 python-docx 1.2.0 python-pptx 1.0.2 pywencai 0.13.1 qianfan 0.4.6 quart-auth 0.11.0 quart-cors 0.8.0 ranx 0.3.20 readability-lxml 0.8.4.1 replicate 0.31.0 reportlab 4.4.6 roman-numbers 1.0.2 ruamel.base 1.0.0 ruamel.yaml 0.18.16 scholarly 1.7.11 selenium-wire 5.1.0 slack_sdk 3.37.0 socksio 1.0.0 sqlglotrs 0.9.0 StrEnum 0.4.15 tavily-python 0.5.1 tencentcloud-sdk-python 3.0.1478 tika 2.6.0 valkey 6.0.2 vertexai 1.70.0 volcengine 1.0.194 
voyageai 0.2.3 webdav4 0.10.0 webdriver-manager 4.0.1 wikipedia 1.4.0 word2number 1.1 xgboost 1.6.0 xpinyin 0.7.6 yfinance 0.2.65 zhipuai 2.0.1 ``` ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-16 12:40:03 +08:00
"valkey==6.0.2",
"volcengine==1.0.194",
"voyageai==0.2.3",
"webdav4>=0.10.0,<0.11.0",
"webdriver-manager==4.0.1",
"wikipedia==1.4.0",
"word2number==1.1",
"xgboost==1.6.0",
"setuptools>=78.1.1,<81.0.0",
"xpinyin==0.7.6",
"yfinance==0.2.65",
"zhipuai==2.0.1",
"peewee>=3.17.1,<4.0.0",
# The commented-out modules below aren't necessary; entries left uncommented in this section are still active dependencies.
# "nltk==3.9.1",
# "numpy>=1.26.0,<2.0.0",
# "openai>=1.45.0",
# "openpyxl>=3.1.0,<4.0.0",
# "pandas>=2.2.0,<3.0.0",
# "pillow>=12.2.0,<13.0.0",
# "protobuf==5.27.2",
# "pymysql>=1.1.1,<2.0.0",
# "python-dotenv==1.0.1",
# "python-dateutil==2.8.2",
# "Quart==0.20.0",
# "requests>=2.32.3,<3.0.0",
# "scikit-learn==1.5.0",
# "selenium==4.22.0",
# "shapely==2.0.5",
# "six==1.16.0",
# "tabulate==0.9.0",
# "tiktoken==0.7.0",
# "umap_learn==0.5.6",
# "werkzeug==3.0.6",
# "xxhash>=3.5.0,<4.0.0",
# "trio>=0.17.0,<0.29.0",
# "click>=8.1.8",
"litellm~=1.82.0,!=1.82.7,!=1.82.8",
# "pip>=25.2",
# "imageio-ffmpeg>=0.6.0",
# "cryptography==46.0.3",
# "jinja2>=3.1.0",
"pyairtable>=3.3.0",
"pygithub>=2.8.1",
"asana>=5.2.2",
"python-gitlab>=7.0.0",
"alibabacloud-dingtalk>=2.0.0",
"quart-schema==0.23.0",
]
# Dependency groups are kept separate from the `[project] dependencies` array.
# `test` lists the pytest runner and its plugins alongside the other packages
# declared for this group.
[dependency-groups]
test = [
    "hypothesis>=6.132.0",
    "openpyxl>=3.1.5",
    "pillow>=12.2.0,<13.0.0",
    "pytest>=8.3.5",
    "pytest-asyncio>=1.3.0",
    "pytest-xdist>=3.8.0",
    "pytest-cov>=7.0.0",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "reportlab>=4.4.1",
    "requests>=2.32.2",
    "requests-toolbelt>=1.0.0",
    # Exact pin; every other entry in this group uses a lower bound or a range.
    "pycryptodomex==3.20.0",
    "pytest-playwright>=0.7.2",
    # NOTE(review): the `codecov` PyPI package is reportedly deprecated
    # upstream - confirm it is still needed.
    "codecov>=2.1.13",
]
# Version floors listed under uv's `constraint-dependencies` setting.
# Each entry carries the reason it exists so it can be dropped once the
# underlying issue no longer applies.
[tool.uv]
constraint-dependencies = [
    # CVE-2026-30922: Denial of Service via unbounded recursion in ASN.1 decoding (CVSS 7.5 HIGH)
    # pyasn1 < 0.6.3 is vulnerable; pulled in transitively via google-auth / rsa / pyasn1-modules
    "pyasn1>=0.6.3",
    # Python 3.13 added pathlib.PurePath.parser as a public class attribute holding
    # the posixpath/ntpath module. trio<0.26 introspects all Path class attributes to
    # generate async forwards and raises TypeError on any non-callable attribute it
    # encounters (fixed in trio 0.26 by skipping non-callables). Pulled in transitively
    # via selenium-wire -> trio-websocket -> trio.
    "trio>=0.26.0",
]
feat: bump Python minimum from 3.12 to 3.13, drop strenum backport (#14767) Closes #14753 ## What changed | File | Change | |---|---| | `pyproject.toml` | `requires-python` → `>=3.13,<3.15`; remove `strenum==0.4.15` | | `Dockerfile` | `uv python install 3.13`, `uv sync --python 3.13` | | `.github/workflows/tests.yml` | `uv sync --python 3.13` on both matrix legs | | `CLAUDE.md` | dev setup command + requirements note updated | | `deepdoc/parser/mineru_parser.py` | `from strenum import StrEnum` → `from enum import StrEnum` | | `agent/tools/code_exec.py` | same | `StrEnum` has been in the stdlib since Python 3.11 — the `strenum` backport package is no longer needed once the floor is 3.13. ## Why uv.lock is not regenerated `uv lock --python 3.13` fails because: 1. The infiniflow/graspologic fork pins `numpy>=1.26.4,<2.0.0` 2. `tensorflow-cpu>=2.20.0` (the first release with cp313 wheels) depends on `ml-dtypes>=0.5.1`, which requires `numpy>=2.1.0` 3. These two constraints are irreconcilable on Python 3.13 The lockfile regeneration requires loosening the `numpy` upper bound in the `infiniflow/graspologic` fork. Once that fork commit is updated and the SHA in `pyproject.toml:49` is bumped, `uv lock --python 3.13` will succeed. ## RFC corrections Two claims in the original RFC (#14753) did not hold up under code review: - **"graspologic hard-blocks 3.13"** — the infiniflow fork at the pinned commit has no `<3.13` Python constraint. The blocker is the transitive `numpy<2.0.0` conflict with tensorflow-cpu's test dependency, not a direct Python version cap. - **"free-threading throughput gains for I/O-bound workload"** — Python 3.13 free-threading requires a special `--disable-gil` build and provides no benefit for async I/O code (the GIL is already released during I/O). The real motivation is forward compatibility and improved error messages.
2026-05-15 08:40:53 +02:00
# Forced attrs floor. moodlepy<=0.24.1 declares attrs<23.0.0, whereas
# trio>=0.26.0 needs attrs>=23.2.0; the two cannot be resolved together without
# an override. attrs 23.x is backward-compatible and moodlepy works fine with it
# at runtime, so the trio requirement wins.
override-dependencies = ["attrs>=23.2.0"]
# The Aliyun mirror declared below does not yet carry trio 0.26+ (the Python
# 3.13 compatible releases). PyPI is therefore declared as an `explicit` index:
# it is consulted only for packages that [tool.uv.sources] pins to it by name.
[[tool.uv.index]]
explicit = true
name = "pypi"
url = "https://pypi.org/simple"

# Aliyun PyPI mirror (unnamed, not explicit).
[[tool.uv.index]]
url = "https://mirrors.aliyun.com/pypi/simple"
feat: bump Python minimum from 3.12 to 3.13, drop strenum backport (#14767) Closes #14753 ## What changed | File | Change | |---|---| | `pyproject.toml` | `requires-python` → `>=3.13,<3.15`; remove `strenum==0.4.15` | | `Dockerfile` | `uv python install 3.13`, `uv sync --python 3.13` | | `.github/workflows/tests.yml` | `uv sync --python 3.13` on both matrix legs | | `CLAUDE.md` | dev setup command + requirements note updated | | `deepdoc/parser/mineru_parser.py` | `from strenum import StrEnum` → `from enum import StrEnum` | | `agent/tools/code_exec.py` | same | `StrEnum` has been in the stdlib since Python 3.11 — the `strenum` backport package is no longer needed once the floor is 3.13. ## Why uv.lock is not regenerated `uv lock --python 3.13` fails because: 1. The infiniflow/graspologic fork pins `numpy>=1.26.4,<2.0.0` 2. `tensorflow-cpu>=2.20.0` (the first release with cp313 wheels) depends on `ml-dtypes>=0.5.1`, which requires `numpy>=2.1.0` 3. These two constraints are irreconcilable on Python 3.13 The lockfile regeneration requires loosening the `numpy` upper bound in the `infiniflow/graspologic` fork. Once that fork commit is updated and the SHA in `pyproject.toml:49` is bumped, `uv lock --python 3.13` will succeed. ## RFC corrections Two claims in the original RFC (#14753) did not hold up under code review: - **"graspologic hard-blocks 3.13"** — the infiniflow fork at the pinned commit has no `<3.13` Python constraint. The blocker is the transitive `numpy<2.0.0` conflict with tensorflow-cpu's test dependency, not a direct Python version cap. - **"free-threading throughput gains for I/O-bound workload"** — Python 3.13 free-threading requires a special `--disable-gil` build and provides no benefit for async I/O code (the GIL is already released during I/O). The real motivation is forward compatibility and improved error messages.
2026-05-15 08:40:53 +02:00
# Per-package index pinning: trio is taken from the index named "pypi".
[tool.uv.sources]
trio = [
    { index = "pypi" },
]
# Explicit package list handed to setuptools.
[tool.setuptools]
packages = [
    "agent",
    "api",
    "deepdoc",
    "graphrag",
    # NOTE(review): "intergrations" presumably mirrors the on-disk directory
    # name; confirm against the repository before correcting the spelling.
    "intergrations.chatgpt-on-wechat.plugins",
    "mcp.server",
    "rag",
    "sdk.python.ragflow_sdk",
]
# Ruff: project-wide settings.
[tool.ruff]
# Paths Ruff skips entirely.
exclude = [
    ".venv",
    "rag/svr/discord_svr.py",
]
line-length = 200
# Ruff: lint rule selection.
[tool.ruff.lint]
# E402 is switched off for the whole project.
ignore = ["E402"]
# Rule selectors enabled in addition to the base selection.
# NOTE(review): "ASYNC1" looks like a sub-prefix of "ASYNC" and may be
# redundant; confirm against Ruff's selector rules before removing it.
extend-select = [
    "ASYNC",
    "ASYNC1",
]
[tool.pytest.ini_options]
# --- Discovery: where tests live and which names are collected ---
pythonpath = ["."]
testpaths = ["test"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

# --- Markers: registered here because addopts passes --strict-markers ---
markers = [
    "p0: critical priority test cases",
    "p1: high priority test cases",
    "p2: medium priority test cases",
    "p3: low priority test cases",
    "smoke: smoke test cases",
    "auth: authentication UI tests",
    "asyncio: mark test as async",
]

# --- Async test collection and runtime configuration ---
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"

# --- Warning filters ---
filterwarnings = [
    # Treat warnings as errors ...
    "error",
    # ... except the specific ones ignored below.
    "ignore::DeprecationWarning",
    "ignore:pkg_resources is deprecated:UserWarning",
]

# --- Command line options added to every pytest run ---
addopts = [
    # Verbose output.
    "-v",
    # Enforce marker definitions.
    "--strict-markers",
    # Simplified traceback.
    "--tb=short",
    # Disable warnings.
    "--disable-warnings",
    # Colored output.
    "--color=yes",
    # anyio's pytest plugin conflicts with pytest-asyncio on Py3.13, so it is
    # turned off; "-p" and "no:anyio" form a single option pair.
    "-p",
    "no:anyio",
]
# coverage.py: measurement settings.
[tool.coverage.run]
# Source paths to measure; adjust to the project structure. Only
# "../../common" is active. The commented-out entries are kept as ready-made
# additions; add more directories if needed.
source = [
    # "../../api/db/services",
    "../../common",
    # "../../utils",
]
# Files and directories left out of measurement.
omit = [
    # Test code
    "*/tests/*",
    "*/test_*",
    # Caches
    "*/__pycache__/*",
    "*/.pytest_cache/*",
    # Virtual environments and installed packages
    "*/venv/*",
    "*/.venv/*",
    "*/env/*",
    "*/site-packages/*",
    # Build output
    "*/dist/*",
    "*/build/*",
    # Other
    "*/migrations/*",
    "setup.py",
]
[tool.coverage.report]
# Report configuration
precision = 2
show_missing = true
skip_covered = false
# Minimum coverage requirement (0-100); 0 never fails the run.
fail_under = 0
# Regexes for lines to exclude from the report. coverage.py excludes any line
# that contains a match and, when that line opens a block, the whole block.
# Defining this key replaces coverage.py's built-in patterns, so
# "pragma: no cover" is not honoured while it stays commented out below.
exclude_lines = [
    # "pragma: no cover",
    # "def __repr__",
    # "raise AssertionError",
    # "raise NotImplementedError",
    # "if __name__ == .__main__.:",
    # "if TYPE_CHECKING:",
    # Anchored so that only bare `pass` statements are excluded. An unanchored
    # "pass" would also match lines such as `password = ...` or
    # `def bypass(...)` and silently drop them (and their blocks) from the report.
    '^\s*pass\s*$',
]
# coverage.py: HTML report settings.
[tool.coverage.html]
title = "Test Coverage Report"
directory = "htmlcov"
# Optional custom CSS; uncomment to enable.
# extra_css = "custom.css"