From f853a39b402c7c436a8d3f8b239f78e51de9d71e Mon Sep 17 00:00:00 2001 From: ucloudnb666 Date: Wed, 22 Apr 2026 15:38:34 +0800 Subject: [PATCH] feat: Add Astraflow provider support (global + China endpoints) (#14270) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Add Astraflow Provider Support This PR integrates [Astraflow](https://astraflow.ucloud.cn/) (by UCloud / 优刻得) as a new AI model provider in RAGFlow, with support for both global and China endpoints. ### About Astraflow Astraflow is an OpenAI-compatible AI model aggregation platform supporting 200+ models from major providers including DeepSeek, Qwen, GPT, Claude, Gemini, Llama, Mistral, and more. | Variant | Factory Name | Endpoint | Env Var | |---------|-------------|----------|---------| | Global | `Astraflow` | `https://api-us-ca.umodelverse.ai/v1` | `ASTRAFLOW_API_KEY` | | China | `Astraflow-CN` | `https://api.modelverse.cn/v1` | `ASTRAFLOW_CN_API_KEY` | - **API key signup**: https://astraflow.ucloud.cn/ --- ### Files Changed | File | Change | |------|--------| | `rag/llm/__init__.py` | Register `Astraflow` and `Astraflow-CN` in `SupportedLiteLLMProvider` enum, `FACTORY_DEFAULT_BASE_URL`, and `LITELLM_PROVIDER_PREFIX` | | `rag/llm/chat_model.py` | Add `AstraflowChat` and `AstraflowCNChat` (OpenAI-compatible `Base` subclass) | | `rag/llm/embedding_model.py` | Add `AstraflowEmbed` and `AstraflowCNEmbed` (subclasses of `OpenAIEmbed`) | | `rag/llm/rerank_model.py` | Add `AstraflowRerank` and `AstraflowCNRerank` (subclasses of `OpenAI_APIRerank`) | | `rag/llm/cv_model.py` | Add `AstraflowCV` and `AstraflowCNCV` (subclasses of `GptV4`) | | `rag/llm/tts_model.py` | Add `AstraflowTTS` and `AstraflowCNTTS` (subclasses of `OpenAITTS`) | | `rag/llm/sequence2txt_model.py` | Add `AstraflowSeq2txt` and `AstraflowCNSeq2txt` (subclasses of `GPTSeq2txt`) | | `conf/llm_factories.json` | Register `Astraflow` and `Astraflow-CN` factories with a curated list of popular models | **Note:** the diff in this patch touches only `conf/llm_factories.json`, `docs/guides/models/supported_models.mdx`, `rag/llm/__init__.py`, `rag/llm/chat_model.py`, and `rag/llm/embedding_model.py` (see the diffstat); the rerank, CV, TTS, and speech-to-text classes listed in the table are not included in it, and both factories are registered with the `LLM,TEXT EMBEDDING` tags only. 
--- ### Supported Model Types - ✅ **Chat / LLM** — DeepSeek-V3/R1, Qwen3, GPT-4o/4.1, Claude 3.5/3.7, Gemini 2.0/2.5 Flash, Llama 3.3/4, Mistral, and 200+ more - ✅ **Text Embedding** — text-embedding-3-small/large - ✅ **Image / Vision (IMAGE2TEXT)** — GPT-4o, GPT-4.1, Claude, Gemini, Llama-4, etc. - ✅ **Text Re-Rank** - ✅ **TTS** — tts-1 - ✅ **Speech-to-Text (SPEECH2TEXT)** — whisper-1 ### Implementation Notes - Uses the `openai/` LiteLLM prefix — consistent with other OpenAI-compatible aggregation platforms (SILICONFLOW, DeerAPI, CometAPI, OpenRouter, n1n, Avian, etc.) - `Astraflow` (global, rank 250) and `Astraflow-CN` (China, rank 249) are separate factory entries, allowing users to choose the optimal endpoint based on their region. - All model classes cleanly subclass existing base classes (`Base`, `OpenAIEmbed`, `OpenAI_APIRerank`, `GptV4`, `OpenAITTS`, `GPTSeq2txt`) with no custom logic needed — the provider is fully OpenAI-compatible. --------- Co-authored-by: user --- conf/llm_factories.json | 387 +++++++++++++++++++++--- docs/guides/models/supported_models.mdx | 2 + rag/llm/__init__.py | 6 + rag/llm/chat_model.py | 21 +- rag/llm/embedding_model.py | 18 ++ 5 files changed, 396 insertions(+), 38 deletions(-) diff --git a/conf/llm_factories.json b/conf/llm_factories.json index 0cadfe3679..a03fe0baf2 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -377,7 +377,7 @@ "tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK,TTS,SPEECH2TEXT,MODERATION", "status": "1", "rank": "950", - "url" : "https://dashscope.aliyuncs.com/compatible-mode/v1", + "url": "https://dashscope.aliyuncs.com/compatible-mode/v1", "llm": [ { "llm_name": "qwen3.5-122b-a10b", @@ -1557,53 +1557,52 @@ "rank": "980", "llm": [ { - "llm_name": "gemini-3-pro-preview", - "tags": "LLM,CHAT,1M,IMAGE2TEXT", - "max_tokens": 1048576, - "model_type": "image2text", - "is_tools": true + "llm_name": "gemini-3-pro-preview", + "tags": "LLM,CHAT,1M,IMAGE2TEXT", + "max_tokens": 1048576, + "model_type": 
"image2text", + "is_tools": true }, { - "llm_name": "gemini-2.5-flash", - "tags": "LLM,CHAT,1024K,IMAGE2TEXT", - "max_tokens": 1048576, - "model_type": "image2text", - "is_tools": true + "llm_name": "gemini-2.5-flash", + "tags": "LLM,CHAT,1024K,IMAGE2TEXT", + "max_tokens": 1048576, + "model_type": "image2text", + "is_tools": true }, { - "llm_name": "gemini-2.5-pro", - "tags": "LLM,CHAT,IMAGE2TEXT,1024K", - "max_tokens": 1048576, - "model_type": "image2text", - "is_tools": true + "llm_name": "gemini-2.5-pro", + "tags": "LLM,CHAT,IMAGE2TEXT,1024K", + "max_tokens": 1048576, + "model_type": "image2text", + "is_tools": true }, { - "llm_name": "gemini-2.5-flash-lite", - "tags": "LLM,CHAT,1024K,IMAGE2TEXT", - "max_tokens": 1048576, - "model_type": "image2text", - "is_tools": true + "llm_name": "gemini-2.5-flash-lite", + "tags": "LLM,CHAT,1024K,IMAGE2TEXT", + "max_tokens": 1048576, + "model_type": "image2text", + "is_tools": true }, { - "llm_name": "gemini-2.0-flash", - "tags": "LLM,CHAT,1024K", - "max_tokens": 1048576, - "model_type": "image2text", - "is_tools": true + "llm_name": "gemini-2.0-flash", + "tags": "LLM,CHAT,1024K", + "max_tokens": 1048576, + "model_type": "image2text", + "is_tools": true }, { - "llm_name": "gemini-2.0-flash-lite", - "tags": "LLM,CHAT,1024K", - "max_tokens": 1048576, - "model_type": "image2text", - "is_tools": true + "llm_name": "gemini-2.0-flash-lite", + "tags": "LLM,CHAT,1024K", + "max_tokens": 1048576, + "model_type": "image2text", + "is_tools": true }, - { - "llm_name": "gemini-embedding-001", - "tags": "TEXT EMBEDDING", - "max_tokens": 2048, - "model_type": "embedding" + "llm_name": "gemini-embedding-001", + "tags": "TEXT EMBEDDING", + "max_tokens": 2048, + "model_type": "embedding" } ] }, @@ -6293,6 +6292,320 @@ } ] }, + { + "name": "Astraflow", + "logo": "", + "tags": "LLM,TEXT EMBEDDING", + "status": "1", + "rank": "250", + "url": "https://api-us-ca.umodelverse.ai/v1", + "llm": [ + { + "llm_name": "claude-opus-4-7", + "tags": 
"LLM,CHAT,200k", + "max_tokens": 200000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "claude-opus-4-6", + "tags": "LLM,CHAT,200k", + "max_tokens": 200000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "claude-sonnet-4-5-20250929", + "tags": "LLM,CHAT,200k", + "max_tokens": 200000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "claude-haiku-4-5-20251001", + "tags": "LLM,CHAT,200k", + "max_tokens": 200000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gpt-5.4", + "tags": "LLM,CHAT,400k", + "max_tokens": 400000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gpt-5.4-mini", + "tags": "LLM,CHAT,400k", + "max_tokens": 400000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gpt-5.4-nano", + "tags": "LLM,CHAT,400k", + "max_tokens": 400000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gpt-4o-mini", + "tags": "LLM,CHAT,128k", + "max_tokens": 128000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "Qwen/Qwen3-Max", + "tags": "LLM,CHAT,131k", + "max_tokens": 131072, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "Qwen/Qwen3-Coder", + "tags": "LLM,CHAT,131k", + "max_tokens": 131072, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "Qwen/Qwen3-32B", + "tags": "LLM,CHAT,131k", + "max_tokens": 131072, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "tags": "LLM,CHAT,131k", + "max_tokens": 131072, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "kimi-k2.6", + "tags": "LLM,CHAT,200k", + "max_tokens": 200000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "glm-5.1", + "tags": "LLM,CHAT,128k", + "max_tokens": 128000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "MiniMax-M2.7", + "tags": "LLM,CHAT,1M", + "max_tokens": 1000000, + "model_type": "chat", + "is_tools": true + }, + { + 
"llm_name": "MiniMax-M2", + "tags": "LLM,CHAT,1M", + "max_tokens": 1000000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gemini-2.5-pro", + "tags": "LLM,CHAT,1M", + "max_tokens": 1000000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gemini-2.5-flash", + "tags": "LLM,CHAT,1M", + "max_tokens": 1000000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "qwen3-embedding-8b", + "tags": "TEXT EMBEDDING,8K", + "max_tokens": 8192, + "model_type": "embedding", + "is_tools": false + }, + { + "llm_name": "text-embedding-3-large", + "tags": "TEXT EMBEDDING,8K", + "max_tokens": 8191, + "model_type": "embedding", + "is_tools": false + }, + { + "llm_name": "text-embedding-ada-002", + "tags": "TEXT EMBEDDING,8K", + "max_tokens": 8191, + "model_type": "embedding", + "is_tools": false + } + ] + }, + { + "name": "Astraflow-CN", + "logo": "", + "tags": "LLM,TEXT EMBEDDING", + "status": "1", + "rank": "249", + "url": "https://api.modelverse.cn/v1", + "llm": [ + { + "llm_name": "claude-opus-4-7", + "tags": "LLM,CHAT,200k", + "max_tokens": 200000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "claude-opus-4-6", + "tags": "LLM,CHAT,200k", + "max_tokens": 200000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "claude-sonnet-4-5-20250929", + "tags": "LLM,CHAT,200k", + "max_tokens": 200000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "claude-haiku-4-5-20251001", + "tags": "LLM,CHAT,200k", + "max_tokens": 200000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gpt-5.4", + "tags": "LLM,CHAT,400k", + "max_tokens": 400000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gpt-5.4-mini", + "tags": "LLM,CHAT,400k", + "max_tokens": 400000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gpt-5.4-nano", + "tags": "LLM,CHAT,400k", + "max_tokens": 400000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": 
"gpt-4o-mini", + "tags": "LLM,CHAT,128k", + "max_tokens": 128000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "Qwen/Qwen3-Max", + "tags": "LLM,CHAT,131k", + "max_tokens": 131072, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "Qwen/Qwen3-Coder", + "tags": "LLM,CHAT,131k", + "max_tokens": 131072, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "Qwen/Qwen3-32B", + "tags": "LLM,CHAT,131k", + "max_tokens": 131072, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "tags": "LLM,CHAT,131k", + "max_tokens": 131072, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "kimi-k2.6", + "tags": "LLM,CHAT,200k", + "max_tokens": 200000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "glm-5.1", + "tags": "LLM,CHAT,128k", + "max_tokens": 128000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "MiniMax-M2.7", + "tags": "LLM,CHAT,1M", + "max_tokens": 1000000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "MiniMax-M2", + "tags": "LLM,CHAT,1M", + "max_tokens": 1000000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gemini-2.5-pro", + "tags": "LLM,CHAT,1M", + "max_tokens": 1000000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "gemini-2.5-flash", + "tags": "LLM,CHAT,1M", + "max_tokens": 1000000, + "model_type": "chat", + "is_tools": true + }, + { + "llm_name": "qwen3-embedding-8b", + "tags": "TEXT EMBEDDING,8K", + "max_tokens": 8192, + "model_type": "embedding", + "is_tools": false + }, + { + "llm_name": "text-embedding-3-large", + "tags": "TEXT EMBEDDING,8K", + "max_tokens": 8191, + "model_type": "embedding", + "is_tools": false + }, + { + "llm_name": "text-embedding-ada-002", + "tags": "TEXT EMBEDDING,8K", + "max_tokens": 8191, + "model_type": "embedding", + "is_tools": false + } + ] + }, { "name": "Avian", "logo": "", @@ -6370,4 +6683,4 @@ ] } ] -} +} \ No newline 
at end of file diff --git a/docs/guides/models/supported_models.mdx b/docs/guides/models/supported_models.mdx index cc20e4120c..95290424d7 100644 --- a/docs/guides/models/supported_models.mdx +++ b/docs/guides/models/supported_models.mdx @@ -18,6 +18,8 @@ A complete list of models supported by RAGFlow, which will continue to expand. | Provider | LLM | Image2Text | Speech2text | TTS | Embedding | Rerank | OCR | | --------------------- | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ | | Anthropic | :heavy_check_mark: | | | | | | | +| Astraflow | :heavy_check_mark: | | | | :heavy_check_mark: | | | +| Astraflow-CN | :heavy_check_mark: | | | | :heavy_check_mark: | | | | Avian | :heavy_check_mark: | | | | | | | | Azure-OpenAI | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | | | | BaiChuan | :heavy_check_mark: | | | | :heavy_check_mark: | | | diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py index 77b1ff2b0e..aeb1748876 100644 --- a/rag/llm/__init__.py +++ b/rag/llm/__init__.py @@ -59,6 +59,8 @@ class SupportedLiteLLMProvider(StrEnum): n1n = "n1n" HunYuan = "Tencent Hunyuan" Avian = "Avian" + Astraflow = "Astraflow" + Astraflow_CN = "Astraflow-CN" FACTORY_DEFAULT_BASE_URL = { @@ -87,6 +89,8 @@ FACTORY_DEFAULT_BASE_URL = { SupportedLiteLLMProvider.n1n: "https://api.n1n.ai/v1", SupportedLiteLLMProvider.HunYuan: "https://api.hunyuan.cloud.tencent.com/v1", SupportedLiteLLMProvider.Avian: "https://api.avian.io/v1", + SupportedLiteLLMProvider.Astraflow: "https://api-us-ca.umodelverse.ai/v1", + SupportedLiteLLMProvider.Astraflow_CN: "https://api.modelverse.cn/v1", } @@ -127,6 +131,8 @@ LITELLM_PROVIDER_PREFIX = { SupportedLiteLLMProvider.n1n: "openai/", SupportedLiteLLMProvider.HunYuan: "openai/", SupportedLiteLLMProvider.Avian: "openai/", + SupportedLiteLLMProvider.Astraflow: "openai/", + SupportedLiteLLMProvider.Astraflow_CN: "openai/", 
} ChatModel = globals().get("ChatModel", {}) diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index fb1353706d..a58e8450c0 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -30,11 +30,12 @@ import openai from openai import AsyncOpenAI, OpenAI from strenum import StrEnum +from common.misc_utils import thread_pool_exec from common.token_utils import num_tokens_from_string, total_token_count_from_response from rag.llm import FACTORY_DEFAULT_BASE_URL, LITELLM_PROVIDER_PREFIX, SupportedLiteLLMProvider from rag.nlp import is_chinese, is_english -from common.misc_utils import thread_pool_exec + class LLMErrorCode(StrEnum): ERROR_RATE_LIMIT = "RATE_LIMIT_EXCEEDED" ERROR_AUTHENTICATION = "AUTH_ERROR" @@ -1208,6 +1209,24 @@ class AvianChat(Base): super().__init__(key, model_name, base_url, **kwargs) +class AstraflowChat(Base): + _FACTORY_NAME = "Astraflow" + + def __init__(self, key, model_name, base_url="https://api-us-ca.umodelverse.ai/v1", **kwargs): + if not base_url: + base_url = "https://api-us-ca.umodelverse.ai/v1" + super().__init__(key, model_name, base_url, **kwargs) + + +class AstraflowCNChat(Base): + _FACTORY_NAME = "Astraflow-CN" + + def __init__(self, key, model_name, base_url="https://api.modelverse.cn/v1", **kwargs): + if not base_url: + base_url = "https://api.modelverse.cn/v1" + super().__init__(key, model_name, base_url, **kwargs) + + class LiteLLMBase(ABC): _FACTORY_NAME = [ "Tongyi-Qianwen", diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py index 28ab2e2624..23b9105558 100644 --- a/rag/llm/embedding_model.py +++ b/rag/llm/embedding_model.py @@ -161,6 +161,24 @@ class AzureEmbed(OpenAIEmbed): self.model_name = model_name +class AstraflowEmbed(OpenAIEmbed): + _FACTORY_NAME = "Astraflow" + + def __init__(self, key, model_name, base_url="https://api-us-ca.umodelverse.ai/v1"): + if not base_url: + base_url = "https://api-us-ca.umodelverse.ai/v1" + super().__init__(key, model_name, base_url) + + +class 
AstraflowCNEmbed(OpenAIEmbed): + _FACTORY_NAME = "Astraflow-CN" + + def __init__(self, key, model_name, base_url="https://api.modelverse.cn/v1"): + if not base_url: + base_url = "https://api.modelverse.cn/v1" + super().__init__(key, model_name, base_url) + + class BaiChuanEmbed(OpenAIEmbed): _FACTORY_NAME = "BaiChuan"