From f853a39b402c7c436a8d3f8b239f78e51de9d71e Mon Sep 17 00:00:00 2001
From: ucloudnb666 <k8sxtest@ucloud.cn>
Date: Wed, 22 Apr 2026 15:38:34 +0800
Subject: [PATCH] feat: Add Astraflow provider support (global + China
 endpoints) (#14270)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Add Astraflow Provider Support

This PR integrates [Astraflow](https://astraflow.ucloud.cn/) (by UCloud
/ 优刻得) as a new AI model provider in RAGFlow, with support for both
global and China endpoints.

### About Astraflow
Astraflow is an OpenAI-compatible AI model aggregation platform
supporting 200+ models from major providers including DeepSeek, Qwen,
GPT, Claude, Gemini, Llama, Mistral, and more.

| Variant | Factory Name | Endpoint | Env Var |
|---------|-------------|----------|---------|
| Global | `Astraflow` | `https://api-us-ca.umodelverse.ai/v1` | `ASTRAFLOW_API_KEY` |
| China | `Astraflow-CN` | `https://api.modelverse.cn/v1` | `ASTRAFLOW_CN_API_KEY` |

- **API key signup**: https://astraflow.ucloud.cn/

---

### Files Changed

| File | Change |
|------|--------|
| `rag/llm/__init__.py` | Register `Astraflow` and `Astraflow-CN` in `SupportedLiteLLMProvider` enum, `FACTORY_DEFAULT_BASE_URL`, and `LITELLM_PROVIDER_PREFIX` |
| `rag/llm/chat_model.py` | Add `AstraflowChat` and `AstraflowCNChat` (OpenAI-compatible `Base` subclass) |
| `rag/llm/embedding_model.py` | Add `AstraflowEmbed` and `AstraflowCNEmbed` (subclasses of `OpenAIEmbed`) |
| `conf/llm_factories.json` | Register `Astraflow` and `Astraflow-CN` factories with a curated list of popular chat and embedding models; normalize formatting of existing entries |
| `docs/guides/models/supported_models.mdx` | List `Astraflow` and `Astraflow-CN` with LLM and Embedding support |

Note: `rag/llm/rerank_model.py`, `rag/llm/cv_model.py`, `rag/llm/tts_model.py`, and
`rag/llm/sequence2txt_model.py` are not modified by this patch; rerank, image-to-text,
TTS, and speech-to-text classes for Astraflow are not included here.

---

### Supported Model Types
- ✅ **Chat / LLM** — Claude Opus 4.6/4.7, Claude Sonnet 4.5, Claude Haiku 4.5,
GPT-5.4 (incl. mini/nano), GPT-4o-mini, Qwen3, Kimi K2.6, GLM-5.1,
MiniMax-M2/M2.7, Gemini 2.5 Pro/Flash
- ✅ **Text Embedding** — qwen3-embedding-8b, text-embedding-3-large,
text-embedding-ada-002
- Image / Vision (IMAGE2TEXT), Text Re-Rank, TTS, and Speech-to-Text
(SPEECH2TEXT) are not registered by this patch: both factories are tagged
`LLM,TEXT EMBEDDING` only.

### Implementation Notes
- Uses the `openai/` LiteLLM prefix — consistent with other
OpenAI-compatible aggregation platforms (SILICONFLOW, DeerAPI, CometAPI,
OpenRouter, n1n, Avian, etc.)
- `Astraflow` (global, rank 250) and `Astraflow-CN` (China, rank 249)
are separate factory entries, allowing users to choose the optimal
endpoint based on their region.
- All model classes added here cleanly subclass existing base classes
(`Base` for chat, `OpenAIEmbed` for embedding) with no custom logic beyond
a default `base_url` — the provider is fully OpenAI-compatible.

---------

Co-authored-by: user <user@xzaaaMacBook-Air.local>
---
 conf/llm_factories.json                 | 387 +++++++++++++++++++++---
 docs/guides/models/supported_models.mdx |   2 +
 rag/llm/__init__.py                     |   6 +
 rag/llm/chat_model.py                   |  21 +-
 rag/llm/embedding_model.py              |  18 ++
 5 files changed, 396 insertions(+), 38 deletions(-)

diff --git a/conf/llm_factories.json b/conf/llm_factories.json
index 0cadfe3679..a03fe0baf2 100644
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
@@ -377,7 +377,7 @@
             "tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK,TTS,SPEECH2TEXT,MODERATION",
             "status": "1",
             "rank": "950",
-            "url" : "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
             "llm": [
                 {
                     "llm_name": "qwen3.5-122b-a10b",
@@ -1557,53 +1557,52 @@
             "rank": "980",
             "llm": [
                 {
-                "llm_name": "gemini-3-pro-preview",
-                "tags": "LLM,CHAT,1M,IMAGE2TEXT",
-                "max_tokens": 1048576,
-                "model_type": "image2text",
-                "is_tools": true
+                    "llm_name": "gemini-3-pro-preview",
+                    "tags": "LLM,CHAT,1M,IMAGE2TEXT",
+                    "max_tokens": 1048576,
+                    "model_type": "image2text",
+                    "is_tools": true
                 },
                 {
-                "llm_name": "gemini-2.5-flash",
-                "tags": "LLM,CHAT,1024K,IMAGE2TEXT",
-                "max_tokens": 1048576,
-                "model_type": "image2text",
-                "is_tools": true
+                    "llm_name": "gemini-2.5-flash",
+                    "tags": "LLM,CHAT,1024K,IMAGE2TEXT",
+                    "max_tokens": 1048576,
+                    "model_type": "image2text",
+                    "is_tools": true
                 },
                 {
-                "llm_name": "gemini-2.5-pro",
-                "tags": "LLM,CHAT,IMAGE2TEXT,1024K",
-                "max_tokens": 1048576,
-                "model_type": "image2text",
-                "is_tools": true
+                    "llm_name": "gemini-2.5-pro",
+                    "tags": "LLM,CHAT,IMAGE2TEXT,1024K",
+                    "max_tokens": 1048576,
+                    "model_type": "image2text",
+                    "is_tools": true
                 },
                 {
-                "llm_name": "gemini-2.5-flash-lite",
-                "tags": "LLM,CHAT,1024K,IMAGE2TEXT",
-                "max_tokens": 1048576,
-                "model_type": "image2text",
-                "is_tools": true
+                    "llm_name": "gemini-2.5-flash-lite",
+                    "tags": "LLM,CHAT,1024K,IMAGE2TEXT",
+                    "max_tokens": 1048576,
+                    "model_type": "image2text",
+                    "is_tools": true
                 },
                 {
-                "llm_name": "gemini-2.0-flash",
-                "tags": "LLM,CHAT,1024K",
-                "max_tokens": 1048576,
-                "model_type": "image2text",
-                "is_tools": true
+                    "llm_name": "gemini-2.0-flash",
+                    "tags": "LLM,CHAT,1024K",
+                    "max_tokens": 1048576,
+                    "model_type": "image2text",
+                    "is_tools": true
                 },
                 {
-                "llm_name": "gemini-2.0-flash-lite",
-                "tags": "LLM,CHAT,1024K",
-                "max_tokens": 1048576,
-                "model_type": "image2text",
-                "is_tools": true
+                    "llm_name": "gemini-2.0-flash-lite",
+                    "tags": "LLM,CHAT,1024K",
+                    "max_tokens": 1048576,
+                    "model_type": "image2text",
+                    "is_tools": true
                 },
-
                 {
-                "llm_name": "gemini-embedding-001",
-                "tags": "TEXT EMBEDDING",
-                "max_tokens": 2048,
-                "model_type": "embedding"
+                    "llm_name": "gemini-embedding-001",
+                    "tags": "TEXT EMBEDDING",
+                    "max_tokens": 2048,
+                    "model_type": "embedding"
                 }
             ]
         },
@@ -6293,6 +6292,320 @@
                 }
             ]
         },
+        {
+            "name": "Astraflow",
+            "logo": "",
+            "tags": "LLM,TEXT EMBEDDING",
+            "status": "1",
+            "rank": "250",
+            "url": "https://api-us-ca.umodelverse.ai/v1",
+            "llm": [
+                {
+                    "llm_name": "claude-opus-4-7",
+                    "tags": "LLM,CHAT,200k",
+                    "max_tokens": 200000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "claude-opus-4-6",
+                    "tags": "LLM,CHAT,200k",
+                    "max_tokens": 200000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "claude-sonnet-4-5-20250929",
+                    "tags": "LLM,CHAT,200k",
+                    "max_tokens": 200000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "claude-haiku-4-5-20251001",
+                    "tags": "LLM,CHAT,200k",
+                    "max_tokens": 200000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gpt-5.4",
+                    "tags": "LLM,CHAT,400k",
+                    "max_tokens": 400000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gpt-5.4-mini",
+                    "tags": "LLM,CHAT,400k",
+                    "max_tokens": 400000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gpt-5.4-nano",
+                    "tags": "LLM,CHAT,400k",
+                    "max_tokens": 400000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gpt-4o-mini",
+                    "tags": "LLM,CHAT,128k",
+                    "max_tokens": 128000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "Qwen/Qwen3-Max",
+                    "tags": "LLM,CHAT,131k",
+                    "max_tokens": 131072,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "Qwen/Qwen3-Coder",
+                    "tags": "LLM,CHAT,131k",
+                    "max_tokens": 131072,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "Qwen/Qwen3-32B",
+                    "tags": "LLM,CHAT,131k",
+                    "max_tokens": 131072,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "Qwen/Qwen3-VL-235B-A22B-Instruct",
+                    "tags": "LLM,CHAT,131k",
+                    "max_tokens": 131072,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "kimi-k2.6",
+                    "tags": "LLM,CHAT,200k",
+                    "max_tokens": 200000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "glm-5.1",
+                    "tags": "LLM,CHAT,128k",
+                    "max_tokens": 128000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "MiniMax-M2.7",
+                    "tags": "LLM,CHAT,1M",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "MiniMax-M2",
+                    "tags": "LLM,CHAT,1M",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gemini-2.5-pro",
+                    "tags": "LLM,CHAT,1M",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gemini-2.5-flash",
+                    "tags": "LLM,CHAT,1M",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "qwen3-embedding-8b",
+                    "tags": "TEXT EMBEDDING,8K",
+                    "max_tokens": 8192,
+                    "model_type": "embedding",
+                    "is_tools": false
+                },
+                {
+                    "llm_name": "text-embedding-3-large",
+                    "tags": "TEXT EMBEDDING,8K",
+                    "max_tokens": 8191,
+                    "model_type": "embedding",
+                    "is_tools": false
+                },
+                {
+                    "llm_name": "text-embedding-ada-002",
+                    "tags": "TEXT EMBEDDING,8K",
+                    "max_tokens": 8191,
+                    "model_type": "embedding",
+                    "is_tools": false
+                }
+            ]
+        },
+        {
+            "name": "Astraflow-CN",
+            "logo": "",
+            "tags": "LLM,TEXT EMBEDDING",
+            "status": "1",
+            "rank": "249",
+            "url": "https://api.modelverse.cn/v1",
+            "llm": [
+                {
+                    "llm_name": "claude-opus-4-7",
+                    "tags": "LLM,CHAT,200k",
+                    "max_tokens": 200000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "claude-opus-4-6",
+                    "tags": "LLM,CHAT,200k",
+                    "max_tokens": 200000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "claude-sonnet-4-5-20250929",
+                    "tags": "LLM,CHAT,200k",
+                    "max_tokens": 200000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "claude-haiku-4-5-20251001",
+                    "tags": "LLM,CHAT,200k",
+                    "max_tokens": 200000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gpt-5.4",
+                    "tags": "LLM,CHAT,400k",
+                    "max_tokens": 400000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gpt-5.4-mini",
+                    "tags": "LLM,CHAT,400k",
+                    "max_tokens": 400000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gpt-5.4-nano",
+                    "tags": "LLM,CHAT,400k",
+                    "max_tokens": 400000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gpt-4o-mini",
+                    "tags": "LLM,CHAT,128k",
+                    "max_tokens": 128000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "Qwen/Qwen3-Max",
+                    "tags": "LLM,CHAT,131k",
+                    "max_tokens": 131072,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "Qwen/Qwen3-Coder",
+                    "tags": "LLM,CHAT,131k",
+                    "max_tokens": 131072,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "Qwen/Qwen3-32B",
+                    "tags": "LLM,CHAT,131k",
+                    "max_tokens": 131072,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "Qwen/Qwen3-VL-235B-A22B-Instruct",
+                    "tags": "LLM,CHAT,131k",
+                    "max_tokens": 131072,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "kimi-k2.6",
+                    "tags": "LLM,CHAT,200k",
+                    "max_tokens": 200000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "glm-5.1",
+                    "tags": "LLM,CHAT,128k",
+                    "max_tokens": 128000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "MiniMax-M2.7",
+                    "tags": "LLM,CHAT,1M",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "MiniMax-M2",
+                    "tags": "LLM,CHAT,1M",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gemini-2.5-pro",
+                    "tags": "LLM,CHAT,1M",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "gemini-2.5-flash",
+                    "tags": "LLM,CHAT,1M",
+                    "max_tokens": 1000000,
+                    "model_type": "chat",
+                    "is_tools": true
+                },
+                {
+                    "llm_name": "qwen3-embedding-8b",
+                    "tags": "TEXT EMBEDDING,8K",
+                    "max_tokens": 8192,
+                    "model_type": "embedding",
+                    "is_tools": false
+                },
+                {
+                    "llm_name": "text-embedding-3-large",
+                    "tags": "TEXT EMBEDDING,8K",
+                    "max_tokens": 8191,
+                    "model_type": "embedding",
+                    "is_tools": false
+                },
+                {
+                    "llm_name": "text-embedding-ada-002",
+                    "tags": "TEXT EMBEDDING,8K",
+                    "max_tokens": 8191,
+                    "model_type": "embedding",
+                    "is_tools": false
+                }
+            ]
+        },
         {
             "name": "Avian",
             "logo": "",
@@ -6370,4 +6683,4 @@
             ]
         }
     ]
-}
+}
\ No newline at end of file
diff --git a/docs/guides/models/supported_models.mdx b/docs/guides/models/supported_models.mdx
index cc20e4120c..95290424d7 100644
--- a/docs/guides/models/supported_models.mdx
+++ b/docs/guides/models/supported_models.mdx
@@ -18,6 +18,8 @@ A complete list of models supported by RAGFlow, which will continue to expand.
 | Provider              | LLM                | Image2Text         | Speech2text        | TTS                | Embedding          | Rerank             | OCR                |
 | --------------------- | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ |
 | Anthropic             | :heavy_check_mark: |                    |                    |                    |                    |                    |                    |
+| Astraflow             | :heavy_check_mark: |                    |                    |                    | :heavy_check_mark: |                    |                    |
+| Astraflow-CN          | :heavy_check_mark: |                    |                    |                    | :heavy_check_mark: |                    |                    |
 | Avian                 | :heavy_check_mark: |                    |                    |                    |                    |                    |                    |
 | Azure-OpenAI          | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |                    | :heavy_check_mark: |                    |                    |
 | BaiChuan              | :heavy_check_mark: |                    |                    |                    | :heavy_check_mark: |                    |                    |
diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index 77b1ff2b0e..aeb1748876 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -59,6 +59,8 @@ class SupportedLiteLLMProvider(StrEnum):
     n1n = "n1n"
     HunYuan = "Tencent Hunyuan"
     Avian = "Avian"
+    Astraflow = "Astraflow"
+    Astraflow_CN = "Astraflow-CN"
 
 
 FACTORY_DEFAULT_BASE_URL = {
@@ -87,6 +89,8 @@ FACTORY_DEFAULT_BASE_URL = {
     SupportedLiteLLMProvider.n1n: "https://api.n1n.ai/v1",
     SupportedLiteLLMProvider.HunYuan: "https://api.hunyuan.cloud.tencent.com/v1",
     SupportedLiteLLMProvider.Avian: "https://api.avian.io/v1",
+    SupportedLiteLLMProvider.Astraflow: "https://api-us-ca.umodelverse.ai/v1",
+    SupportedLiteLLMProvider.Astraflow_CN: "https://api.modelverse.cn/v1",
 }
 
 
@@ -127,6 +131,8 @@ LITELLM_PROVIDER_PREFIX = {
     SupportedLiteLLMProvider.n1n: "openai/",
     SupportedLiteLLMProvider.HunYuan: "openai/",
     SupportedLiteLLMProvider.Avian: "openai/",
+    SupportedLiteLLMProvider.Astraflow: "openai/",
+    SupportedLiteLLMProvider.Astraflow_CN: "openai/",
 }
 
 ChatModel = globals().get("ChatModel", {})
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index fb1353706d..a58e8450c0 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -30,11 +30,12 @@ import openai
 from openai import AsyncOpenAI, OpenAI
 from strenum import StrEnum
 
+from common.misc_utils import thread_pool_exec
 from common.token_utils import num_tokens_from_string, total_token_count_from_response
 from rag.llm import FACTORY_DEFAULT_BASE_URL, LITELLM_PROVIDER_PREFIX, SupportedLiteLLMProvider
 from rag.nlp import is_chinese, is_english
 
-from common.misc_utils import thread_pool_exec
+
 class LLMErrorCode(StrEnum):
     ERROR_RATE_LIMIT = "RATE_LIMIT_EXCEEDED"
     ERROR_AUTHENTICATION = "AUTH_ERROR"
@@ -1208,6 +1209,24 @@ class AvianChat(Base):
         super().__init__(key, model_name, base_url, **kwargs)
 
 
+class AstraflowChat(Base):
+    _FACTORY_NAME = "Astraflow"
+
+    def __init__(self, key, model_name, base_url="https://api-us-ca.umodelverse.ai/v1", **kwargs):
+        if not base_url:
+            base_url = "https://api-us-ca.umodelverse.ai/v1"
+        super().__init__(key, model_name, base_url, **kwargs)
+
+
+class AstraflowCNChat(Base):
+    _FACTORY_NAME = "Astraflow-CN"
+
+    def __init__(self, key, model_name, base_url="https://api.modelverse.cn/v1", **kwargs):
+        if not base_url:
+            base_url = "https://api.modelverse.cn/v1"
+        super().__init__(key, model_name, base_url, **kwargs)
+
+
 class LiteLLMBase(ABC):
     _FACTORY_NAME = [
         "Tongyi-Qianwen",
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
index 28ab2e2624..23b9105558 100644
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -161,6 +161,24 @@ class AzureEmbed(OpenAIEmbed):
         self.model_name = model_name
 
 
+class AstraflowEmbed(OpenAIEmbed):
+    _FACTORY_NAME = "Astraflow"
+
+    def __init__(self, key, model_name, base_url="https://api-us-ca.umodelverse.ai/v1"):
+        if not base_url:
+            base_url = "https://api-us-ca.umodelverse.ai/v1"
+        super().__init__(key, model_name, base_url)
+
+
+class AstraflowCNEmbed(OpenAIEmbed):
+    _FACTORY_NAME = "Astraflow-CN"
+
+    def __init__(self, key, model_name, base_url="https://api.modelverse.cn/v1"):
+        if not base_url:
+            base_url = "https://api.modelverse.cn/v1"
+        super().__init__(key, model_name, base_url)
+
+
 class BaiChuanEmbed(OpenAIEmbed):
     _FACTORY_NAME = "BaiChuan"