2026-06-08 11:02:40 +08:00
|
|
|
#
|
|
|
|
|
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
|
|
|
|
#
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
#
|
2026-06-08 14:54:59 +08:00
|
|
|
import json
|
2026-06-08 11:02:40 +08:00
|
|
|
import aiohttp
|
|
|
|
|
from abc import ABC
|
2026-06-08 20:18:00 +08:00
|
|
|
from urllib.parse import urlparse
|
|
|
|
|
from json.decoder import JSONDecodeError
|
2026-06-08 11:02:40 +08:00
|
|
|
|
|
|
|
|
from common.constants import LLMType
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Base(ABC):
    """Shared model-listing logic for providers exposing ``/v1/models``.

    Subclasses adapt a provider by overriding the underscore-prefixed hooks:
    ``_get_api_key`` (credential extraction), ``_get_model_list_url``
    (endpoint construction) and ``_format_model_list`` (response shaping).
    """

    def __init__(self, api_key: str, base_url: str = None):
        self.api_key = api_key
        self.base_url = base_url

    def _get_api_key(self):
        """Return the credential sent as the ``Authorization`` bearer token."""
        return self.api_key

    def _get_model_list_url(self):
        """Build the model-listing URL from ``self.base_url``.

        Everything from the first ``/v1`` in the path onwards is replaced by
        ``/v1/models``; if the path has no ``/v1`` the suffix is appended.

        Returns:
            The listing URL, or ``None`` when no base URL is configured.
        """
        if not self.base_url:
            return None

        # Only look for "/v1" after the "scheme://" separator; otherwise a host
        # such as "https://v1.example.com" matches inside "://v1" and the URL
        # gets truncated to "https:".
        scheme_end = self.base_url.find("://")
        search_from = scheme_end + len("://") if scheme_end != -1 else 0
        version_at = self.base_url.find("/v1", search_from)
        if version_at != -1:
            return self.base_url[:version_at].rstrip("/") + "/v1/models"
        return self.base_url.rstrip("/") + "/v1/models"

    async def _get_raw_model_list(self):
        """Fetch the listing endpoint and return its decoded JSON body.

        Returns:
            The decoded JSON payload, or ``None`` when there is no URL or the
            server answers with a status other than 200.
        """
        url = self._get_model_list_url()
        if not url:
            return None
        headers = {"Authorization": f"Bearer {self._get_api_key()}"}
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as resp:
                if resp.status != 200:
                    return None
                return await resp.json()

    def _format_model_list(self, raw_model_list):
        """Convert the raw payload to the caller-facing shape (identity here)."""
        return raw_model_list

    async def get_model_list(self):
        """Return the formatted model list, or ``[]`` when nothing was fetched."""
        raw_model_list = await self._get_raw_model_list()
        if not raw_model_list:
            return []
        return self._format_model_list(raw_model_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class VolcEngine(Base):
    """Model listing for VolcEngine, served from ``/api/v3/models``."""

    _FACTORY_NAME = "VolcEngine"

    def _get_api_key(self):
        """Extract the bearer token from the stored credential.

        The credential may be a JSON object carrying ``ark_api_key`` or a
        plain key string. A JSON object without ``ark_api_key`` yields ``""``.
        Anything that is not a JSON object is returned unchanged.
        """
        try:
            payload = json.loads(self.api_key)
        except (JSONDecodeError, TypeError):
            return self.api_key
        # A plain key can itself be valid JSON (e.g. all digits decode to an
        # int); only a JSON object carries the "ark_api_key" field.
        if isinstance(payload, dict):
            return payload.get("ark_api_key", "")
        return self.api_key

    def _get_model_list_url(self):
        """Return ``<scheme>://<host>/api/v3/models`` for the configured host.

        When no base URL is set, the default endpoint is written back to
        ``self.base_url`` before the URL is built.
        """
        if not self.base_url:
            self.base_url = "https://ark.cn-beijing.volces.com/api/v3"
        parsed = urlparse(self.base_url)
        return f"{parsed.scheme}://{parsed.netloc}/api/v3/models"

    def _format_model_list(self, raw_model_list):
        """Convert the listing payload into model description dicts.

        Models whose status is ``"Shutdown"``, entries without an ``id`` and
        models for which no model type can be derived are omitted. Missing or
        null nested sections are treated as empty instead of raising.

        Returns:
            A list of dicts with ``name``, ``model_types``, ``features``,
            ``max_tokens`` and ``status`` keys; ``[]`` for a payload without a
            ``data`` list.
        """
        models = raw_model_list.get("data") if isinstance(raw_model_list, dict) else None
        if not isinstance(models, list):
            return []

        res = []
        for model in models:
            if not isinstance(model, dict) or not model.get("id"):
                continue
            if model.get("status", "") == "Shutdown":
                continue

            model_types = []
            if model.get("domain", "") == "Embedding":
                model_types.append(LLMType.EMBEDDING.value)
            else:
                modalities = model.get("modalities") or {}
                input_modalities = modalities.get("input_modalities") or []
                output_modalities = modalities.get("output_modalities") or []
                text_out = "text" in output_modalities

                if text_out:
                    model_types.append(LLMType.CHAT.value)
                if "embeddings" in output_modalities:
                    model_types.append(LLMType.EMBEDDING.value)
                if "image" in input_modalities and text_out:
                    model_types.append(LLMType.IMAGE2TEXT.value)
                if "audio" in input_modalities and text_out:
                    model_types.append(LLMType.SPEECH2TEXT.value)
                if "audio" in output_modalities:
                    model_types.append(LLMType.TTS.value)

            if not model_types:
                continue

            tools = (model.get("features") or {}).get("tools") or {}
            token_limits = model.get("token_limits") or {}

            features = []
            if tools.get("function_calling", False):
                features.append("is_tools")
            # "or 0" keeps the comparison valid when the field is null.
            if (token_limits.get("max_reasoning_token_length") or 0) > 0:
                features.append("thinking")

            res.append(
                {
                    "name": model["id"],
                    "model_types": model_types,
                    "features": features,
                    "max_tokens": token_limits.get("max_input_token_length") or 8192,
                    "status": model.get("status"),
                }
            )
        return res
|
2026-06-08 11:02:40 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class Ollama(Base):
    """Model listing for Ollama via ``/api/tags`` and ``/api/show``."""

    _FACTORY_NAME = "Ollama"

    def _get_model_tags_url(self):
        """Return the URL of the endpoint that enumerates models."""
        return self.base_url.rstrip("/") + "/api/tags"

    def _get_model_detail_url(self):
        """Return the URL of the endpoint that describes a single model."""
        return self.base_url.rstrip("/") + "/api/show"

    async def get_model_list(self):
        """List models, then query each one for capabilities and context size.

        The ``Authorization`` header is only sent when an API key is set.
        Models whose detail request does not return 200, and listing entries
        without a name, are skipped.

        Returns:
            A list of dicts with ``name``, ``model_types``, ``features`` and
            ``max_tokens`` keys; ``[]`` when no base URL is configured, the
            listing request fails, or no models are reported.
        """
        if not self.base_url:
            return []

        headers = {}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self._get_api_key()}"

        capability_to_model_type_mapping = {
            "completion": LLMType.CHAT.value,
            "vision": LLMType.IMAGE2TEXT.value,
            "embedding": LLMType.EMBEDDING.value,
        }
        capability_to_feature_mapping = {"thinking": "thinking", "tools": "is_tools"}

        res = []
        async with aiohttp.ClientSession() as session:
            # Read the listing and leave its response context before issuing
            # the per-model detail requests.
            async with session.get(self._get_model_tags_url(), headers=headers) as resp:
                if resp.status != 200:
                    return []
                tags = await resp.json()

            models = tags.get("models") if isinstance(tags, dict) else None
            if not models:
                return []

            for model in models:
                name = model.get("name") if isinstance(model, dict) else None
                if not name:
                    continue

                async with session.post(self._get_model_detail_url(), headers=headers, json={"model": name}) as detail_resp:
                    if detail_resp.status != 200:
                        continue
                    detail = await detail_resp.json()
                if not isinstance(detail, dict):
                    continue

                capabilities = detail.get("capabilities") or []
                metadata = detail.get("model_info") or {}

                # The context length is stored under "<prefix>.context_length".
                # Try the family from "details" first, then the
                # "general.architecture" value (the prefix used in the Ollama
                # API documentation example) when the family key is absent.
                prefixes = (
                    (detail.get("details") or {}).get("family", ""),
                    metadata.get("general.architecture", ""),
                )
                max_tokens = 8192
                for prefix in prefixes:
                    context_length = metadata.get(f"{prefix}.context_length")
                    if context_length:
                        max_tokens = context_length
                        break

                res.append(
                    {
                        "name": name,
                        "model_types": [capability_to_model_type_mapping[c] for c in capabilities if c in capability_to_model_type_mapping],
                        "features": [capability_to_feature_mapping[c] for c in capabilities if c in capability_to_feature_mapping],
                        "max_tokens": max_tokens,
                    }
                )
        return res
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Xinference(Base):
    """Model listing for Xinference, served from ``<base_url>/v1/models``."""

    _FACTORY_NAME = "Xinference"

    def _get_model_list_url(self):
        """Return ``<base_url>/v1/models``, or ``None`` without a base URL."""
        if not self.base_url:
            return None
        return self.base_url.rstrip("/") + "/v1/models"

    @staticmethod
    def _xinference_model_type_to_llm_type(model_type_str):
        """Map an Xinference model type string to an ``LLMType`` value.

        Matching ignores case and surrounding whitespace. Unknown, empty or
        non-string types map to the chat type.
        """
        # NOTE(review): confirm that Xinference "image" models are
        # image-to-text rather than image generation.
        mapping = {
            "llm": LLMType.CHAT.value,
            "chat": LLMType.CHAT.value,
            "embedding": LLMType.EMBEDDING.value,
            "rerank": LLMType.RERANK.value,
            "image": LLMType.IMAGE2TEXT.value,
            "tts": LLMType.TTS.value,
            "speech2text": LLMType.SPEECH2TEXT.value,
        }
        if not isinstance(model_type_str, str):
            return LLMType.CHAT.value
        return mapping.get(model_type_str.strip().lower(), LLMType.CHAT.value)

    def _format_model_list(self, raw_model_list):
        """Convert the listing payload into model description dicts.

        Reads ``id``, ``model_type`` and ``context_length`` (falling back to
        ``max_tokens``, then 8192) from each entry. Entries that are not dicts
        or have no ``id`` are skipped.

        Returns:
            A list of dicts with ``name``, ``model_types``, ``features``
            (always ``None``) and ``max_tokens`` keys.
        """
        data = raw_model_list.get("data") if isinstance(raw_model_list, dict) else raw_model_list
        if not isinstance(data, list):
            return []

        res = []
        for model in data:
            if not isinstance(model, dict):
                continue
            model_id = model.get("id")
            if not model_id:
                continue
            res.append(
                {
                    "name": model_id,
                    "model_types": [self._xinference_model_type_to_llm_type(model.get("model_type", ""))],
                    "features": None,
                    "max_tokens": model.get("context_length") or model.get("max_tokens") or 8192,
                }
            )
        return res
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LocalAI(Base):
    """Model listing for LocalAI through its ``/api/tags`` and ``/api/show`` endpoints.

    ``GET /api/tags`` supplies the model names. ``POST /api/show`` is then
    called per model; its ``capabilities`` list and
    ``model_info["general.context_length"]`` value are used to describe it.
    """

    _FACTORY_NAME = "LocalAI"

    def _get_model_tags_url(self):
        """Return the endpoint that enumerates models."""
        root = self.base_url.rstrip("/")
        return root + "/api/tags"

    def _get_model_detail_url(self):
        """Return the endpoint that describes one model."""
        root = self.base_url.rstrip("/")
        return root + "/api/show"

    async def get_model_list(self):
        """Return one description dict per model whose detail request succeeds.

        Yields ``[]`` when there is no base URL, the listing request does not
        return 200, or the listing contains no models. The ``Authorization``
        header is only attached when an API key is configured.
        """
        if not self.base_url:
            return []

        headers = {}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self._get_api_key()}"

        async with aiohttp.ClientSession() as session:
            async with session.get(self._get_model_tags_url(), headers=headers) as resp:
                if resp.status != 200:
                    return []

                listing = await resp.json()
                entries = listing.get("models", [])
                if not entries:
                    return []

                type_by_capability = {
                    "completion": LLMType.CHAT.value,
                    "vision": LLMType.IMAGE2TEXT.value,
                    "embedding": LLMType.EMBEDDING.value,
                }
                feature_by_capability = {
                    "thinking": "thinking",
                    "tools": "is_tools",
                }

                described = []
                for entry in entries:
                    request_body = {"model": entry["name"]}
                    async with session.post(self._get_model_detail_url(), headers=headers, json=request_body) as detail_resp:
                        if detail_resp.status != 200:
                            continue

                        detail = await detail_resp.json()
                        window = detail.get("model_info", {}).get("general.context_length", 8192)
                        capabilities = detail.get("capabilities", [])

                        model_types = []
                        features = []
                        for capability in capabilities:
                            if capability in type_by_capability:
                                model_types.append(type_by_capability[capability])
                            if capability in feature_by_capability:
                                features.append(feature_by_capability[capability])

                        described.append(
                            {
                                "name": entry["name"],
                                "model_types": model_types,
                                "features": features,
                                # A falsy reported length falls back to the default.
                                "max_tokens": window or 8192,
                            }
                        )
                return described
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BaiduYiyan(Base):
    """Model listing for BaiduYiyan, built from the Qianfan SDK catalogs."""

    _FACTORY_NAME = "BaiduYiyan"

    async def get_model_list(self):
        """Build the model list from the Qianfan SDK instead of an HTTP call.

        Chat model names come from ``models()`` on the SDK's chat completion
        implementation, and ``get_model_info()`` supplies
        ``max_input_tokens`` where available (default 8192). Embedding model
        names come from ``qianfan.Embedding.models()``.

        NOTE(review): the original author states these catalog calls need no
        AK/SK credentials — confirm against the installed SDK version.

        Per-model metadata lookups and the embedding catalog are best-effort:
        failures are logged and the affected entry keeps its defaults (or the
        embedding section is omitted) rather than aborting the listing.

        Returns:
            A list of dicts with ``name``, ``model_types``, ``features``
            (always ``None``) and ``max_tokens`` keys.
        """
        import logging

        import qianfan

        res = []
        # NOTE(review): ``_real_base`` is a private SDK hook; this call is not
        # guarded, so an SDK change here will surface as an exception.
        real = qianfan.ChatCompletion._real_base("1")
        chat_models = real.models()
        for name in chat_models:
            max_tokens = 8192
            try:
                info = real.get_model_info(name)
                if info.max_input_tokens:
                    max_tokens = info.max_input_tokens
            except Exception:
                # Best-effort: keep the default but leave a trace for debugging.
                logging.warning("BaiduYiyan: could not read model info for %s; using default max_tokens", name, exc_info=True)
            res.append(
                {
                    "name": name,
                    "model_types": [LLMType.CHAT.value],
                    "features": None,
                    "max_tokens": max_tokens,
                }
            )

        try:
            embed_entries = [
                {
                    "name": name,
                    "model_types": [LLMType.EMBEDDING.value],
                    "features": None,
                    "max_tokens": 8192,
                }
                for name in qianfan.Embedding.models()
            ]
        except Exception:
            # Best-effort: the chat models are still returned without these.
            logging.warning("BaiduYiyan: could not list embedding models", exc_info=True)
        else:
            res.extend(embed_entries)

        return res
|
2026-06-08 14:54:59 +08:00
|
|
|
|
|
|
|
|
|
2026-06-08 16:42:17 +08:00
|
|
|
class OpenRouter(Base):
    """Model listing for OpenRouter, served from ``/api/v1/models``."""

    _FACTORY_NAME = "OpenRouter"

    def _get_api_key(self):
        """Extract the bearer token from the stored credential.

        The credential may be a plain key or a JSON object with an
        ``api_key`` field. Returns ``""`` when no credential is set. The raw
        credential is returned when it is not JSON, is not a JSON object, or
        has no usable ``api_key`` value.
        """
        api_key = self.api_key
        if not api_key:
            return ""
        try:
            payload = json.loads(api_key)
        except (TypeError, ValueError):
            # Not JSON (JSONDecodeError is a ValueError) or not a string.
            return api_key
        if isinstance(payload, dict):
            return payload.get("api_key") or api_key
        return api_key

    def _get_model_list_url(self):
        """Build the listing URL, requesting models of every output modality.

        Everything from the first ``/api/v1`` (or else the first ``/v1``) in
        the path of ``self.base_url`` onwards is replaced by the listing
        path. Without a base URL the public ``https://openrouter.ai`` host is
        used.
        """
        tail = "/api/v1/models?output_modalities=all"
        if not self.base_url:
            return "https://openrouter.ai" + tail

        base_url = self.base_url.rstrip("/")
        # Search only after "scheme://" so a host such as "v1.example.com"
        # is not mistaken for a "/v1" path segment.
        scheme_end = base_url.find("://")
        path_start = scheme_end + len("://") if scheme_end != -1 else 0
        for marker in ("/api/v1", "/v1"):
            cut = base_url.find(marker, path_start)
            if cut != -1:
                return base_url[:cut].rstrip("/") + tail
        return base_url + tail

    def _format_model_list(self, raw_model_list):
        """Convert the listing payload into model description dicts.

        Accepts either a dict with a ``data`` list or a bare list. Entries
        that are not dicts or carry no ``id``/``name``/``canonical_slug`` are
        skipped. Model types are derived from the architecture's input and
        output modalities; features from ``supported_parameters``.

        Returns:
            A list of dicts with ``name``, ``model_types``, ``features`` and
            ``max_tokens`` keys.
        """
        models = raw_model_list.get("data") if isinstance(raw_model_list, dict) else raw_model_list
        if not isinstance(models, list):
            return []

        model_list = []
        for model in models:
            if not isinstance(model, dict):
                continue

            model_name = model.get("id") or model.get("name") or model.get("canonical_slug")
            if not model_name:
                continue

            # Treat a missing or malformed section as empty instead of failing.
            architecture = model.get("architecture")
            if not isinstance(architecture, dict):
                architecture = {}
            top_provider = model.get("top_provider")
            if not isinstance(top_provider, dict):
                top_provider = {}

            input_modalities = set(architecture.get("input_modalities") or [])
            output_modalities = set(architecture.get("output_modalities") or [])
            supported_parameters = set(model.get("supported_parameters") or [])
            text_out = "text" in output_modalities

            model_types = []
            if text_out:
                model_types.append(LLMType.CHAT.value)
            if "embeddings" in output_modalities:
                model_types.append(LLMType.EMBEDDING.value)
            if "image" in input_modalities and text_out:
                model_types.append(LLMType.IMAGE2TEXT.value)
            if "audio" in input_modalities and text_out:
                model_types.append(LLMType.SPEECH2TEXT.value)
            if "audio" in output_modalities:
                model_types.append(LLMType.TTS.value)

            features = []
            if "tools" in supported_parameters:
                features.append("is_tools")
            if supported_parameters & {"reasoning", "include_reasoning"}:
                features.append("thinking")

            # Preference order: provider completion limit, model context
            # length, provider context length, then the default.
            max_tokens = top_provider.get("max_completion_tokens") or model.get("context_length") or top_provider.get("context_length") or 8192

            model_list.append(
                {
                    "name": model_name,
                    "model_types": model_types,
                    "features": features,
                    "max_tokens": max_tokens,
                }
            )

        return model_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class OpenAIAPICompatible(Base):
    """Model listing for generic OpenAI-compatible servers.

    The listing carries no type information here, so model types are guessed
    from substrings of the lower-cased model name.
    """

    _FACTORY_NAME = "OpenAI-API-Compatible"

    # Substrings looked for in the lower-cased model name.
    _EMBEDDING_HINTS = ("embed", "embedding")
    _RERANK_HINTS = ("rerank", "reranker")
    _SPEECH2TEXT_HINTS = ("asr", "stt", "transcribe", "transcriber", "whisper")
    _TTS_HINTS = ("tts", "text-to-speech")
    _VISION_HINTS = (
        "vl",
        "vision",
        "llava",
        "internvl",
        "minicpm-v",
        "gpt-4o",
        "glm-4v",
        "qvq",
        "qwen-vl",
        "pixtral",
    )

    @classmethod
    def _contains_hint(cls, model_name, hints):
        """Tell whether at least one of ``hints`` occurs inside ``model_name``."""
        for hint in hints:
            if hint in model_name:
                return True
        return False

    @classmethod
    def _infer_model_types(cls, model_name):
        """Guess the model types from an already lower-cased model name.

        Rerank, embedding, speech-to-text and TTS hints are checked in that
        order and the first match is returned as the only type. Otherwise
        the model is a chat model, with image-to-text added when a vision
        hint matches.
        """
        exclusive_kinds = (
            (cls._RERANK_HINTS, LLMType.RERANK),
            (cls._EMBEDDING_HINTS, LLMType.EMBEDDING),
            (cls._SPEECH2TEXT_HINTS, LLMType.SPEECH2TEXT),
            (cls._TTS_HINTS, LLMType.TTS),
        )
        for hints, llm_type in exclusive_kinds:
            if cls._contains_hint(model_name, hints):
                return [llm_type.value]

        if cls._contains_hint(model_name, cls._VISION_HINTS):
            return [LLMType.CHAT.value, LLMType.IMAGE2TEXT.value]
        return [LLMType.CHAT.value]

    def _format_model_list(self, raw_model_list):
        """Convert the listing payload into model description dicts.

        Accepts a dict with a ``data`` list or a bare list; anything else
        gives ``[]``. Entries that are not dicts or have neither ``id`` nor
        ``name`` are skipped. ``max_tokens`` is the first truthy value among
        ``max_tokens``, ``max_completion_tokens``, ``context_length`` and
        ``max_model_len``, defaulting to 8192.
        """
        if isinstance(raw_model_list, dict):
            entries = raw_model_list.get("data")
        else:
            entries = raw_model_list
        if not isinstance(entries, list):
            return []

        limit_keys = ("max_tokens", "max_completion_tokens", "context_length", "max_model_len")

        formatted = []
        for entry in entries:
            if not isinstance(entry, dict):
                continue

            identifier = entry.get("id") or entry.get("name")
            if not identifier:
                continue

            token_limit = next((entry[key] for key in limit_keys if entry.get(key)), 8192)
            formatted.append(
                {
                    "name": identifier,
                    "model_types": self._infer_model_types(identifier.lower()),
                    "features": [],
                    "max_tokens": token_limit,
                }
            )

        return formatted
|
|
|
|
|
|
2026-06-08 04:05:25 -07:00
|
|
|
|
2026-06-08 16:42:17 +08:00
|
|
|
class VLLM(OpenAIAPICompatible):
    """VLLM provider; inherits all listing behaviour from ``OpenAIAPICompatible``."""

    _FACTORY_NAME = "VLLM"
|
|
|
|
|
|
2026-06-08 04:05:25 -07:00
|
|
|
|
2026-06-08 16:42:17 +08:00
|
|
|
class LMStudio(OpenAIAPICompatible):
    """LM-Studio provider; inherits all listing behaviour from ``OpenAIAPICompatible``."""

    _FACTORY_NAME = "LM-Studio"
|