fix(api): fall back to factory max_tokens for tenant models (#16364)

This commit is contained in:
Harsh Kashyap
2026-07-01 13:30:13 +05:30
committed by GitHub
parent 7862f69f39
commit d770217b25
2 changed files with 141 additions and 1 deletions

View File

@@ -45,6 +45,19 @@ def _factory_model_types(llm: dict) -> list[str]:
return [model_type] if model_type else []
def _lookup_factory_llm_info(provider_name: str, pure_model_name: str, extra_fields: dict) -> dict | None:
region = extra_fields.get("region", "default")
if region == "intl" and provider_name.lower() == "siliconflow":
target_factory_name = "siliconflow_intl"
else:
target_factory_name = provider_name
fac_list = [f for f in settings.FACTORY_LLM_INFOS if f["name"] == target_factory_name]
if not fac_list:
return None
llm_list = [llm for llm in fac_list[0]["llm"] if llm["llm_name"] == pure_model_name]
return llm_list[0] if llm_list else None
def _decode_api_key_config(raw_api_key: str) -> tuple[str, bool | None, str | None]:
if not raw_api_key:
return raw_api_key, None, None
@@ -249,6 +262,11 @@ def get_model_config_from_provider_instance(tenant_id, model_type: str | enum.En
raise LookupError(f"Model {model_name} cannot be used as {model_type_val} model.")
model_extra = json.loads(model_obj.extra) if model_obj.extra else {}
llm_info = _lookup_factory_llm_info(provider_obj.provider_name, pure_model_name, extra_fields)
if "max_tokens" in model_extra:
max_tokens = model_extra["max_tokens"]
else:
max_tokens = (llm_info or {}).get("max_tokens", 8192)
model_config = {
"llm_factory": provider_obj.provider_name,
"api_key": api_key,
@@ -256,7 +274,7 @@ def get_model_config_from_provider_instance(tenant_id, model_type: str | enum.En
"api_base": extra_fields.get("base_url", ""),
"model_type": model_obj.model_type,
"is_tools": model_extra.get("is_tools", is_tool),
"max_tokens": model_extra.get("max_tokens") or 8192,
"max_tokens": max_tokens,
}
if provider_name.lower() == "somark":
# SoMark/OCR factories read parser config (somark_*, parse_method, ...)