fix: strip Ollama-style tag suffix from LocalAI model names (#15908)

## Summary LocalAI exposes two API surfaces with conflicting naming conventions: - `GET /api/tags` returns model names with `:latest` suffix (Ollama format) - `POST /v1/chat/completions` expects names without `:latest` (OpenAI format) RAGFlow discovered models via `/api/tags` and stored the tagged name, then used it with `/v1/chat/completions`, causing a 404 error because LocalAI didn't recognize `model:latest`. ## Fix In `LocalAI.get_model_list()`, strip the tag suffix from model names using `model["name"].rsplit(":", 1)[0]`, so stored names match what the OpenAI-compatible endpoints expect.
2026-06-29 15:31:05 +08:00 · 2026-06-10 19:05:05 +08:00
parent 7355db183f
commit 0d3e410826
2 changed files with 145 additions and 1 deletions
--- a/rag/llm/model_meta.py
+++ b/rag/llm/model_meta.py
@@ -259,7 +259,7 @@ class LocalAI(Base):
                    context_length = model_info.get("model_info", {}).get("general.context_length", 8192)
                    res.append(
                        {
-                            "name": model["name"],
+                            "name": model["name"].rsplit(":", 1)[0],
                            "model_types": [capability_to_model_type_mapping[c] for c in model_info.get("capabilities", []) if c in capability_to_model_type_mapping],
                            "features": [capability_to_feature_mapping[c] for c in model_info.get("capabilities", []) if c in capability_to_feature_mapping],
                            "max_tokens": context_length or 8192,
--- a/test/unit_test/rag/llm/test_localai_model_list.py
+++ b/test/unit_test/rag/llm/test_localai_model_list.py
@@ -0,0 +1,144 @@
 #
 #  Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 #  You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 #  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
 """Unit tests for LocalAI.get_model_list() — verify Ollama-style tag suffix
 (e.g. ``:latest``) is stripped from model names returned by ``/api/tags``
 so that names match what the OpenAI-compatible ``/v1/chat/completions``
 endpoint expects."""
 from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 from rag.llm.model_meta import LocalAI
 pytestmark = pytest.mark.p2
 def _build_tags_response(models):
    return {"models": models}
 def _build_show_response(capabilities=None, context_length=8192):
    return {
        "model_info": {"general.context_length": context_length},
        "capabilities": capabilities or ["completion"],
    }
 def _make_async_cm(response_mock):
    """Wrap a mock so ``async with obj as x:`` binds x = response_mock."""
    wrapper = MagicMock()
    wrapper.__aenter__ = AsyncMock(return_value=response_mock)
    wrapper.__aexit__ = AsyncMock(return_value=None)
    return wrapper
 def _make_mock_resp(json_payload, status=200):
    """Build a response mock usable inside ``async with resp:``."""
    resp = MagicMock()
    resp.status = status
    resp.json = AsyncMock(return_value=json_payload)
    return resp
 async def _run_test(models_in, expected_names_out):
    """Core helper: mock /api/tags + /api/show, call get_model_list, assert names."""
    tags_payload = _build_tags_response(models_in)
    show_payload = _build_show_response()
    tags_resp = _make_mock_resp(tags_payload)
    show_resps = [_make_mock_resp(show_payload) for _ in models_in]
    # session.get returns a context-manager wrapper over tags_resp
    # session.post returns a list of context-manager wrappers (one per model)
    session = MagicMock()
    session.get.return_value = _make_async_cm(tags_resp)
    session.post.side_effect = [_make_async_cm(r) for r in show_resps]
    # aiohttp.ClientSession() is used as async context manager, so its
    # __aenter__ must return `session`.
    session_cls = MagicMock()
    session_cls.__aenter__ = AsyncMock(return_value=session)
    session_cls.__aexit__ = AsyncMock(return_value=None)
    with patch("aiohttp.ClientSession", return_value=session_cls):
        result = await LocalAI(api_key="", base_url="http://127.0.0.1:8080").get_model_list()
    assert [m["name"] for m in result] == expected_names_out
@pytest.mark.asyncio
 async def test_get_model_list_strips_latest_tag():
    await _run_test(
        [
            {"name": "lfm2.5-8b-a1b:latest", "model": "lfm2.5-8b-a1b:latest"},
            {"name": "qwen_qwen3.5-2b:latest", "model": "qwen_qwen3.5-2b:latest"},
        ],
        ["lfm2.5-8b-a1b", "qwen_qwen3.5-2b"],
    )
@pytest.mark.asyncio
 async def test_get_model_list_strips_custom_tag():
    await _run_test(
        [
            {"name": "llama3:7b", "model": "llama3:7b"},
            {"name": "mistral:q4_K_M", "model": "mistral:q4_K_M"},
        ],
        ["llama3", "mistral"],
    )
@pytest.mark.asyncio
 async def test_get_model_list_no_tag_unchanged():
    await _run_test(
        [
            {"name": "gemma2-9b", "model": "gemma2-9b"},
            {"name": "phi3-mini", "model": "phi3-mini"},
        ],
        ["gemma2-9b", "phi3-mini"],
    )
@pytest.mark.asyncio
 async def test_get_model_list_name_with_multiple_colons():
    await _run_test(
        [{"name": "registry.example.com/team/model:v1.0", "model": "registry.example.com/team/model:v1.0"}],
        ["registry.example.com/team/model"],
    )
@pytest.mark.asyncio
 async def test_get_model_list_no_base_url_returns_empty():
    result = await LocalAI(api_key="", base_url=None).get_model_list()
    assert result == []
@pytest.mark.asyncio
 async def test_get_model_list_tags_endpoint_non_200_returns_empty():
    tags_resp = _make_mock_resp({}, status=500)
    session = MagicMock()
    session.get.return_value = _make_async_cm(tags_resp)
    session_cls = MagicMock()
    session_cls.__aenter__ = AsyncMock(return_value=session)
    session_cls.__aexit__ = AsyncMock(return_value=None)
    with patch("aiohttp.ClientSession", return_value=session_cls):
        result = await LocalAI(api_key="", base_url="http://127.0.0.1:8080").get_model_list()
    assert result == []