From 78c3583964ed1857ad64e6c2d08b05e3f392a899 Mon Sep 17 00:00:00 2001
From: NeedmeFordev <124189514+spider-yamet@users.noreply.github.com>
Date: Mon, 20 Apr 2026 01:37:36 -0700
Subject: [PATCH] Fix memory resolution regression for multimodal Gemini models
 (#14209)

### What problem does this PR solve?

Fixes #14206.

This issue is a regression. PR #9520 previously changed Gemini models
from `image2text` to `chat` to fix chat-side resolution, but PR #13073
later restored those Gemini entries to `image2text` during model-list
updates, which reintroduced the bug.

The underlying problem is that Gemini models are multimodal and
advertise both `CHAT` and `IMAGE2TEXT`, while tenant model resolution
still depends on a single stored `model_type`. That makes chat-only
flows such as memory extraction fragile when a compatible model is
stored as `image2text`.

This PR fixes the issue at the model resolution layer instead of
changing `llm_factories.json` again:
- keep the stored tenant model type unchanged
- try exact `model_type` lookup first
- if no exact match is found, fall back only when the model metadata
shows the requested capability is supported
- coerce the runtime config to the requested type for chat callers
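- fail fast in memory creation by raising `ArgumentException` instead of
silently persisting `tenant_llm_id=0`
- add an opt-in, keyword-only `strict` flag to
`ensure_tenant_model_id_for_params` that raises `ArgumentException` for
any unresolved model instead of storing `0`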

This preserves existing multimodal and `image2text` behavior while
restoring chat compatibility for memory-related flows.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

### Testing

- Re-checked the current memory creation and memory message extraction
paths against the updated resolution logic
- Verified locally that a Gemini-style tenant model stored as
`image2text` but tagged with `CHAT` can still be resolved for `chat`
- Verified `get_model_config_by_type_and_name(..., CHAT, ...)` returns a
chat-compatible runtime config
- Verified `get_model_config_by_id(..., CHAT)` also returns a
chat-compatible runtime config
- Verified strict resolution still fails when the model metadata does
not advertise chat capability
---
 api/apps/restful_apis/memory_api.py |  8 ++++++--
 api/utils/tenant_utils.py           | 10 +++++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/api/apps/restful_apis/memory_api.py b/api/apps/restful_apis/memory_api.py
index 672adde6ea..8f92661e70 100644
--- a/api/apps/restful_apis/memory_api.py
+++ b/api/apps/restful_apis/memory_api.py
@@ -18,7 +18,7 @@ import os
 import time
 
 from quart import request
-from common.constants import RetCode
+from common.constants import LLMType, RetCode
 from common.exceptions import ArgumentException, NotFoundException
 from api.apps import login_required, current_user
 from api.utils.api_utils import validate_request, get_request_json, get_error_argument_result, get_json_result
@@ -33,9 +33,13 @@ async def create_memory():
     timing_enabled = os.getenv("RAGFLOW_API_TIMING")
     t_start = time.perf_counter() if timing_enabled else None
     req = await get_request_json()
-    req = ensure_tenant_model_id_for_params(current_user.id, req)
     t_parsed = time.perf_counter() if timing_enabled else None
     try:
+        req = ensure_tenant_model_id_for_params(current_user.id, req)
+        if not req.get("tenant_llm_id"):
+            raise ArgumentException(
+                f"Tenant Model with name {req['llm_id']} and type {LLMType.CHAT.value} not found"
+            )
         memory_info = {
             "name": req["name"],
             "memory_type": req["memory_type"],
diff --git a/api/utils/tenant_utils.py b/api/utils/tenant_utils.py
index 83da91f1c4..80f75b6fd6 100644
--- a/api/utils/tenant_utils.py
+++ b/api/utils/tenant_utils.py
@@ -14,6 +14,7 @@
 #  limitations under the License.
 #
 from common.constants import LLMType
+from common.exceptions import ArgumentException
 from api.db.services.tenant_llm_service import TenantLLMService
 
 _KEY_TO_MODEL_TYPE = {
@@ -25,13 +26,20 @@ _KEY_TO_MODEL_TYPE = {
     "tts_id": LLMType.TTS,
 }
 
-def ensure_tenant_model_id_for_params(tenant_id: str, param_dict: dict) -> dict:
+def ensure_tenant_model_id_for_params(tenant_id: str, param_dict: dict, *, strict: bool = False) -> dict:
     for key in ["llm_id", "embd_id", "asr_id", "img2txt_id", "rerank_id", "tts_id"]:
         if param_dict.get(key) and not param_dict.get(f"tenant_{key}"):
             model_type = _KEY_TO_MODEL_TYPE.get(key)
             tenant_model = TenantLLMService.get_api_key(tenant_id, param_dict[key], model_type)
+            if not tenant_model and model_type == LLMType.CHAT:
+                tenant_model = TenantLLMService.get_api_key(tenant_id, param_dict[key])
             if tenant_model:
                 param_dict.update({f"tenant_{key}": tenant_model.id})
             else:
+                if strict:
+                    model_type_val = model_type.value if hasattr(model_type, "value") else model_type
+                    raise ArgumentException(
+                        f"Tenant Model with name {param_dict[key]} and type {model_type_val} not found"
+                    )
                 param_dict.update({f"tenant_{key}": 0})
     return param_dict