mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Fix: model list (#15839)
### What problem does this PR solve? Deduplicate `api_key` records that differ only by their `is_tools` encoding, and migrate `is_tools` into `tenant_model` during migration. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@@ -569,6 +569,12 @@ class TenantModelInstanceStage(MigrationStage):
|
||||
logger.info("No records to migrate")
|
||||
return 0, []
|
||||
|
||||
# Deduplicate records where api_keys differ only by is_tools encoding.
|
||||
# When _encode_api_key_config wraps a plain api_key into {"api_key": "...", "is_tools": true/false},
|
||||
# multiple tenant_llm rows for the same provider can have logically identical api_keys that
|
||||
# only differ in the is_tools field. We merge these by stripping is_tools for comparison.
|
||||
records = self._dedup_api_key_records(records)
|
||||
|
||||
logger.info(f"Migrating {len(records)} tenant_model_instance records...")
|
||||
|
||||
if self.dry_run:
|
||||
@@ -605,6 +611,95 @@ class TenantModelInstanceStage(MigrationStage):
|
||||
|
||||
return rows_inserted, self.target_tables
|
||||
|
||||
@staticmethod
|
||||
def _strip_is_tools_from_api_key(api_key: str, llm_factory: str) -> str:
    """Return a canonical form of ``api_key`` with ``is_tools`` removed.

    The result is only meant for dedup comparison (it is used as a dict key),
    not for storage.

    Handles three api_key formats:
    1. Plain string (e.g. "sk-xxx" or "x"), or JSON that is not an object —
       returned as-is.
    2. JSON with only {"api_key": "...", "is_tools": true/false} — the inner
       api_key value is extracted.
    3. JSON with factory-specific fields + optional "is_tools" — only the
       "is_tools" key is removed and the rest is re-serialized with sorted
       keys so that key order does not affect comparison.

    For format 3, the factory-specific JSON structures are:
        VolcEngine: {"ark_api_key": ..., "endpoint_id": ...}
        Tencent Cloud: {"tencent_cloud_sid": ..., "tencent_cloud_sk": ...}
        Bedrock: {"auth_mode": ..., "bedrock_ak": ..., "bedrock_sk": ..., "bedrock_region": ..., "aws_role_arn": ...}
        XunFei Spark (tts): {"spark_app_id": ..., "spark_api_secret": ..., "spark_api_key": ...}
        BaiduYiyan: {"yiyan_ak": ..., "yiyan_sk": ...}
        Fish Audio: {"fish_audio_ak": ..., "fish_audio_refid": ...}
        Google Cloud: {"google_project_id": ..., "google_region": ..., "google_service_account_key": ...}
        Azure-OpenAI: {"api_key": ..., "api_version": ...}
        OpenRouter: {"api_key": ..., "provider_order": ...}
        MinerU: {"api_key": ..., "provider_order": ...}
        PaddleOCR: {"api_key": ..., "provider_order": ...}
        OpenDataLoader: {"api_key": ..., "provider_order": ...}

    Args:
        api_key: Raw api_key column value; may be empty or None.
        llm_factory: Provider name. Currently unused; kept so existing
            callers keep working.

    Returns:
        The canonical string. Falsy input (None or "") is returned unchanged.
    """
    if not api_key:
        return api_key

    try:
        parsed = json.loads(api_key)
    except (json.JSONDecodeError, TypeError, ValueError):
        # Not JSON at all: a plain key is already canonical.
        return api_key

    if not isinstance(parsed, dict):
        return api_key

    # Case 2: {"api_key": "...", "is_tools": true/false} — extract inner api_key.
    if set(parsed) <= {"api_key", "is_tools"}:
        inner = parsed.get("api_key", "")
        if inner is None:
            return ""
        if isinstance(inner, str):
            return inner
        # A non-string inner value (number, list, dict) must still yield a
        # hashable string, because callers use the result as a dict key.
        return json.dumps(inner, sort_keys=True)

    # Case 3 (and JSON objects that never had is_tools): drop is_tools and
    # serialize with sorted keys.
    payload = {key: value for key, value in parsed.items() if key != "is_tools"}
    return json.dumps(payload, sort_keys=True)
|
||||
|
||||
def _dedup_api_key_records(self, records: list) -> list:
    """Deduplicate records whose api_keys are logically identical after stripping is_tools.

    Groups by (tenant_id, llm_factory, provider_id). Within each group, if multiple
    records share the same canonical api_key (with is_tools removed), only the
    first occurrence is kept, with its original api_key value untouched.
    is_tools information is not needed in tenant_model_instance (it is stored
    in tenant_model instead).

    Args:
        records: Sequence of 5-tuples
            (tenant_id, llm_factory, api_key, status, provider_id).

    Returns:
        A new list with duplicates removed. Groups appear in order of first
        occurrence, and records keep their order within a group.
    """
    from collections import defaultdict

    def _preview(value) -> str:
        # Credentials must not end up in logs: show only a short prefix.
        # Also tolerate NULL / non-string api_key values.
        if value is None:
            return "<none>"
        return f"{str(value)[:4]}***"

    groups = defaultdict(list)
    for rec in records:
        tenant_id, llm_factory, _api_key, _status, provider_id = rec
        groups[(tenant_id, llm_factory, provider_id)].append(rec)

    deduped = []
    dup_count = 0
    for (tenant_id, llm_factory, provider_id), group in groups.items():
        if len(group) <= 1:
            deduped.extend(group)
            continue

        # Multiple records in group — dedup by canonical api_key.
        seen = {}  # canonical_key -> first record
        for rec in group:
            _, _, api_key, _, _ = rec
            canonical = self._strip_is_tools_from_api_key(api_key, llm_factory)
            if canonical not in seen:
                seen[canonical] = rec
                continue

            dup_count += 1
            # The first occurrence is the one kept; the current record is dropped.
            logger.debug(
                "Dedup api_key for tenant=%s, factory=%s, provider=%s: "
                "keeping '%s', dropping '%s'",
                tenant_id,
                llm_factory,
                provider_id,
                _preview(seen[canonical][2]),
                _preview(api_key),
            )
        deduped.extend(seen.values())

    if dup_count > 0:
        logger.info(f"Deduplicated {dup_count} api_key records (is_tools-only differences)")

    return deduped
|
||||
|
||||
def create_target_table(self):
|
||||
"""Create tenant_model_instance table"""
|
||||
create_sql = """
|
||||
@@ -764,7 +859,7 @@ class TenantModelStage(MigrationStage):
|
||||
# Migrate status='0' records, plus status='1' for empty-llm factories
|
||||
cursor = self.db.execute_sql(
|
||||
f"SELECT tl.id, tl.llm_name, tmp.id as provider_id, tmi.id as instance_id, "
|
||||
f" tl.model_type, tl.status "
|
||||
f" tl.model_type, tl.status, tl.api_key "
|
||||
f"FROM tenant_llm tl "
|
||||
f"INNER JOIN tenant_model_provider tmp ON tmp.tenant_id = tl.tenant_id AND tmp.provider_name = tl.llm_factory "
|
||||
f"INNER JOIN tenant_model_instance tmi ON tmi.provider_id = tmp.id AND tmi.api_key = tl.api_key "
|
||||
@@ -785,7 +880,7 @@ class TenantModelStage(MigrationStage):
|
||||
|
||||
if self.dry_run:
|
||||
logger.info(f"[DRY RUN] Would insert {len(records)} records")
|
||||
for source_id, llm_name, provider_id, instance_id, model_type, status in records[:5]:
|
||||
for source_id, llm_name, provider_id, instance_id, model_type, status, api_key in records[:5]:
|
||||
logger.info(f" model_name={llm_name}, provider_id={provider_id}, "
|
||||
f"instance_id={instance_id}, model_type={model_type}")
|
||||
if len(records) > 5:
|
||||
@@ -797,19 +892,23 @@ class TenantModelStage(MigrationStage):
|
||||
for i in range(0, len(records), batch_size):
|
||||
batch = records[i:i + batch_size]
|
||||
values = []
|
||||
for source_id, llm_name, provider_id, instance_id, model_type, status in batch:
|
||||
for source_id, llm_name, provider_id, instance_id, model_type, status, api_key in batch:
|
||||
record_id = self.generate_uuid()
|
||||
model_name_escaped = llm_name.replace("'", "''") if llm_name else ""
|
||||
model_type_escaped = model_type.replace("'", "''") if model_type else ""
|
||||
status_val = "active" if status in ["1", "active", "enable"] else "inactive"
|
||||
# Extract is_tools from api_key JSON and put it in extra
|
||||
extra = self._extract_extra_from_api_key(api_key)
|
||||
extra_escaped = extra.replace("'", "''") if extra else "{}"
|
||||
values.append(f"('{record_id}', '{model_name_escaped}', '{provider_id}', "
|
||||
f"'{instance_id}', '{model_type_escaped}', '{status_val}', "
|
||||
f"'{extra_escaped}', "
|
||||
f"{current_ts * 1000}, FROM_UNIXTIME({current_ts}), "
|
||||
f"{current_ts * 1000}, FROM_UNIXTIME({current_ts}))")
|
||||
|
||||
insert_sql = f"""
|
||||
INSERT INTO tenant_model
|
||||
(id, model_name, provider_id, instance_id, model_type, status,
|
||||
(id, model_name, provider_id, instance_id, model_type, status, extra,
|
||||
create_time, create_date, update_time, update_date)
|
||||
VALUES {', '.join(values)}
|
||||
"""
|
||||
@@ -819,6 +918,29 @@ class TenantModelStage(MigrationStage):
|
||||
|
||||
return rows_inserted, self.target_tables
|
||||
|
||||
@staticmethod
|
||||
def _extract_extra_from_api_key(api_key: str) -> str:
    """Build the ``extra`` JSON string for tenant_model from an api_key value.

    Returns '{"is_tools": true}' only when ``api_key`` decodes to a JSON
    object whose "is_tools" entry is exactly ``true``. Every other input
    (empty/None, non-JSON text, JSON that is not an object, or an object
    without a true "is_tools") yields '{}'.
    """
    empty_extra = "{}"

    if api_key:
        try:
            decoded = json.loads(api_key)
        except (json.JSONDecodeError, TypeError, ValueError):
            return empty_extra

        # Identity check on purpose: only a real JSON ``true`` counts.
        if isinstance(decoded, dict) and decoded.get("is_tools") is True:
            return json.dumps({"is_tools": True})

    return empty_extra
|
||||
|
||||
def create_target_table(self):
|
||||
"""Create tenant_model table"""
|
||||
create_sql = """
|
||||
|
||||
Reference in New Issue
Block a user