Fix PaddleOCR / MinerU models that cannot be added (#15858)

Fix PaddleOCR / MinerU models that cannot be added
This commit is contained in:
Wang Qi
2026-06-10 13:04:13 +08:00
committed by GitHub
parent 7f4bf69f05
commit 9aa81e7cad
8 changed files with 140 additions and 46 deletions

View File

@@ -57,7 +57,7 @@
"component_name": "TavilySearch",
"name": "TavilySearch",
"params": {
"api_key": "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",
"api_key": "",
"days": 7,
"exclude_domains": [],
"include_answer": false,
@@ -651,7 +651,7 @@
"component_name": "TavilySearch",
"name": "TavilySearch",
"params": {
"api_key": "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",
"api_key": "",
"days": 7,
"exclude_domains": [],
"include_answer": false,

View File

@@ -16,6 +16,7 @@
import os
import logging
from api.db.joint_services.tenant_model_service import ensure_mineru_from_env, ensure_paddleocr_from_env
from common.constants import ActiveStatusEnum, LLMType
from common.settings import FACTORY_LLM_INFOS
from api.db.services.tenant_model_provider_service import TenantModelProviderService
@@ -301,6 +302,9 @@ def list_tenant_added_models(tenant_id: str, model_type_filter: str=None):
if not e:
return False, "Tenant not found"
ensure_mineru_from_env(tenant_id)
ensure_paddleocr_from_env(tenant_id)
if model_type_filter:
model_type_filter = model_type_filter.lower()

View File

@@ -18,8 +18,8 @@ import os
import enum
import json
from common import settings
from common.constants import LLMType, ActiveStatusEnum
from api.db.services.tenant_llm_service import TenantLLMService, TenantService
from common.constants import ActiveStatusEnum, LLMType, MINERU_DEFAULT_CONFIG, MINERU_ENV_KEYS, PADDLEOCR_DEFAULT_CONFIG, PADDLEOCR_ENV_KEYS
from api.db.services.tenant_llm_service import TenantService
from api.db.services.tenant_model_provider_service import TenantModelProviderService
from api.db.services.tenant_model_instance_service import TenantModelInstanceService
from api.db.services.tenant_model_service import TenantModelService
@@ -27,6 +27,106 @@ from api.db.services.tenant_model_service import TenantModelService
logger = logging.getLogger(__name__)
def _decode_api_key_config(raw_api_key: str) -> tuple[str, bool | None, str | None]:
if not raw_api_key:
return raw_api_key, None, None
try:
parsed = json.loads(raw_api_key)
except Exception:
return raw_api_key, None, None
if not isinstance(parsed, dict):
return raw_api_key, None, None
is_tools = bool(parsed["is_tools"]) if "is_tools" in parsed else None
if set(parsed.keys()) <= {"api_key", "is_tools"}:
return parsed.get("api_key", ""), is_tools, None
return parsed.get("api_key", raw_api_key), is_tools, raw_api_key
def get_first_provider_model_name(tenant_id: str, provider_name: str, model_type: str | enum.Enum) -> str | None:
    """Return the first active model of a given type under a tenant's provider.

    Args:
        tenant_id: Tenant whose provider record is looked up.
        provider_name: Provider name used for the lookup and as the last
            segment of the returned identifier.
        model_type: Model type to match, as a string or an enum whose
            ``.value`` is compared against each model's ``model_type``.

    Returns:
        ``"<model_name>@<instance_name>@<provider_name>"`` for the first model
        that is active, belongs to an active instance and has the requested
        type; ``None`` when the provider does not exist or nothing matches.
    """
    wanted_type = model_type.value if not isinstance(model_type, str) else model_type

    provider = TenantModelProviderService.get_by_tenant_id_and_provider_name(tenant_id, provider_name)
    if not provider:
        return None

    active = ActiveStatusEnum.ACTIVE.value
    for instance in TenantModelInstanceService.get_all_by_provider_id(provider.id):
        if instance.status == active:
            # Only models of active instances are considered.
            for model in TenantModelService.get_models_by_instance_id(instance.id):
                if model.model_type != wanted_type or model.status != active:
                    continue
                return f"{model.model_name}@{instance.instance_name}@{provider_name}"
    return None
def _collect_env_config(env_keys: list[str], default_config: dict) -> dict | None:
config = dict(default_config)
found = False
for key in env_keys:
value = os.environ.get(key)
if value:
found = True
config[key] = value
return config if found else None
def _ensure_ocr_provider_from_env(tenant_id: str, provider_name: str, model_name: str, config: dict | None) -> str | None:
    """Make sure a tenant has provider, instance and OCR model rows for a config.

    Each of the three records is looked up first and created only if it is
    missing.

    Args:
        tenant_id: Tenant that owns the provider record.
        provider_name: Provider name to look up or insert.
        model_name: Name for the OCR model; also used as the instance name
            when a new instance has to be created.
        config: Settings for the provider. It is JSON-serialised and stored
            as the instance's ``api_key``. A falsy value disables the call.

    Returns:
        ``"<model_name>@<instance_name>@<provider_name>"``, or ``None`` when
        ``config`` is falsy.
    """
    if not config:
        return None

    ocr_type = LLMType.OCR.value

    provider = TenantModelProviderService.get_by_tenant_id_and_provider_name(tenant_id, provider_name)
    if not provider:
        # Insert, then read back to obtain the stored record.
        TenantModelProviderService.insert(tenant_id=tenant_id, provider_name=provider_name)
        provider = TenantModelProviderService.get_by_tenant_id_and_provider_name(tenant_id, provider_name)

    # The serialised config is what identifies an existing instance.
    serialized_config = json.dumps(config)
    instance = TenantModelInstanceService.get_by_provider_id_and_api_key(provider.id, serialized_config)
    if not instance:
        instance = TenantModelInstanceService.create_instance(
            provider_id=provider.id,
            instance_name=model_name,
            api_key=serialized_config,
            extra="{}",
        )

    existing_model = TenantModelService.get_by_provider_id_and_instance_id_and_model_type_and_model_name(
        provider.id,
        instance.id,
        ocr_type,
        model_name,
    )
    if not existing_model:
        TenantModelService.insert(
            model_name=model_name,
            provider_id=provider.id,
            instance_id=instance.id,
            model_type=ocr_type,
            extra=json.dumps({"max_tokens": 0}),
        )

    # Use the stored instance name: a pre-existing instance may be named
    # differently from model_name.
    return f"{model_name}@{instance.instance_name}@{provider_name}"
def ensure_mineru_from_env(tenant_id: str) -> str | None:
    """Register the environment-configured MinerU OCR model for a tenant.

    Args:
        tenant_id: Tenant to register the model for.

    Returns:
        The identifier returned by ``_ensure_ocr_provider_from_env`` for the
        ``"mineru-from-env"`` model under the ``"MinerU"`` provider, or
        ``None`` when none of the ``MINERU_ENV_KEYS`` variables is set.
    """
    env_config = _collect_env_config(MINERU_ENV_KEYS, MINERU_DEFAULT_CONFIG)
    return _ensure_ocr_provider_from_env(
        tenant_id,
        provider_name="MinerU",
        model_name="mineru-from-env",
        config=env_config,
    )
def ensure_paddleocr_from_env(tenant_id: str) -> str | None:
    """Register the environment-configured PaddleOCR model for a tenant.

    Args:
        tenant_id: Tenant to register the model for.

    Returns:
        The identifier returned by ``_ensure_ocr_provider_from_env`` for the
        ``"paddleocr-from-env"`` model under the ``"PaddleOCR"`` provider, or
        ``None`` when none of the ``PADDLEOCR_ENV_KEYS`` variables is set.
    """
    env_config = _collect_env_config(PADDLEOCR_ENV_KEYS, PADDLEOCR_DEFAULT_CONFIG)
    return _ensure_ocr_provider_from_env(
        tenant_id,
        provider_name="PaddleOCR",
        model_name="paddleocr-from-env",
        config=env_config,
    )
def get_tenant_default_model_by_type(tenant_id: str, model_type: str|enum.Enum):
exist, tenant = TenantService.get_by_id(tenant_id)
if not exist:
@@ -103,7 +203,7 @@ def get_model_config_from_provider_instance(tenant_id, model_type: str|enum.Enum
raise LookupError(f"Instance {instance_name} not found for model {model_name}.")
model_obj = TenantModelService.get_by_provider_id_and_instance_id_and_model_type_and_model_name(provider_obj.id, instance_obj.id, model_type_val, pure_model_name)
api_key, is_tool, api_key_payload = TenantLLMService._decode_api_key_config(instance_obj.api_key)
api_key, is_tool, api_key_payload = _decode_api_key_config(instance_obj.api_key)
extra_fields = json.loads(instance_obj.extra) if instance_obj.extra else {}
if model_obj:

View File

@@ -24,13 +24,13 @@ def normalize_layout_recognizer(layout_recognizer_raw: Any) -> tuple[Any, str |
if isinstance(layout_recognizer_raw, str):
lowered = layout_recognizer_raw.lower()
if lowered.endswith("@mineru"):
parser_model_name = layout_recognizer_raw.rsplit("@", 1)[0]
parser_model_name = layout_recognizer_raw
layout_recognizer = "MinerU"
elif lowered.endswith("@paddleocr"):
parser_model_name = layout_recognizer_raw.rsplit("@", 1)[0]
parser_model_name = layout_recognizer_raw
layout_recognizer = "PaddleOCR"
elif lowered.endswith("@opendataloader"):
parser_model_name = layout_recognizer_raw.rsplit("@", 1)[0]
parser_model_name = layout_recognizer_raw
layout_recognizer = "OpenDataLoader"
return layout_recognizer, parser_model_name

View File

@@ -223,8 +223,6 @@ class PaddleOCRParser(RAGFlowPdfParser):
request_timeout: int = 600,
):
"""Initialize PaddleOCR parser."""
super().__init__()
self.outlines = []
self.api_url = api_url.rstrip("/") if api_url else os.getenv("PADDLEOCR_API_URL", "")
self.access_token = access_token or os.getenv("PADDLEOCR_ACCESS_TOKEN")

View File

@@ -31,7 +31,13 @@ from common.token_utils import num_tokens_from_string
from common.constants import LLMType, MAXIMUM_PAGE_NUMBER
from api.db.services.llm_service import LLMBundle
from api.db.joint_services.tenant_model_service import get_tenant_default_model_by_type, get_model_config_from_provider_instance
from api.db.joint_services.tenant_model_service import (
ensure_mineru_from_env,
ensure_paddleocr_from_env,
get_first_provider_model_name,
get_model_config_from_provider_instance,
get_tenant_default_model_by_type,
)
from rag.utils.file_utils import extract_embed_file, extract_links_from_pdf, extract_links_from_docx, extract_html
from deepdoc.parser import DocxParser, EpubParser, ExcelParser, HtmlParser, JsonParser, MarkdownElementExtractor, MarkdownParser, PdfParser, TxtParser
from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_docx_wrapper_naive, vision_figure_parser_pdf_wrapper
@@ -137,14 +143,7 @@ def by_mineru(
if tenant_id:
if not mineru_llm_name:
try:
from api.db.services.tenant_llm_service import TenantLLMService
env_name = TenantLLMService.ensure_mineru_from_env(tenant_id)
candidates = TenantLLMService.query(tenant_id=tenant_id, llm_factory="MinerU", model_type=LLMType.OCR)
if candidates:
mineru_llm_name = candidates[0].llm_name
elif env_name:
mineru_llm_name = env_name
mineru_llm_name = get_first_provider_model_name(tenant_id, "MinerU", LLMType.OCR) or ensure_mineru_from_env(tenant_id)
except Exception as e: # best-effort fallback
logging.warning(f"fallback to env mineru: {e}")
@@ -281,14 +280,7 @@ def by_paddleocr(
if tenant_id:
if not paddleocr_llm_name:
try:
from api.db.services.tenant_llm_service import TenantLLMService
env_name = TenantLLMService.ensure_paddleocr_from_env(tenant_id)
candidates = TenantLLMService.query(tenant_id=tenant_id, llm_factory="PaddleOCR", model_type=LLMType.OCR)
if candidates:
paddleocr_llm_name = candidates[0].llm_name
elif env_name:
paddleocr_llm_name = env_name
paddleocr_llm_name = get_first_provider_model_name(tenant_id, "PaddleOCR", LLMType.OCR) or ensure_paddleocr_from_env(tenant_id)
except Exception as e: # best-effort fallback
logging.warning(f"fallback to env paddleocr: {e}")

View File

@@ -27,7 +27,13 @@ from PIL import Image
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.llm_service import LLMBundle
from api.db.joint_services.tenant_model_service import get_tenant_default_model_by_type, get_model_config_from_provider_instance
from api.db.joint_services.tenant_model_service import (
ensure_mineru_from_env,
ensure_paddleocr_from_env,
get_first_provider_model_name,
get_model_config_from_provider_instance,
get_tenant_default_model_by_type,
)
from common import settings
from common.constants import LLMType
from common.misc_utils import get_uuid, thread_pool_exec
@@ -336,10 +342,10 @@ class Parser(ProcessBase):
if isinstance(raw_parse_method, str):
lowered = raw_parse_method.lower()
if lowered.endswith("@mineru"):
parser_model_name = raw_parse_method.rsplit("@", 1)[0]
parser_model_name = raw_parse_method
parse_method = "MinerU"
elif lowered.endswith("@paddleocr"):
parser_model_name = raw_parse_method.rsplit("@", 1)[0]
parser_model_name = raw_parse_method
parse_method = "PaddleOCR"
# DeepDOC returns structured page boxes directly.
@@ -368,13 +374,7 @@ class Parser(ProcessBase):
if not tenant_id:
return None
from api.db.services.tenant_llm_service import TenantLLMService
env_name = TenantLLMService.ensure_mineru_from_env(tenant_id)
candidates = TenantLLMService.query(tenant_id=tenant_id, llm_factory="MinerU", model_type=LLMType.OCR.value)
if candidates:
return candidates[0].llm_name
return env_name
return get_first_provider_model_name(tenant_id, "MinerU", LLMType.OCR) or ensure_mineru_from_env(tenant_id)
parser_model_name = resolve_mineru_llm_name()
if not parser_model_name:
@@ -550,13 +550,7 @@ class Parser(ProcessBase):
if not tenant_id:
return None
from api.db.services.tenant_llm_service import TenantLLMService
env_name = TenantLLMService.ensure_paddleocr_from_env(tenant_id)
candidates = TenantLLMService.query(tenant_id=tenant_id, llm_factory="PaddleOCR", model_type=LLMType.OCR.value)
if candidates:
return candidates[0].llm_name
return env_name
return get_first_provider_model_name(tenant_id, "PaddleOCR", LLMType.OCR) or ensure_paddleocr_from_env(tenant_id)
parser_model_name = resolve_paddleocr_llm_name()
if not parser_model_name:

View File

@@ -766,7 +766,10 @@ export const ProviderConfigMap: Record<string, ProviderConfig> = {
return {
apiKey: cfg,
baseUrl: values.paddleocr_api_url,
modelInfo: buildModelInfoFromValues(values),
modelInfo: buildModelInfoFromValues({
...values,
model_type: ['ocr'],
}),
};
},
submitTransform: (values) => {
@@ -782,7 +785,10 @@ export const ProviderConfigMap: Record<string, ProviderConfig> = {
llm_factory: LLMFactory.PaddleOCR,
api_key: cfg,
api_base: '',
model_info: buildModelInfoFromValues(values),
model_info: buildModelInfoFromValues({
...values,
model_type: ['ocr'],
}),
};
},
},