From 600590cd18c4b1f9015eb96e09e8ff74024a475a Mon Sep 17 00:00:00 2001 From: Aeovy <108655917+Aeovy@users.noreply.github.com> Date: Tue, 2 Jun 2026 13:21:35 +0800 Subject: [PATCH] Fix: disable thinking to avoid potential infinite loops in Qwen3.5/Qwen3.6 models (#15101) ### What problem does this PR solve? This PR fixes the issue where Qwen3.5/Qwen3.6 series models may spend excessive time on simple document-parsing tasks, such as Auto Metadata extraction, keyword extraction, question generation, and image description when using the MinerU parser. For these tasks, Qwen3.5/Qwen3.6 models may perform unnecessary reasoning by default, which can lead to very long response times, high token consumption, and, in some cases, potential infinite output loops. Since Qwen3.5/Qwen3.6 multimodal models are instantiated as `CvModel` when configured as `image2text`, the existing `enable_thinking=False` logic in `chat_model.py` does not apply to them. This PR adds the corresponding handling for the CV/image-to-text model path as well. This helps reduce unnecessary thinking time, avoid potential infinite loops, and improve parsing efficiency without noticeably affecting output quality for these simple extraction and image-description tasks. Fixes #15083. --- rag/llm/cv_model.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py index b362365276..d247f2c44d 100644 --- a/rag/llm/cv_model.py +++ b/rag/llm/cv_model.py @@ -39,6 +39,22 @@ from rag.prompts.generator import vision_llm_describe_prompt from common.misc_utils import thread_pool_exec +def _qwen3_no_think_extra_body(model_name: str) -> dict[str, bool] | None: + """Build DashScope-compatible options that disable Qwen3.x thinking.""" + if "qwen3." in model_name.lower(): + return {"enable_thinking": False} + return None + + +def _remove_sampling_params(model_name: str, gen_conf: dict | None) -> dict: + """Remove sampling options from Qwen3.x CV requests for now.""" + sanitized_gen_conf = dict(gen_conf or {}) + if "qwen3." in model_name.lower(): + for key in ("temperature", "top_p"): + sanitized_gen_conf.pop(key, None) + return sanitized_gen_conf + + class Base(ABC): def __init__(self, **kwargs): # Configure retry parameters @@ -310,6 +326,9 @@ class QWenCV(GptV4): if not base_url: base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs) + # Qwen3.x models can be registered as IMAGE2TEXT and routed through this CV wrapper. + # Disable thinking here so parser-side extraction tasks do not emit reasoning text. + self.extra_body = _qwen3_no_think_extra_body(self.model_name) or self.extra_body @staticmethod def _extract_text_from_content(content): @@ -345,6 +364,7 @@ class QWenCV(GptV4): return "Please summarize this video in proper sentences." async def async_chat(self, system, history, gen_conf, images=None, video_bytes=None, filename="", **kwargs): + gen_conf = _remove_sampling_params(self.model_name, gen_conf) if video_bytes: try: summary, summary_num_tokens = self._process_video(video_bytes, filename, self._resolve_video_prompt(system, history, **kwargs))