Fix: disable thinking to avoid potential infinite loops in Qwen3.5/Qwen3.6 models (#15101)

### What problem does this PR solve?

This PR fixes the issue where Qwen3.5/Qwen3.6 series models may spend
excessive time on simple document-parsing tasks, such as Auto Metadata
extraction, keyword extraction, question generation, and image
description when using the MinerU parser.

For these tasks, Qwen3.5/Qwen3.6 models may perform unnecessary
reasoning by default, which can lead to very long response times, high
token consumption, and, in some cases, potential infinite output loops.

Since Qwen3.5/Qwen3.6 multimodal models are instantiated as `CvModel`
when configured as `image2text`, the existing `enable_thinking=False`
logic in `chat_model.py` does not apply to them. This PR adds the
corresponding handling for the CV/image-to-text model path as well.

This helps reduce unnecessary thinking time, avoid potential infinite
loops, and improve parsing efficiency without noticeably affecting
output quality for these simple extraction and image-description tasks.

Fixes #15083.
This commit is contained in:
Aeovy
2026-06-02 13:21:35 +08:00
committed by GitHub
parent 5b02fe4841
commit 600590cd18

View File

@@ -39,6 +39,22 @@ from rag.prompts.generator import vision_llm_describe_prompt
from common.misc_utils import thread_pool_exec
def _qwen3_no_think_extra_body(model_name: str) -> dict[str, bool] | None:
"""Build DashScope-compatible options that disable Qwen3.x thinking."""
if "qwen3." in model_name.lower():
return {"enable_thinking": False}
return None
def _remove_sampling_params(model_name: str, gen_conf: dict | None) -> dict:
"""Remove sampling options from Qwen3.x CV requests for now."""
sanitized_gen_conf = dict(gen_conf or {})
if "qwen3." in model_name.lower():
for key in ("temperature", "top_p"):
sanitized_gen_conf.pop(key, None)
return sanitized_gen_conf
class Base(ABC):
def __init__(self, **kwargs):
# Configure retry parameters
@@ -310,6 +326,9 @@ class QWenCV(GptV4):
if not base_url:
base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
# Qwen3.x models can be registered as IMAGE2TEXT and routed through this CV wrapper.
# Disable thinking here so parser-side extraction tasks do not emit reasoning text.
self.extra_body = _qwen3_no_think_extra_body(self.model_name) or self.extra_body
@staticmethod
def _extract_text_from_content(content):
@@ -345,6 +364,7 @@ class QWenCV(GptV4):
return "Please summarize this video in proper sentences."
async def async_chat(self, system, history, gen_conf, images=None, video_bytes=None, filename="", **kwargs):
gen_conf = _remove_sampling_params(self.model_name, gen_conf)
if video_bytes:
try:
summary, summary_num_tokens = self._process_video(video_bytes, filename, self._resolve_video_prompt(system, history, **kwargs))