diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index b362365276..d247f2c44d 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -39,6 +39,22 @@ from rag.prompts.generator import vision_llm_describe_prompt
 
 from common.misc_utils import thread_pool_exec
 
+def _qwen3_no_think_extra_body(model_name: str) -> dict[str, bool] | None:
+    """Return DashScope-compatible options that disable thinking for Qwen3.x models, else None."""
+    # Case-insensitive substring match on the "qwen3." family marker.
+    is_qwen3x = model_name.lower().find("qwen3.") != -1
+    return {"enable_thinking": False} if is_qwen3x else None
+
+
+def _remove_sampling_params(model_name: str, gen_conf: dict | None) -> dict:
+    """Return a copy of gen_conf, minus sampling options when the model is Qwen3.x (for now)."""
+    if "qwen3." not in model_name.lower():
+        # Other models keep every option; the caller still receives its own copy.
+        return dict(gen_conf or {})
+    dropped = {"temperature", "top_p"}
+    return {key: value for key, value in (gen_conf or {}).items() if key not in dropped}
+
+
 class Base(ABC):
     def __init__(self, **kwargs):
         # Configure retry parameters
@@ -310,6 +326,9 @@ class QWenCV(GptV4):
         if not base_url:
             base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
         super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
+        # Qwen3.x models can be registered as IMAGE2TEXT and routed through this CV wrapper.
+        # Disable thinking here so parser-side extraction tasks do not emit reasoning text.
+        self.extra_body = _qwen3_no_think_extra_body(self.model_name) or self.extra_body
 
     @staticmethod
     def _extract_text_from_content(content):
@@ -345,6 +364,7 @@ class QWenCV(GptV4):
         return "Please summarize this video in proper sentences."
 
     async def async_chat(self, system, history, gen_conf, images=None, video_bytes=None, filename="", **kwargs):
+        gen_conf = _remove_sampling_params(self.model_name, gen_conf)
         if video_bytes:
             try:
                 summary, summary_num_tokens = self._process_video(video_bytes, filename, self._resolve_video_prompt(system, history, **kwargs))