From 1a1b5aa53e4eaa53e3cc50f6c7b88c24be7e6866 Mon Sep 17 00:00:00 2001 From: NeedmeFordev <124189514+spider-yamet@users.noreply.github.com> Date: Tue, 14 Apr 2026 04:55:20 -0700 Subject: [PATCH] Fix: respect the internet toggle before running Tavily web search (#14051) (#14052) ### What problem does this PR solve? Fixes #14051. The chat UI already sends an `internet` flag with each request, but the backend previously triggered Tavily web retrieval whenever `prompt_config.tavily_api_key` was configured. As a result, web search could still run even when the internet toggle was off. This PR makes web search an explicit opt-in at request time: - `tavily_api_key` only indicates that web search is available - Tavily retrieval runs only when `internet` is explicitly enabled - the same behavior now applies to both the normal retrieval path and the deep-research / reasoning path This also fixes the no-KB fallback case so chats without KBs fall back to normal solo chat when `internet` is off. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/db/services/dialog_service.py | 28 +++++++++++++++++-- ...tructured_query_decomposition_retrieval.py | 6 ++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index 83f79c285a..ede9bb0019 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -49,6 +49,27 @@ from common.string_utils import remove_redundant_spaces from common import settings +def _normalize_internet_flag(value): + if isinstance(value, bool): + return value + if isinstance(value, (int, float)) and value in (0, 1): + return bool(value) + if isinstance(value, str): + normalized = value.strip().lower() + if normalized in {"true", "1", "yes", "on"}: + return True + if normalized in {"false", "0", "no", "off", ""}: + return False + return None + + +def _should_use_web_search(prompt_config, internet=None): + if not prompt_config.get("tavily_api_key"): + return False + normalized = _normalize_internet_flag(internet) + return normalized is True + + class DialogService(CommonService): model = Dialog @@ -461,7 +482,9 @@ def repair_bad_citation_formats(answer: str, kbinfos: dict, idx: set): async def async_chat(dialog, messages, stream=True, **kwargs): logging.debug("Begin async_chat") assert messages[-1]["role"] == "user", "The last content of this conversation is not from user." - if not dialog.kb_ids and not dialog.prompt_config.get("tavily_api_key"): + use_web_search = _should_use_web_search(dialog.prompt_config, kwargs.get("internet")) + logging.debug("web_search kb=%s tavily=%s internet=%r enabled=%s", bool(dialog.kb_ids), bool(dialog.prompt_config.get("tavily_api_key")), kwargs.get("internet"), use_web_search) + if not dialog.kb_ids and not use_web_search: async for ans in async_chat_solo(dialog, messages, stream): yield ans return @@ -587,6 +610,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs): vector_similarity_weight=0.3, doc_ids=attachments, ), + internet_enabled=use_web_search, ) queue = asyncio.Queue() async def callback(msg:str): @@ -629,7 +653,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs): if cks: kbinfos["chunks"] = cks kbinfos["chunks"] = retriever.retrieval_by_children(kbinfos["chunks"], tenant_ids) - if prompt_config.get("tavily_api_key"): + if use_web_search: tav = Tavily(prompt_config["tavily_api_key"]) tav_res = tav.retrieve_chunks(" ".join(questions)) kbinfos["chunks"].extend(tav_res["chunks"]) diff --git a/rag/advanced_rag/tree_structured_query_decomposition_retrieval.py b/rag/advanced_rag/tree_structured_query_decomposition_retrieval.py index 214485c3b0..11af6aa46b 100644 --- a/rag/advanced_rag/tree_structured_query_decomposition_retrieval.py +++ b/rag/advanced_rag/tree_structured_query_decomposition_retrieval.py @@ -28,12 +28,14 @@ class TreeStructuredQueryDecompositionRetrieval: chat_mdl: LLMBundle, prompt_config: dict, kb_retrieve: partial = None, - kg_retrieve: partial = None + kg_retrieve: partial = None, + internet_enabled: bool = False, ): self.chat_mdl = chat_mdl self.prompt_config = prompt_config self._kb_retrieve = kb_retrieve self._kg_retrieve = kg_retrieve + self.internet_enabled = internet_enabled self._lock = asyncio.Lock() async def _retrieve_information(self, search_query): @@ -47,7 +49,7 @@ class TreeStructuredQueryDecompositionRetrieval: # 2. Web retrieval (if Tavily API is configured) try: - if self.prompt_config.get("tavily_api_key"): + if self.internet_enabled and self.prompt_config.get("tavily_api_key"): tav = Tavily(self.prompt_config["tavily_api_key"]) tav_res = tav.retrieve_chunks(search_query) kbinfos["chunks"].extend(tav_res["chunks"])