diff --git a/agent/component/browser.py b/agent/component/browser.py index 4ec8ad290e..c7f77b1577 100644 --- a/agent/component/browser.py +++ b/agent/component/browser.py @@ -83,6 +83,15 @@ class BrowserParam(LLMParam): class Browser(ComponentBase, ABC): component_name = "Browser" + def _prepare_input_values(self): + for key, meta in self.get_input_elements().items(): + val = meta.get("value") + if val is None: + val = "" + elif not isinstance(val, str): + val = json.dumps(val, ensure_ascii=False) + self.set_input_value(key, val) + def get_input_elements(self) -> dict[str, dict]: text_parts = [ str(self._param.prompts or ""), @@ -416,12 +425,17 @@ class Browser(ComponentBase, ABC): llm_kwargs = {k: v for k, v in llm_kwargs.items() if v not in (None, "")} return ChatBrowserUse(**llm_kwargs) + # browser-use Agent defaults to json_schema response_format and may use tool_choice via + # ChatDeepSeek. Many providers (e.g. DeepSeek thinking models) reject both. Use ChatOpenAI + # with schema-in-prompt and without forced structured output on the first run. llm_kwargs = { "model": model_name, "api_key": cfg.get("api_key"), "base_url": base_url, "temperature": self._param.temperature, "max_retries": self._param.max_retries, + "add_schema_to_system_prompt": True, + "dont_force_structured_output": True, } llm_kwargs = {k: v for k, v in llm_kwargs.items() if v not in (None, "")} return ChatOpenAI(**llm_kwargs) @@ -666,6 +680,7 @@ class Browser(ComponentBase, ABC): profile_dir = None persist_session = self._should_persist_session() try: + self._prepare_input_values() user_prompt = self._resolve_text(kwargs.get("prompts", self._param.prompts)) with tempfile.TemporaryDirectory(prefix="browser_use_upload_") as upload_dir, tempfile.TemporaryDirectory( prefix="browser_use_download_" diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py index 6ad7941275..db9a8b7d2f 100644 --- a/rag/llm/__init__.py +++ b/rag/llm/__init__.py @@ -67,6 +67,7 @@ class SupportedLiteLLMProvider(StrEnum): FACTORY_DEFAULT_BASE_URL = { SupportedLiteLLMProvider.Tongyi_Qianwen: "https://dashscope.aliyuncs.com/compatible-mode/v1", SupportedLiteLLMProvider.Dashscope: "https://dashscope.aliyuncs.com/compatible-mode/v1", + SupportedLiteLLMProvider.DeepSeek: "https://api.deepseek.com/v1", SupportedLiteLLMProvider.Moonshot: "https://api.moonshot.cn/v1", SupportedLiteLLMProvider.Ollama: "", SupportedLiteLLMProvider.LongCat: "https://api.longcat.chat/openai", diff --git a/test/unit_test/agent/component/test_browser_use_component.py b/test/unit_test/agent/component/test_browser_use_component.py index b398b3ecab..9b3ff8c39c 100644 --- a/test/unit_test/agent/component/test_browser_use_component.py +++ b/test/unit_test/agent/component/test_browser_use_component.py @@ -53,9 +53,13 @@ class _FakeCanvas: self._refs = refs or {} def is_reff(self, token): - return token in self._refs + key = token.strip("{} ") + return key in self._refs or token in self._refs def get_variable_value(self, token): + key = token.strip("{} ") + if key in self._refs: + return self._refs[key] return self._refs[token] def get_tenant_id(self): @@ -69,6 +73,19 @@ def _build_component(): return component +def test_prepare_input_values_records_variable_inputs(): + component = browser_use_module.Browser.__new__(browser_use_module.Browser) + component._canvas = _FakeCanvas(refs={"sys.query": "open example.com"}) + component._param = browser_use_module.BrowserParam() + component._param.prompts = "{sys.query}" + component._param.inputs = {} + + component._prepare_input_values() + + assert component.get_input_value("sys.query") == "open example.com" + assert component.get_input_values()["sys.query"] == "open example.com" + + def test_extract_ids_supports_mixed_literals_and_variables(): component = browser_use_module.Browser.__new__(browser_use_module.Browser) component._canvas = _FakeCanvas(