2025-08-29 14:15:40 +08:00
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import os
import time
from abc import ABC
import requests
from agent . tools . base import ToolMeta , ToolParamBase , ToolBase
2025-11-04 11:51:12 +08:00
from common . connection_utils import timeout
2026-04-25 15:30:15 +09:00
from common . ssrf_guard import assert_url_is_safe , pin_dns
2025-08-29 14:15:40 +08:00
class SearXNGParam(ToolParamBase):
    """
    Parameter set for the SearXNG search tool.

    Holds the tool metadata (name, description and the ``query`` /
    ``searxng_url`` parameter specs) together with the two settings read at
    run time: ``top_n`` and ``searxng_url``.
    """

    def __init__(self):
        # Spec of the search keywords parameter.
        query_spec = {
            "type": "string",
            "description": "The search keywords to execute with SearXNG. The keywords should be the most important words/terms(includes synonyms) from the original request.",
            "default": "{sys.query}",
            "required": True,
        }
        # Spec of the optional instance URL parameter.
        url_spec = {
            "type": "string",
            "description": "The base URL of your SearXNG instance (e.g., http://localhost:4000). This is required to connect to your SearXNG server.",
            "required": False,
            "default": "",
        }
        # The metadata is assigned before the base initializer runs, as in the
        # original ordering.
        self.meta: ToolMeta = {
            "name": "searxng_search",
            "description": "SearXNG is a privacy-focused metasearch engine that aggregates results from multiple search engines without tracking users. It provides comprehensive web search capabilities.",
            "parameters": {
                "query": query_spec,
                "searxng_url": url_spec,
            },
        }
        super().__init__()
        self.top_n = 10
        self.searxng_url = ""

    def check(self):
        """
        Validate the parameters.

        Validation is intentionally lenient: the URL is not checked here so
        that opening the try-run panel does not fail when no URL is set yet.
        Only ``top_n`` is validated, after a best-effort conversion from the
        string form the UI may deliver.
        """
        top_n_value = self.top_n
        if isinstance(top_n_value, str):
            try:
                self.top_n = int(top_n_value.strip())
            except Exception:
                # Leave the value untouched; the check below reports it.
                pass
        self.check_positive_integer(self.top_n, "Top N")

    def get_input_form(self) -> dict[str, dict]:
        """Return the input form description: one line field per parameter."""
        query_field = {"name": "Query", "type": "line"}
        url_field = {
            "name": "SearXNG URL",
            "type": "line",
            "placeholder": "http://localhost:4000",
        }
        return {"query": query_field, "searxng_url": url_field}
2025-08-29 14:15:40 +08:00
class SearXNG(ToolBase, ABC):
    """
    Agent tool that sends a query to a SearXNG instance and publishes the hits.
    """

    component_name = "SearXNG"

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
    def _invoke(self, **kwargs):
        """
        Run one search against the configured SearXNG instance.

        Args:
            **kwargs: Expected to carry ``query`` (the search keywords) and,
                optionally, ``searxng_url`` which is used only when the
                parameter object has no URL configured.

        Returns:
            ``None`` when the task was canceled; ``""`` when no usable query
            or URL is available (try-run case); an error string starting with
            ``"SearXNG error:"`` when the URL is rejected or every attempt
            failed; otherwise the value of the ``formalized_content`` output.
        """
        if self.check_if_canceled("SearXNG processing"):
            return

        # Gracefully handle try-run without inputs
        query = kwargs.get("query")
        if not query or not isinstance(query, str) or not query.strip():
            self.set_output("formalized_content", "")
            return ""

        searxng_url = (getattr(self._param, "searxng_url", "") or kwargs.get("searxng_url") or "").strip()
        # In try-run, if no URL configured, just return empty instead of raising
        if not searxng_url:
            self.set_output("formalized_content", "")
            return ""

        # The guard yields the hostname/IP pair that is handed to pin_dns
        # below, and signals a rejected URL with ValueError.
        try:
            _ssrf_hostname, _ssrf_ip = assert_url_is_safe(searxng_url)
        except ValueError as e:
            self.set_output("_ERROR", str(e))
            return f"SearXNG error: SSRF guard blocked {searxng_url!r}: {e}"

        # Drop trailing slashes so "http://host:4000/" does not turn into
        # "http://host:4000//search".
        search_endpoint = searxng_url.rstrip("/") + "/search"
        # Identical for every attempt, so built once outside the retry loop.
        search_params = {
            "q": query,
            "format": "json",
            "categories": "general",
            "language": "auto",
            "safesearch": 1,
            "pageno": 1,
        }

        last_e = ""
        for _ in range(self._param.max_retries + 1):
            if self.check_if_canceled("SearXNG processing"):
                return

            try:
                # NOTE(review): requests follows redirects by default; confirm
                # that the SSRF guard / pin_dns also covers redirect targets,
                # otherwise a redirect could reach a host that was never checked.
                with pin_dns(_ssrf_hostname, _ssrf_ip):
                    response = requests.get(search_endpoint, params=search_params, timeout=10)
                response.raise_for_status()

                if self.check_if_canceled("SearXNG processing"):
                    return

                data = response.json()

                if not data or not isinstance(data, dict):
                    raise ValueError("Invalid response from SearXNG")

                results = data.get("results", [])
                if not isinstance(results, list):
                    raise ValueError("Invalid results format from SearXNG")

                results = results[: self._param.top_n]

                if self.check_if_canceled("SearXNG processing"):
                    return

                self._retrieve_chunks(
                    results,
                    get_title=lambda r: r.get("title", ""),
                    get_url=lambda r: r.get("url", ""),
                    get_content=lambda r: r.get("content", ""),
                )

                self.set_output("json", results)
                return self.output("formalized_content")
            except requests.RequestException as e:
                if self.check_if_canceled("SearXNG processing"):
                    return

                last_e = f"Network error: {e}"
                logging.exception(f"SearXNG network error: {e}")
                time.sleep(self._param.delay_after_error)
            except Exception as e:
                if self.check_if_canceled("SearXNG processing"):
                    return

                # Some exceptions carry no message; fall back to repr() so the
                # failure is still reported below instead of being lost.
                last_e = str(e) or repr(e)
                logging.exception(f"SearXNG error: {e}")
                time.sleep(self._param.delay_after_error)

        if last_e:
            self.set_output("_ERROR", last_e)
            return f"SearXNG error: {last_e}"

        # Only reached when the loop body never ran (no attempt was made).
        assert False, self.output()

    def thoughts(self) -> str:
        """Return the progress text shown while searching, including the query."""
        return """
Keywords: {}
Searching with SearXNG for relevant results...
        """.format(self.get_input().get("query", "-_-!"))