mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-06-29 23:41:12 +08:00
Adds Keenable as a web search tool in the agent, alongside the existing Tavily/DuckDuckGo/SearXNG/Google tools. The main difference from the other search tools is that it doesn't require an API key. By default it uses Keenable's keyless public endpoint, so it works out of the box. Providing a key (in the tool config) switches to the authenticated endpoint and lifts the rate limits.

### Changes

- Backend: `agent/tools/keenable.py` — `KeenableSearch`, follows the Tavily/DuckDuckGo tool shape (results go through `_retrieve_chunks`). Auto-registered by `agent/tools/__init__.py`.
- Frontend: wired into the agent builder — operator + icon, config form (optional API key, search mode, site filter, top N), the search tool menu, and the existing api_key export sanitizer.

### Config

- API key: optional. Blank = keyless free tier; set it to lift limits / enable `realtime` mode.
- `site`: restrict to a single domain.
- `mode`: `pro` (default) or `realtime`.

### Notes

`KEENABLE_API_URL` can override the API base (HTTPS enforced; defaults to `https://api.keenable.ai`). The tool only sends the query (no URL fetch), so there's no SSRF surface. Verified the frontend with `vite build` and the backend search path against the public endpoint.
184 lines
7.1 KiB
Python
184 lines
7.1 KiB
Python
#
|
|
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
import logging
|
|
import os
|
|
import time
|
|
from abc import ABC
|
|
from urllib.parse import urlsplit
|
|
|
|
import requests
|
|
|
|
from agent.tools.base import ToolBase, ToolMeta, ToolParamBase
|
|
from common.connection_utils import timeout
|
|
|
|
|
|
def _base_url() -> str:
    """Resolve the Keenable API base URL from ``KEENABLE_API_URL`` (HTTPS enforced).

    Surrounding whitespace is removed from the environment value before any
    trailing slashes are trimmed, so a padded value such as
    ``" https://host/ "`` normalises to ``"https://host"``. An unset, empty or
    whitespace-only value falls back to ``https://api.keenable.ai``.

    Returns:
        The base URL without a trailing slash.

    Raises:
        ValueError: If the URL has no host, or its scheme is not ``https``
            (plain ``http`` is accepted only for a loopback host).
    """
    # Strip first: otherwise trailing whitespace hides the trailing slash from
    # rstrip("/") and leaks into the request URL.
    configured = (os.environ.get("KEENABLE_API_URL") or "").strip()
    base = (configured or "https://api.keenable.ai").rstrip("/")
    parsed = urlsplit(base)
    if parsed.hostname:
        if parsed.scheme == "https":
            return base
        # Permit plain http only against a loopback host (local dev).
        if parsed.scheme == "http" and parsed.hostname in {"localhost", "127.0.0.1", "::1"}:
            return base
    raise ValueError(f"KEENABLE_API_URL must be an https:// URL with a host, got {base!r}")
|
|
|
|
|
|
def _request(method: str, public_path: str, keyed_path: str, api_key: str, *, params=None, json=None, timeout_s: int = 30):
    """Send one HTTP request to Keenable and return the decoded JSON body.

    A non-blank ``api_key`` selects ``keyed_path`` and is sent in the
    ``X-API-Key`` header; otherwise the keyless ``public_path`` is used.

    Args:
        method: HTTP method passed to ``requests.request``.
        public_path: Path appended to the base URL when no key is set.
        keyed_path: Path appended to the base URL when a key is set.
        api_key: Optional API key; ``None`` or whitespace counts as unset.
        params: Query-string parameters forwarded to ``requests.request``.
        json: JSON body forwarded to ``requests.request``.
        timeout_s: Timeout forwarded to ``requests.request``.

    Returns:
        Whatever ``Response.json()`` yields for the response.

    Raises:
        ValueError: Propagated from ``_base_url()`` for an invalid base URL.
        Exception: Whatever ``requests`` raises for transport failures, an
            HTTP error status (``raise_for_status``) or an undecodable body.
    """
    key = (api_key or "").strip()
    headers = {
        "User-Agent": "keenable-ragflow",
        # Attribution header the Keenable backend segments traffic by.
        "X-Keenable-Title": "RAGFlow",
    }
    # Start from the keyless endpoint and upgrade only when a key is present.
    endpoint = public_path
    if key:
        endpoint = keyed_path
        headers["X-API-Key"] = key
    url = f"{_base_url()}{endpoint}"
    response = requests.request(method, url, headers=headers, params=params, json=json, timeout=timeout_s)
    response.raise_for_status()
    return response.json()
|
|
|
|
|
|
class KeenableSearchParam(ToolParamBase):
    """
    Parameters of the Keenable search component: the tool metadata exposed to
    the agent plus the user-configurable key, mode and result limit.
    """

    def __init__(self):
        # Per-argument specs, kept separate so the meta literal stays readable.
        query_spec = {
            "type": "string",
            "description": "The search keywords to execute with Keenable. The keywords should be the most important words/terms(includes synonyms) from the original request.",
            "default": "{sys.query}",
            "required": True,
        }
        site_spec = {
            "type": "string",
            "description": "default:''. Restrict results to a single domain, e.g. 'techcrunch.com'.",
            "default": "",
            "required": False,
        }
        # meta is assigned before the base initialiser runs, as in the original.
        self.meta: ToolMeta = {
            "name": "keenable_search",
            "description": """
Keenable is a web search API built for AI agents. It returns fresh, relevant web
results for a query and works without an API key by default (keyless free tier).
When searching:
- Use a focused query of the most important terms (and synonyms).
- Optionally restrict to a single site/domain.
""",
            "parameters": {
                "query": query_spec,
                "site": site_spec,
            },
        }
        super().__init__()
        # A key is optional: blank uses the keyless public endpoint (free tier);
        # setting one lifts rate limits and enables the 'realtime' mode.
        self.api_key = ""
        # "pro" (default, deeper) or "realtime" (low latency; requires a key).
        self.mode = "pro"
        self.top_n = 10

    def check(self):
        """Validate the configuration; raises ``ValueError`` for realtime mode without a key."""
        self.check_valid_value(self.mode, "Keenable search mode should be in 'pro/realtime'", ["pro", "realtime"])
        self.check_positive_integer(self.top_n, "Top N")
        # Only 'realtime' needs a key, so every other mode is done here.
        if self.mode != "realtime":
            return
        # 'realtime' is not available on the keyless public endpoint, so reject
        # the invalid combination at config time instead of failing at runtime.
        if not (self.api_key or "").strip():
            raise ValueError("Keenable 'realtime' mode requires an API key")

    def get_input_form(self) -> dict[str, dict]:
        """Describe the two single-line inputs (query and site) of the tool."""
        labels = (("query", "Query"), ("site", "Site"))
        return {field: {"name": label, "type": "line"} for field, label in labels}
|
|
|
|
|
|
class KeenableSearch(ToolBase, ABC):
    """Agent tool that sends a query to the Keenable search API via ``_request``."""

    component_name = "KeenableSearch"

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
    def _invoke(self, **kwargs):
        """Run one Keenable search, retrying failed requests.

        Args:
            **kwargs: ``query`` (required for a search to happen) and an
                optional ``site`` restricting results to one domain.

        Returns:
            ``None`` if the task was cancelled, ``""`` when no query was
            given, the ``formalized_content`` output on success, or a
            ``"Keenable error: ..."`` string once all attempts have failed
            (the ``_ERROR`` output is set in that case).

        Raises:
            AssertionError: If the retry loop ends without a result or an
                error, i.e. when ``max_retries`` allows no attempt at all.
        """
        if self.check_if_canceled("KeenableSearch processing"):
            return

        if not kwargs.get("query"):
            self.set_output("formalized_content", "")
            return ""

        payload = {"query": kwargs["query"], "mode": self._param.mode}
        if kwargs.get("site"):
            payload["site"] = kwargs["site"]

        logging.info(f"KeenableSearch: starting search (mode={self._param.mode}, keyed={bool((self._param.api_key or '').strip())})")
        last_e = None
        max_retries = self._param.max_retries
        for attempt in range(max_retries + 1):
            if self.check_if_canceled("KeenableSearch processing"):
                logging.info("KeenableSearch: cancelled before request")
                return

            try:
                data = _request("POST", "/v1/search/public", "/v1/search", self._param.api_key, json=payload)
                if self.check_if_canceled("KeenableSearch processing"):
                    logging.info("KeenableSearch: cancelled after request")
                    return

                results = (data.get("results") or [])[: self._param.top_n]
                # presumably _retrieve_chunks fills the "formalized_content"
                # output returned below — confirm against ToolBase.
                self._retrieve_chunks(
                    results,
                    get_title=lambda r: r.get("title"),
                    get_url=lambda r: r.get("url"),
                    get_content=lambda r: r.get("description"),
                )
                self.set_output("json", results)
                logging.info(f"KeenableSearch: returned {len(results)} results")
                return self.output("formalized_content")
            except ValueError as e:
                # Config/local errors (e.g. invalid KEENABLE_API_URL) won't be
                # fixed by retrying, so fail fast instead of sleeping.
                # NOTE(review): JSON decode failures from resp.json() are also
                # ValueError subclasses and take this path — confirm intended.
                if self.check_if_canceled("KeenableSearch processing"):
                    return
                last_e = e
                logging.exception(f"Keenable config error: {e}")
                break
            except Exception as e:
                if self.check_if_canceled("KeenableSearch processing"):
                    return

                last_e = e
                logging.exception(f"Keenable error: {e}")
                # Back off only when another attempt follows; sleeping after
                # the final failure would just delay the error report.
                if attempt < max_retries:
                    time.sleep(self._param.delay_after_error)

        if last_e:
            self.set_output("_ERROR", str(last_e))
            return f"Keenable error: {last_e}"

        # Explicit raise instead of `assert False` so the guard survives
        # `python -O`, which strips assert statements.
        raise AssertionError(self.output())

    def thoughts(self) -> str:
        """Return the progress message, filled in with the current query input."""
        return """
Keywords: {}
Looking for the most relevant articles.
""".format(self.get_input().get("query", "-_-!"))
|