#!/usr/bin/env python3
|
|
"""
Web Search Tool

Search the web using DuckDuckGo's search API. Supports web search, news,
images, and videos with various output formats.

Requirements:
    pip install duckduckgo-search
"""
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import List, Dict, Optional, Any
|
|
|
|
try:
|
|
from duckduckgo_search import DDGS
|
|
except ImportError as e:
|
|
print(f"Error: Missing required dependency: {e}", file=sys.stderr)
|
|
print("Install with: pip install duckduckgo-search", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
|
|
class WebSearch:
    """Web search using DuckDuckGo.

    Every ``search_*`` method creates a new ``DDGS`` context for the call,
    forwards the instance's region and safe-search settings together with the
    per-call filters, and returns the results as a list of dictionaries.
    Any exception raised during a search is reported on stderr and an empty
    list is returned instead of propagating the error.
    """

    def __init__(
        self,
        region: str = "wt-wt",
        safe_search: str = "moderate",
        timeout: int = 20,
    ):
        """
        Initialize the search client.

        Args:
            region: Region code (e.g., "us-en", "uk-en", "wt-wt" for worldwide)
            safe_search: Safe search setting ("on", "moderate", "off")
            timeout: Request timeout in seconds, passed to ``DDGS`` for
                every search
        """
        self.region = region
        self.safe_search = safe_search
        self.timeout = timeout

    def _run_search(
        self,
        label: str,
        method_name: str,
        **params: Any,
    ) -> List[Dict[str, Any]]:
        """
        Call one ``DDGS`` search method and collect its results into a list.

        Args:
            label: Word used in the error message ("text", "news", ...)
            method_name: Name of the ``DDGS`` method to call
            **params: Keyword arguments forwarded to that method

        Returns:
            The results as a list, or an empty list if anything failed
        """
        try:
            # Pass the configured timeout through; otherwise the value given
            # to __init__ would have no effect on the requests.
            with DDGS(timeout=self.timeout) as ddgs:
                search = getattr(ddgs, method_name)
                return list(search(**params))
        except Exception as e:
            # Best-effort by design: report the problem and let the caller
            # continue with "no results".
            print(f"Error performing {label} search: {e}", file=sys.stderr)
            return []

    def search_text(
        self,
        query: str,
        max_results: int = 10,
        time_range: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Perform a text/web search.

        Args:
            query: Search query
            max_results: Maximum number of results (default: 10)
            time_range: Time filter ("d" day, "w" week, "m" month, "y" year)

        Returns:
            List of search results with title, href, and body
        """
        return self._run_search(
            "text",
            "text",
            keywords=query,
            region=self.region,
            safesearch=self.safe_search,
            timelimit=time_range,
            max_results=max_results,
        )

    def search_news(
        self,
        query: str,
        max_results: int = 10,
        time_range: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Search for news articles.

        Args:
            query: Search query
            max_results: Maximum number of results
            time_range: Time filter ("d" day, "w" week, "m" month)

        Returns:
            List of news results with title, url, body, date, source
        """
        return self._run_search(
            "news",
            "news",
            keywords=query,
            region=self.region,
            safesearch=self.safe_search,
            timelimit=time_range,
            max_results=max_results,
        )

    def search_images(
        self,
        query: str,
        max_results: int = 10,
        size: Optional[str] = None,
        color: Optional[str] = None,
        type_image: Optional[str] = None,
        layout: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Search for images.

        Args:
            query: Search query
            max_results: Maximum number of results
            size: Image size ("Small", "Medium", "Large", "Wallpaper")
            color: Color filter ("color", "Monochrome", "Red", "Orange", "Yellow",
                "Green", "Blue", "Purple", "Pink", "Brown", "Black", "Gray", "Teal", "White")
            type_image: Image type ("photo", "clipart", "gif", "transparent", "line")
            layout: Layout ("Square", "Tall", "Wide")

        Returns:
            List of image results with title, image URL, thumbnail, source, etc.
        """
        return self._run_search(
            "image",
            "images",
            keywords=query,
            region=self.region,
            safesearch=self.safe_search,
            size=size,
            color=color,
            type_image=type_image,
            layout=layout,
            max_results=max_results,
        )

    def search_videos(
        self,
        query: str,
        max_results: int = 10,
        duration: Optional[str] = None,
        resolution: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Search for videos.

        Args:
            query: Search query
            max_results: Maximum number of results
            duration: Video duration ("short", "medium", "long")
            resolution: Video resolution ("high", "standard")

        Returns:
            List of video results with title, content, description, publisher, etc.
        """
        return self._run_search(
            "video",
            "videos",
            keywords=query,
            region=self.region,
            safesearch=self.safe_search,
            duration=duration,
            resolution=resolution,
            max_results=max_results,
        )
|
|
|
|
|
|
def format_text_results(results: List[Dict[str, Any]], format_type: str = "text") -> str:
    """
    Render web search results as a single string.

    Args:
        results: List of search results
        format_type: Output format ("text", "markdown", "json"); any value
            other than "json" or "markdown" is rendered as plain text

    Returns:
        The formatted results, or "No results found." for an empty list
    """
    if not results:
        return "No results found."

    if format_type == "json":
        return json.dumps(results, indent=2, ensure_ascii=False)

    as_markdown = format_type == "markdown"
    lines: List[str] = []
    for index, item in enumerate(results, start=1):
        heading = item.get('title', 'No title')
        # Fall back to the alternative key when the primary one is missing
        # or empty.
        link = item.get('href') or item.get('url', '')
        snippet = item.get('body') or item.get('description', '')

        if as_markdown:
            lines += [f"## {index}. {heading}\n", f"**URL:** {link}\n"]
            if snippet:
                lines.append(f"{snippet}\n")
        else:
            lines += [f"{index}. {heading}", f" URL: {link}"]
            if snippet:
                lines.append(f" {snippet}")
        # Empty entry separates consecutive results in the joined output.
        lines.append("")

    return "\n".join(lines)
|
|
|
|
|
|
def format_news_results(results: List[Dict[str, Any]], format_type: str = "text") -> str:
    """
    Render news search results as a single string.

    Args:
        results: List of news results (title, url, body, date, source keys
            are read; missing keys are treated as empty)
        format_type: Output format ("text", "markdown", "json"); any value
            other than "json" or "markdown" is rendered as plain text

    Returns:
        The formatted results, or "No news results found." for an empty list
    """
    if not results:
        return "No news results found."

    if format_type == "json":
        return json.dumps(results, indent=2, ensure_ascii=False)

    as_markdown = format_type == "markdown"
    lines: List[str] = []
    for index, article in enumerate(results, start=1):
        headline = article.get('title', 'No title')
        link = article.get('url', '')
        summary = article.get('body', '')
        when = article.get('date', '')
        outlet = article.get('source', '')

        if as_markdown:
            lines.append(f"## {index}. {headline}\n")
            if outlet:
                lines.append(f"**Source:** {outlet}")
            if when:
                lines.append(f"**Date:** {when}")
            lines.append(f"**URL:** {link}\n")
            if summary:
                lines.append(f"{summary}\n")
        else:
            lines.append(f"{index}. {headline}")
            # One byline: both parts joined by a dash, or whichever single
            # part is available, or no line at all.
            if outlet and when:
                lines.append(f" {outlet} - {when}")
            elif outlet or when:
                lines.append(f" {outlet or when}")
            lines.append(f" URL: {link}")
            if summary:
                lines.append(f" {summary}")
        lines.append("")

    return "\n".join(lines)
|
|
|
|
|
|
def format_image_results(results: List[Dict[str, Any]], format_type: str = "text") -> str:
    """
    Render image search results as a single string.

    Args:
        results: List of image results (title, image, thumbnail, source,
            width, height keys are read; missing keys are treated as empty)
        format_type: Output format ("text", "markdown", "json"); any value
            other than "json" or "markdown" is rendered as plain text

    Returns:
        The formatted results, or "No image results found." for an empty list
    """
    if not results:
        return "No image results found."

    if format_type == "json":
        return json.dumps(results, indent=2, ensure_ascii=False)

    as_markdown = format_type == "markdown"
    lines: List[str] = []
    for index, picture in enumerate(results, start=1):
        caption = picture.get('title', 'No title')
        link = picture.get('image', '')
        origin = picture.get('source', '')
        w = picture.get('width', '')
        h = picture.get('height', '')
        # Dimensions are only shown when both values are present.
        has_size = bool(w and h)

        if as_markdown:
            lines.append(f"## {index}. {caption}\n")
            if has_size:
                lines.append(f"**Dimensions:** {w}x{h}")
            if origin:
                lines.append(f"**Source:** {origin}")
            lines.append(f"**Image URL:** {link}")
            # The thumbnail is only part of the markdown rendering.
            preview = picture.get('thumbnail', '')
            if preview:
                lines.append(f"**Thumbnail:** {preview}")
        else:
            lines.append(f"{index}. {caption}")
            if has_size:
                lines.append(f" Dimensions: {w}x{h}")
            if origin:
                lines.append(f" Source: {origin}")
            lines.append(f" Image URL: {link}")
        lines.append("")

    return "\n".join(lines)
|
|
|
|
|
|
def format_video_results(results: List[Dict[str, Any]], format_type: str = "text") -> str:
    """
    Render video search results as a single string.

    Args:
        results: List of video results (title, content, description,
            publisher, duration, published keys are read; missing keys are
            treated as empty). The video URL is taken from the "content" key.
        format_type: Output format ("text", "markdown", "json"); any value
            other than "json" or "markdown" is rendered as plain text

    Returns:
        The formatted results, or "No video results found." for an empty list
    """
    if not results:
        return "No video results found."

    if format_type == "json":
        return json.dumps(results, indent=2, ensure_ascii=False)

    as_markdown = format_type == "markdown"
    lines: List[str] = []
    for index, video in enumerate(results, start=1):
        title = video.get('title', 'No title')
        url = video.get('content', '')
        description = video.get('description', '')
        publisher = video.get('publisher', '')
        duration = video.get('duration', '')

        if as_markdown:
            lines.append(f"## {index}. {title}\n")
            if publisher:
                lines.append(f"**Publisher:** {publisher}")
            if duration:
                lines.append(f"**Duration:** {duration}")
            # The publication date is only part of the markdown rendering.
            published = video.get('published', '')
            if published:
                lines.append(f"**Published:** {published}")
            lines.append(f"**URL:** {url}\n")
            if description:
                lines.append(f"{description}\n")
        else:
            lines.append(f"{index}. {title}")
            if publisher and duration:
                lines.append(f" {publisher} - {duration}")
            elif publisher:
                lines.append(f" {publisher}")
            elif duration:
                # Show a duration even when the publisher is unknown, the
                # same way the news formatter falls back to a lone date.
                lines.append(f" {duration}")
            lines.append(f" URL: {url}")
            if description:
                lines.append(f" {description}")
        lines.append("")

    return "\n".join(lines)
|
|
|
|
|
|
def _positive_int(value: str) -> int:
    """Parse *value* as an integer of at least 1 (used as an argparse type)."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


def main():
    """
    Command-line entry point.

    Parses the arguments, runs the requested search type through WebSearch,
    formats the results with the matching formatter, and either prints them
    to stdout or writes them to the file given with --output. Progress and
    status messages go to stderr so stdout only carries the results.

    Exits with status 2 on invalid arguments (argparse behaviour) and with
    status 1 if the output file cannot be written.
    """
    parser = argparse.ArgumentParser(
        description="Search the web using DuckDuckGo",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Basic web search
%(prog)s "python tutorials"

# Search with more results
%(prog)s "machine learning" --max-results 20

# News search
%(prog)s "climate change" --type news --time-range w

# Image search
%(prog)s "sunset photos" --type images --max-results 15

# Save results to file
%(prog)s "artificial intelligence" --output results.txt

# JSON output format
%(prog)s "quantum computing" --format json --output results.json

# Region-specific search
%(prog)s "local news" --region us-en --type news

Time range filters (--time-range):
d = past day
w = past week
m = past month
y = past year
"""
    )

    parser.add_argument(
        'query',
        help='Search query'
    )

    # Search options
    search_group = parser.add_argument_group('search options')
    search_group.add_argument(
        '-t', '--type',
        choices=['web', 'news', 'images', 'videos'],
        default='web',
        help='Search type (default: web)'
    )
    search_group.add_argument(
        '-n', '--max-results',
        # Reject zero and negative counts at parse time instead of handing
        # them to the search library.
        type=_positive_int,
        default=10,
        help='Maximum number of results (default: 10)'
    )
    search_group.add_argument(
        '--time-range',
        choices=['d', 'w', 'm', 'y'],
        help='Time range filter (d=day, w=week, m=month, y=year)'
    )
    search_group.add_argument(
        '-r', '--region',
        default='wt-wt',
        help='Region code (e.g., us-en, uk-en, wt-wt for worldwide, default: wt-wt)'
    )
    search_group.add_argument(
        '--safe-search',
        choices=['on', 'moderate', 'off'],
        default='moderate',
        help='Safe search setting (default: moderate)'
    )

    # Image-specific options
    image_group = parser.add_argument_group('image search options')
    image_group.add_argument(
        '--image-size',
        choices=['Small', 'Medium', 'Large', 'Wallpaper'],
        help='Image size filter'
    )
    image_group.add_argument(
        '--image-color',
        choices=['color', 'Monochrome', 'Red', 'Orange', 'Yellow', 'Green',
                 'Blue', 'Purple', 'Pink', 'Brown', 'Black', 'Gray', 'Teal', 'White'],
        help='Image color filter'
    )
    image_group.add_argument(
        '--image-type',
        choices=['photo', 'clipart', 'gif', 'transparent', 'line'],
        help='Image type filter'
    )
    image_group.add_argument(
        '--image-layout',
        choices=['Square', 'Tall', 'Wide'],
        help='Image layout filter'
    )

    # Video-specific options
    video_group = parser.add_argument_group('video search options')
    video_group.add_argument(
        '--video-duration',
        choices=['short', 'medium', 'long'],
        help='Video duration filter'
    )
    video_group.add_argument(
        '--video-resolution',
        choices=['high', 'standard'],
        help='Video resolution filter'
    )

    # Output options
    output_group = parser.add_argument_group('output options')
    output_group.add_argument(
        '-f', '--format',
        choices=['text', 'markdown', 'json'],
        default='text',
        help='Output format (default: text)'
    )
    output_group.add_argument(
        '-o', '--output',
        help='Output file path (prints to stdout if not specified)'
    )

    args = parser.parse_args()

    # Initialize search client
    searcher = WebSearch(
        region=args.region,
        safe_search=args.safe_search,
    )

    # Announce the search on stderr
    print(f"Searching for: {args.query}", file=sys.stderr)
    print(f"Type: {args.type}, Max results: {args.max_results}", file=sys.stderr)
    if args.time_range:
        time_labels = {'d': 'past day', 'w': 'past week', 'm': 'past month', 'y': 'past year'}
        print(f"Time range: {time_labels[args.time_range]}", file=sys.stderr)
    print("", file=sys.stderr)

    # Perform search based on type; argparse `choices` guarantees that
    # args.type is one of the four values handled here.
    if args.type == 'news':
        results = searcher.search_news(
            query=args.query,
            max_results=args.max_results,
            time_range=args.time_range,
        )
        formatter = format_news_results
    elif args.type == 'images':
        results = searcher.search_images(
            query=args.query,
            max_results=args.max_results,
            size=args.image_size,
            color=args.image_color,
            type_image=args.image_type,
            layout=args.image_layout,
        )
        formatter = format_image_results
    elif args.type == 'videos':
        results = searcher.search_videos(
            query=args.query,
            max_results=args.max_results,
            duration=args.video_duration,
            resolution=args.video_resolution,
        )
        formatter = format_video_results
    else:  # 'web'
        results = searcher.search_text(
            query=args.query,
            max_results=args.max_results,
            time_range=args.time_range,
        )
        formatter = format_text_results

    # Format results
    output = formatter(results, args.format)

    # Output results
    if args.output:
        output_path = Path(args.output)
        try:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(output, encoding='utf-8')
        except OSError as e:
            # Report an unwritable destination as a one-line error instead
            # of a traceback.
            print(f"Error: could not write to {args.output}: {e}", file=sys.stderr)
            sys.exit(1)
        print(f"✓ Results saved to {args.output}", file=sys.stderr)
        print(f" Found {len(results)} result(s)", file=sys.stderr)
    else:
        print(output)
        print(f"\nFound {len(results)} result(s)", file=sys.stderr)
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|