# Scraped file-viewer header (Files - 577 lines - 18 KiB - Python - Raw / Permalink / Normal View / History); not part of the program.

#!/usr/bin/env python3
"""
Web Search Tool
Search the web using DuckDuckGo's search API. Supports web search, news,
images, and videos with various output formats.
Requirements:
pip install duckduckgo-search
"""
import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional, Any
try:
from duckduckgo_search import DDGS
except ImportError as e:
print(f"Error: Missing required dependency: {e}", file=sys.stderr)
print("Install with: pip install duckduckgo-search", file=sys.stderr)
sys.exit(1)
class WebSearch:
    """Best-effort DuckDuckGo search client.

    Every ``search_*`` method opens a fresh ``DDGS`` session, runs one query
    and returns the results as a list of dicts. Any exception raised while
    searching is reported on stderr and turned into an empty list, so callers
    never have to handle client errors themselves.
    """

    def __init__(
        self,
        region: str = "wt-wt",
        safe_search: str = "moderate",
        timeout: int = 20,
    ):
        """
        Initialize the search client.

        Args:
            region: Region code (e.g., "us-en", "uk-en", "wt-wt" for worldwide)
            safe_search: Safe search setting ("on", "moderate", "off")
            timeout: Request timeout in seconds
        """
        self.region = region
        self.safe_search = safe_search
        self.timeout = timeout

    def _search(self, endpoint: str, label: str, **params: Any) -> List[Dict[str, Any]]:
        """Run one DDGS query and return its results as a list.

        Args:
            endpoint: Name of the ``DDGS`` method to call
                ("text", "news", "images" or "videos").
            label: Human-readable search kind used in the error message.
            **params: Endpoint-specific keyword arguments; ``region`` and
                ``safesearch`` are filled in from the instance.

        Returns:
            The materialized result list, or ``[]`` if anything went wrong.
        """
        try:
            # The configured timeout used to be stored but never handed to the
            # client, so it had no effect; pass it through here.
            with DDGS(timeout=self.timeout) as ddgs:
                fetch = getattr(ddgs, endpoint)
                return list(fetch(
                    region=self.region,
                    safesearch=self.safe_search,
                    **params,
                ))
        except Exception as e:
            # Deliberate best-effort boundary: report and return no results.
            print(f"Error performing {label} search: {e}", file=sys.stderr)
            return []

    def search_text(
        self,
        query: str,
        max_results: int = 10,
        time_range: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Perform a text/web search.

        Args:
            query: Search query
            max_results: Maximum number of results (default: 10)
            time_range: Time filter ("d" day, "w" week, "m" month, "y" year)

        Returns:
            List of search results with title, href, and body
            (empty on failure)
        """
        return self._search(
            "text",
            "text",
            keywords=query,
            timelimit=time_range,
            max_results=max_results,
        )

    def search_news(
        self,
        query: str,
        max_results: int = 10,
        time_range: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Search for news articles.

        Args:
            query: Search query
            max_results: Maximum number of results
            time_range: Time filter ("d" day, "w" week, "m" month)

        Returns:
            List of news results with title, url, body, date, source
            (empty on failure)
        """
        return self._search(
            "news",
            "news",
            keywords=query,
            timelimit=time_range,
            max_results=max_results,
        )

    def search_images(
        self,
        query: str,
        max_results: int = 10,
        size: Optional[str] = None,
        color: Optional[str] = None,
        type_image: Optional[str] = None,
        layout: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Search for images.

        Args:
            query: Search query
            max_results: Maximum number of results
            size: Image size ("Small", "Medium", "Large", "Wallpaper")
            color: Color filter ("color", "Monochrome", "Red", "Orange", "Yellow",
                "Green", "Blue", "Purple", "Pink", "Brown", "Black", "Gray", "Teal", "White")
            type_image: Image type ("photo", "clipart", "gif", "transparent", "line")
            layout: Layout ("Square", "Tall", "Wide")

        Returns:
            List of image results with title, image URL, thumbnail, source, etc.
            (empty on failure)
        """
        return self._search(
            "images",
            "image",
            keywords=query,
            size=size,
            color=color,
            type_image=type_image,
            layout=layout,
            max_results=max_results,
        )

    def search_videos(
        self,
        query: str,
        max_results: int = 10,
        duration: Optional[str] = None,
        resolution: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Search for videos.

        Args:
            query: Search query
            max_results: Maximum number of results
            duration: Video duration ("short", "medium", "long")
            resolution: Video resolution ("high", "standard")

        Returns:
            List of video results with title, content, description, publisher, etc.
            (empty on failure)
        """
        return self._search(
            "videos",
            "video",
            keywords=query,
            duration=duration,
            resolution=resolution,
            max_results=max_results,
        )
def format_text_results(results: List[Dict[str, Any]], format_type: str = "text") -> str:
    """
    Format web search results for display.

    Args:
        results: List of search results; each entry may carry 'title',
            'href' (or 'url') and 'body' (or 'description')
        format_type: Output format ("text", "markdown", "json"); any other
            value is rendered as plain text

    Returns:
        Formatted string. In JSON mode the result is always valid JSON
        ("[]" when there are no results); in the other modes an empty
        result list yields "No results found."
    """
    if format_type == "json":
        # Checked before the empty guard so machine consumers never receive
        # the human-readable placeholder instead of JSON.
        return json.dumps(results or [], indent=2, ensure_ascii=False)
    if not results:
        return "No results found."

    as_markdown = format_type == "markdown"
    lines: List[str] = []
    for index, result in enumerate(results, 1):
        title = result.get('title', 'No title')
        url = result.get('href') or result.get('url', '')
        body = result.get('body') or result.get('description', '')
        if as_markdown:
            # Trailing "\n" plus the join separator leaves a blank line,
            # i.e. a paragraph break, after each element.
            lines.append(f"## {index}. {title}\n")
            lines.append(f"**URL:** {url}\n")
            if body:
                lines.append(f"{body}\n")
        else:
            lines.append(f"{index}. {title}")
            lines.append(f" URL: {url}")
            if body:
                lines.append(f" {body}")
        lines.append("")  # separator between entries
    return "\n".join(lines)
def format_news_results(results: List[Dict[str, Any]], format_type: str = "text") -> str:
    """
    Format news search results.

    Args:
        results: List of news results; each entry may carry 'title', 'url',
            'body', 'date' and 'source'
        format_type: Output format ("text", "markdown", "json"); any other
            value is rendered as plain text

    Returns:
        Formatted string. In JSON mode the result is always valid JSON
        ("[]" when there are no results); in the other modes an empty
        result list yields "No news results found."
    """
    if format_type == "json":
        # Checked before the empty guard so the output is always parseable.
        return json.dumps(results or [], indent=2, ensure_ascii=False)
    if not results:
        return "No news results found."

    as_markdown = format_type == "markdown"
    lines: List[str] = []
    for index, result in enumerate(results, 1):
        title = result.get('title', 'No title')
        url = result.get('url', '')
        body = result.get('body', '')
        date = result.get('date', '')
        source = result.get('source', '')
        if as_markdown:
            lines.append(f"## {index}. {title}\n")
            # Each metadata line ends its own paragraph; separated only by a
            # single newline they would render as one run-on line.
            if source:
                lines.append(f"**Source:** {source}\n")
            if date:
                lines.append(f"**Date:** {date}\n")
            lines.append(f"**URL:** {url}\n")
            if body:
                lines.append(f"{body}\n")
        else:
            lines.append(f"{index}. {title}")
            # "source - date", or whichever of the two is present.
            byline = " - ".join(str(part) for part in (source, date) if part)
            if byline:
                lines.append(f" {byline}")
            lines.append(f" URL: {url}")
            if body:
                lines.append(f" {body}")
        lines.append("")  # separator between entries
    return "\n".join(lines)
def format_image_results(results: List[Dict[str, Any]], format_type: str = "text") -> str:
    """
    Format image search results.

    Args:
        results: List of image results; each entry may carry 'title',
            'image', 'thumbnail', 'source', 'width' and 'height'
        format_type: Output format ("text", "markdown", "json"); any other
            value is rendered as plain text

    Returns:
        Formatted string. In JSON mode the result is always valid JSON
        ("[]" when there are no results); in the other modes an empty
        result list yields "No image results found."
    """
    if format_type == "json":
        # Checked before the empty guard so the output is always parseable.
        return json.dumps(results or [], indent=2, ensure_ascii=False)
    if not results:
        return "No image results found."

    as_markdown = format_type == "markdown"
    lines: List[str] = []
    for index, result in enumerate(results, 1):
        title = result.get('title', 'No title')
        image_url = result.get('image', '')
        source = result.get('source', '')
        width = result.get('width', '')
        height = result.get('height', '')
        # Dimensions are only shown when both sides are known.
        has_size = bool(width and height)
        if as_markdown:
            thumbnail = result.get('thumbnail', '')
            lines.append(f"## {index}. {title}\n")
            # Each metadata line ends its own paragraph; separated only by a
            # single newline they would render as one run-on line.
            if has_size:
                lines.append(f"**Dimensions:** {width}x{height}\n")
            if source:
                lines.append(f"**Source:** {source}\n")
            lines.append(f"**Image URL:** {image_url}\n")
            if thumbnail:
                lines.append(f"**Thumbnail:** {thumbnail}\n")
        else:
            lines.append(f"{index}. {title}")
            if has_size:
                lines.append(f" Dimensions: {width}x{height}")
            if source:
                lines.append(f" Source: {source}")
            lines.append(f" Image URL: {image_url}")
        lines.append("")  # separator between entries
    return "\n".join(lines)
def format_video_results(results: List[Dict[str, Any]], format_type: str = "text") -> str:
    """
    Format video search results.

    Args:
        results: List of video results; each entry may carry 'title',
            'content' (the video URL), 'description', 'publisher',
            'duration' and 'published'
        format_type: Output format ("text", "markdown", "json"); any other
            value is rendered as plain text

    Returns:
        Formatted string. In JSON mode the result is always valid JSON
        ("[]" when there are no results); in the other modes an empty
        result list yields "No video results found."
    """
    if format_type == "json":
        # Checked before the empty guard so the output is always parseable.
        return json.dumps(results or [], indent=2, ensure_ascii=False)
    if not results:
        return "No video results found."

    as_markdown = format_type == "markdown"
    lines: List[str] = []
    for index, result in enumerate(results, 1):
        title = result.get('title', 'No title')
        url = result.get('content', '')
        description = result.get('description', '')
        publisher = result.get('publisher', '')
        duration = result.get('duration', '')
        if as_markdown:
            published = result.get('published', '')
            lines.append(f"## {index}. {title}\n")
            # Each metadata line ends its own paragraph; separated only by a
            # single newline they would render as one run-on line.
            if publisher:
                lines.append(f"**Publisher:** {publisher}\n")
            if duration:
                lines.append(f"**Duration:** {duration}\n")
            if published:
                lines.append(f"**Published:** {published}\n")
            lines.append(f"**URL:** {url}\n")
            if description:
                lines.append(f"{description}\n")
        else:
            lines.append(f"{index}. {title}")
            # "publisher - duration", or whichever of the two is present
            # (a duration without a publisher used to be dropped).
            byline = " - ".join(str(part) for part in (publisher, duration) if part)
            if byline:
                lines.append(f" {byline}")
            lines.append(f" URL: {url}")
            if description:
                lines.append(f" {description}")
        lines.append("")  # separator between entries
    return "\n".join(lines)
def _build_parser():
    """Assemble the argparse parser that describes the command-line interface."""
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Search the web using DuckDuckGo",
        epilog="""
Examples:
# Basic web search
%(prog)s "python tutorials"
# Search with more results
%(prog)s "machine learning" --max-results 20
# News search
%(prog)s "climate change" --type news --time-range w
# Image search
%(prog)s "sunset photos" --type images --max-results 15
# Save results to file
%(prog)s "artificial intelligence" --output results.txt
# JSON output format
%(prog)s "quantum computing" --format json --output results.json
# Region-specific search
%(prog)s "local news" --region us-en --type news
Time range filters (--time-range):
d = past day
w = past week
m = past month
y = past year
""",
    )
    cli.add_argument('query', help='Search query')

    # Options shared by the search types
    general = cli.add_argument_group('search options')
    general.add_argument('-t', '--type', default='web',
                         choices=['web', 'news', 'images', 'videos'],
                         help='Search type (default: web)')
    general.add_argument('-n', '--max-results', default=10, type=int,
                         help='Maximum number of results (default: 10)')
    general.add_argument('--time-range', choices=['d', 'w', 'm', 'y'],
                         help='Time range filter (d=day, w=week, m=month, y=year)')
    general.add_argument('-r', '--region', default='wt-wt',
                         help='Region code (e.g., us-en, uk-en, wt-wt for worldwide, default: wt-wt)')
    general.add_argument('--safe-search', default='moderate',
                         choices=['on', 'moderate', 'off'],
                         help='Safe search setting (default: moderate)')

    # Filters forwarded to the image search
    images = cli.add_argument_group('image search options')
    images.add_argument('--image-size', help='Image size filter',
                        choices=['Small', 'Medium', 'Large', 'Wallpaper'])
    images.add_argument('--image-color', help='Image color filter',
                        choices=['color', 'Monochrome', 'Red', 'Orange', 'Yellow', 'Green',
                                 'Blue', 'Purple', 'Pink', 'Brown', 'Black', 'Gray', 'Teal', 'White'])
    images.add_argument('--image-type', help='Image type filter',
                        choices=['photo', 'clipart', 'gif', 'transparent', 'line'])
    images.add_argument('--image-layout', help='Image layout filter',
                        choices=['Square', 'Tall', 'Wide'])

    # Filters forwarded to the video search
    videos = cli.add_argument_group('video search options')
    videos.add_argument('--video-duration', help='Video duration filter',
                        choices=['short', 'medium', 'long'])
    videos.add_argument('--video-resolution', help='Video resolution filter',
                        choices=['high', 'standard'])

    # Where and how results are written
    out = cli.add_argument_group('output options')
    out.add_argument('-f', '--format', default='text',
                     choices=['text', 'markdown', 'json'],
                     help='Output format (default: text)')
    out.add_argument('-o', '--output',
                     help='Output file path (prints to stdout if not specified)')
    return cli


def main():
    """Parse the command line, run the requested search and emit the results.

    Progress and summary messages go to stderr; the formatted results go to
    stdout, or to the file named by --output (parent directories are created
    as needed).
    """
    args = _build_parser().parse_args()
    searcher = WebSearch(region=args.region, safe_search=args.safe_search)

    def note(message=""):
        # Status output stays on stderr so stdout carries only the results.
        print(message, file=sys.stderr)

    note(f"Searching for: {args.query}")
    note(f"Type: {args.type}, Max results: {args.max_results}")
    if args.time_range:
        time_labels = {'d': 'past day', 'w': 'past week', 'm': 'past month', 'y': 'past year'}
        note(f"Time range: {time_labels[args.time_range]}")
    note()

    # Search type -> (deferred search call, matching formatter).
    # Lambdas keep the searches lazy: only the selected one runs.
    handlers = {
        'web': (
            lambda: searcher.search_text(
                query=args.query,
                max_results=args.max_results,
                time_range=args.time_range,
            ),
            format_text_results,
        ),
        'news': (
            lambda: searcher.search_news(
                query=args.query,
                max_results=args.max_results,
                time_range=args.time_range,
            ),
            format_news_results,
        ),
        'images': (
            lambda: searcher.search_images(
                query=args.query,
                max_results=args.max_results,
                size=args.image_size,
                color=args.image_color,
                type_image=args.image_type,
                layout=args.image_layout,
            ),
            format_image_results,
        ),
        'videos': (
            lambda: searcher.search_videos(
                query=args.query,
                max_results=args.max_results,
                duration=args.video_duration,
                resolution=args.video_resolution,
            ),
            format_video_results,
        ),
    }
    # Fallback mirrors the defaults: no results, plain web formatter.
    run_search, formatter = handlers.get(args.type, (list, format_text_results))
    results = run_search()

    rendered = formatter(results, args.format)

    if not args.output:
        print(rendered)
        note(f"\nFound {len(results)} result(s)")
        return

    destination = Path(args.output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered, encoding='utf-8')
    note(f"✓ Results saved to {args.output}")
    note(f" Found {len(results)} result(s)")


if __name__ == '__main__':
    main()