From 8864c3aa2b906532b6b9ea105ae4989dea8b3d61 Mon Sep 17 00:00:00 2001 From: zlei9 Date: Sun, 29 Mar 2026 08:35:45 +0800 Subject: [PATCH] Initial commit with translated description --- SKILL.md | 414 ++++++++++++++++++++++++++++++++++++ _meta.json | 6 + references/api-reference.md | 187 ++++++++++++++++ scripts/tavily_search.py | 247 +++++++++++++++++++++ 4 files changed, 854 insertions(+) create mode 100644 SKILL.md create mode 100644 _meta.json create mode 100644 references/api-reference.md create mode 100644 scripts/tavily_search.py diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..adad166 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,414 @@ +--- +name: tavily +description: "使用Tavily搜索API进行AI优化的网络搜索。在需要全面网络研究、当前事件查询、特定领域搜索或AI生成的答案摘要时使用。Tavily针对LLM消费进行了优化,提供清晰的结构化结果、答案生成和原始内容提取。最适合研究任务、新闻查询、事实核查和收集权威来源。" +--- + +# Tavily AI Search + +## Overview + +Tavily is a search engine specifically optimized for Large Language Models and AI applications. Unlike traditional search APIs, Tavily provides AI-ready results with optional answer generation, clean content extraction, and domain filtering capabilities. + +**Key capabilities:** +- AI-generated answer summaries from search results +- Clean, structured results optimized for LLM processing +- Fast (`basic`) and comprehensive (`advanced`) search modes +- Domain filtering (include/exclude specific sources) +- News-focused search for current events +- Image search with relevant visual content +- Raw content extraction for deeper analysis + +## Architecture + +```mermaid +graph TB + A[User Query] --> B{Search Mode} + B -->|basic| C[Fast Search
1-2s response] + B -->|advanced| D[Comprehensive Search
5-10s response] + + C --> E[Tavily API] + D --> E + + E --> F{Topic Filter} + F -->|general| G[Broad Web Search] + F -->|news| H[News Sources
Last 7 days] + + G --> I[Domain Filtering] + H --> I + + I --> J{Include Domains?} + J -->|yes| K[Filter to Specific Domains] + J -->|no| L{Exclude Domains?} + K --> M[Search Results] + L -->|yes| N[Remove Unwanted Domains] + L -->|no| M + N --> M + + M --> O{Response Options} + O --> P[AI Answer
Summary] + O --> Q[Structured Results
Title, URL, Content, Score] + O --> R[Images
if requested] + O --> S[Raw HTML Content
if requested] + + P --> T[Return to Agent] + Q --> T + R --> T + S --> T + + style E fill:#4A90E2 + style P fill:#7ED321 + style Q fill:#7ED321 + style R fill:#F5A623 + style S fill:#F5A623 +``` + +## Quick Start + +### Basic Search + +```bash +# Simple query with AI answer +scripts/tavily_search.py "What is quantum computing?" + +# Multiple results +scripts/tavily_search.py "Python best practices" --max-results 10 +``` + +### Advanced Search + +```bash +# Comprehensive research mode +scripts/tavily_search.py "Climate change solutions" --depth advanced + +# News-focused search +scripts/tavily_search.py "AI developments 2026" --topic news +``` + +### Domain Filtering + +```bash +# Search only trusted domains +scripts/tavily_search.py "Python tutorials" \ + --include-domains python.org docs.python.org realpython.com + +# Exclude low-quality sources +scripts/tavily_search.py "How to code" \ + --exclude-domains w3schools.com geeksforgeeks.org +``` + +### With Images + +```bash +# Include relevant images +scripts/tavily_search.py "Eiffel Tower architecture" --images +``` + +## Search Modes + +### Basic vs Advanced + +| Mode | Speed | Coverage | Use Case | +|------|-------|----------|----------| +| **basic** | 1-2s | Good | Quick facts, simple queries | +| **advanced** | 5-10s | Excellent | Research, complex topics, comprehensive analysis | + +**Decision tree:** +1. Need a quick fact or definition? → Use `basic` +2. Researching a complex topic? → Use `advanced` +3. Need multiple perspectives? → Use `advanced` +4. Time-sensitive query? → Use `basic` + +### General vs News + +| Topic | Time Range | Sources | Use Case | +|-------|------------|---------|----------| +| **general** | All time | Broad web | Evergreen content, tutorials, documentation | +| **news** | Last 7 days | News sites | Current events, recent developments, breaking news | + +**Decision tree:** +1. Query contains "latest", "recent", "current", "today"? → Use `news` +2. 
Looking for historical or evergreen content? → Use `general` +3. Need up-to-date information? → Use `news` + +## API Key Setup + +### Option 1: Clawdbot Config (Recommended) + +Add to your Clawdbot config: + +```json +{ + "skills": { + "entries": { + "tavily": { + "enabled": true, + "apiKey": "tvly-YOUR_API_KEY_HERE" + } + } + } +} +``` + +Access in scripts via Clawdbot's config system. + +### Option 2: Environment Variable + +```bash +export TAVILY_API_KEY="tvly-YOUR_API_KEY_HERE" +``` + +Add to `~/.clawdbot/.env` or your shell profile. + +### Getting an API Key + +1. Visit https://tavily.com +2. Sign up for an account +3. Navigate to your dashboard +4. Generate an API key (starts with `tvly-`) +5. Note your plan's rate limits and credit allocation + +## Common Use Cases + +### 1. Research & Fact-Finding + +```bash +# Comprehensive research with answer +scripts/tavily_search.py "Explain quantum entanglement" --depth advanced + +# Multiple authoritative sources +scripts/tavily_search.py "Best practices for REST API design" \ + --max-results 10 \ + --include-domains github.com microsoft.com google.com +``` + +### 2. Current Events + +```bash +# Latest news +scripts/tavily_search.py "AI policy updates" --topic news + +# Recent developments in a field +scripts/tavily_search.py "quantum computing breakthroughs" \ + --topic news \ + --depth advanced +``` + +### 3. Domain-Specific Research + +```bash +# Academic sources only +scripts/tavily_search.py "machine learning algorithms" \ + --include-domains arxiv.org scholar.google.com ieee.org + +# Technical documentation +scripts/tavily_search.py "React hooks guide" \ + --include-domains react.dev +``` + +### 4. Visual Research + +```bash +# Gather visual references +scripts/tavily_search.py "modern web design trends" \ + --images \ + --max-results 10 +``` + +### 5. 
Content Extraction + +```bash +# Get raw HTML content for deeper analysis +scripts/tavily_search.py "Python async/await" \ + --raw-content \ + --max-results 5 +``` + +## Response Handling + +### AI Answer + +The AI-generated answer provides a concise summary synthesized from search results: + +```python +{ + "answer": "Quantum computing is a type of computing that uses quantum-mechanical phenomena..." +} +``` + +**Use when:** +- Need a quick summary +- Want synthesized information from multiple sources +- Looking for a direct answer to a question + +**Skip when** (`--no-answer`): +- Only need source URLs +- Want to form your own synthesis +- Conserving API credits + +### Structured Results + +Each result includes: +- `title`: Page title +- `url`: Source URL +- `content`: Extracted text snippet +- `score`: Relevance score (0-1) +- `raw_content`: Full HTML (if `--raw-content` enabled) + +### Images + +When `--images` is enabled, returns URLs of relevant images found during search. + +## Best Practices + +### 1. Choose the Right Search Depth + +- Start with `basic` for most queries (faster, cheaper) +- Escalate to `advanced` only when: + - Initial results are insufficient + - Topic is complex or nuanced + - Need comprehensive coverage + +### 2. Use Domain Filtering Strategically + +**Include domains for:** +- Academic research (`.edu` domains) +- Official documentation (official project sites) +- Trusted news sources +- Known authoritative sources + +**Exclude domains for:** +- Known low-quality content farms +- Irrelevant content types (Pinterest for non-visual queries) +- Sites with paywalls or access restrictions + +### 3. Optimize for Cost + +- Use `basic` depth as default +- Limit `max_results` to what you'll actually use +- Disable `include_raw_content` unless needed +- Cache results locally for repeated queries + +### 4. 
Handle Errors Gracefully + +The script provides helpful error messages: + +```bash +# Missing API key +Error: Tavily API key required +Setup: Set TAVILY_API_KEY environment variable or pass --api-key + +# Package not installed +Error: tavily-python package not installed +To install: pip install tavily-python +``` + +## Integration Patterns + +### Programmatic Usage + +```python +from tavily_search import search + +result = search( + query="What is machine learning?", + api_key="tvly-...", + search_depth="advanced", + max_results=10 +) + +if result.get("success"): + print(result["answer"]) + for item in result["results"]: + print(f"{item['title']}: {item['url']}") +``` + +### JSON Output for Parsing + +```bash +scripts/tavily_search.py "Python tutorials" --json > results.json +``` + +### Chaining with Other Tools + +```bash +# Search and extract content +scripts/tavily_search.py "React documentation" --json | \ + jq -r '.results[].url' | \ + xargs -I {} curl -s {} +``` + +## Comparison with Other Search APIs + +**vs Brave Search:** +- ✅ AI answer generation +- ✅ Raw content extraction +- ✅ Better domain filtering +- ❌ Slower than Brave +- ❌ Costs credits + +**vs Perplexity:** +- ✅ More control over sources +- ✅ Raw content available +- ✅ Dedicated news mode +- ≈ Similar answer quality +- ≈ Similar speed + +**vs Google Custom Search:** +- ✅ LLM-optimized results +- ✅ Answer generation +- ✅ Simpler API +- ❌ Smaller index +- ≈ Similar cost structure + +## Troubleshooting + +### Script Won't Run + +```bash +# Make executable +chmod +x scripts/tavily_search.py + +# Check Python version (requires 3.6+) +python3 --version + +# Install dependencies +pip install tavily-python +``` + +### API Key Issues + +```bash +# Verify API key format (should start with tvly-) +echo $TAVILY_API_KEY + +# Test with explicit key +scripts/tavily_search.py "test" --api-key "tvly-..." 
+``` + +### Rate Limit Errors + +- Check your plan's credit allocation at https://tavily.com +- Reduce `max_results` to conserve credits +- Use `basic` depth instead of `advanced` +- Implement local caching for repeated queries + +## Resources + +See [api-reference.md](references/api-reference.md) for: +- Complete API parameter documentation +- Response format specifications +- Error handling details +- Cost and rate limit information +- Advanced usage examples + +## Dependencies + +- Python 3.6+ +- `tavily-python` package (install: `pip install tavily-python`) +- Valid Tavily API key + +## Credits & Attribution + +- Tavily API: https://tavily.com +- Python SDK: https://github.com/tavily-ai/tavily-python +- Documentation: https://docs.tavily.com diff --git a/_meta.json b/_meta.json new file mode 100644 index 0000000..5a3dab4 --- /dev/null +++ b/_meta.json @@ -0,0 +1,6 @@ +{ + "ownerId": "kn7dak197zp1gy590j60ct8r7h7zte7t", + "slug": "tavily", + "version": "1.0.0", + "publishedAt": 1769287990264 +} \ No newline at end of file diff --git a/references/api-reference.md b/references/api-reference.md new file mode 100644 index 0000000..02d3c84 --- /dev/null +++ b/references/api-reference.md @@ -0,0 +1,187 @@ +# Tavily API Reference + +## Overview + +Tavily is a search engine optimized for Large Language Models (LLMs) and AI applications. It provides: + +- **AI-optimized results**: Results specifically formatted for LLM consumption +- **Answer generation**: Optional AI-generated summaries from search results +- **Raw content extraction**: Clean, parsed HTML content from sources +- **Domain filtering**: Include or exclude specific domains +- **Image search**: Relevant images for visual context +- **Topic specialization**: General or news-focused search + +## API Key Setup + +1. Visit https://tavily.com and sign up +2. Generate an API key from your dashboard +3. 
Store the key securely: + - **Recommended**: Add to Clawdbot config under `skills.entries.tavily.apiKey` + - **Alternative**: Set `TAVILY_API_KEY` environment variable + +## Search Parameters + +### Required + +- `query` (string): The search query + +### Optional + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `search_depth` | string | `"basic"` | `"basic"` (fast, ~1-2s) or `"advanced"` (comprehensive, ~5-10s) | +| `topic` | string | `"general"` | `"general"` or `"news"` (current events, last 7 days) | +| `max_results` | int | 5 | Number of results (1-10) | +| `include_answer` | bool | true | Include AI-generated answer summary | +| `include_raw_content` | bool | false | Include cleaned HTML content of sources | +| `include_images` | bool | false | Include relevant images | +| `include_domains` | list[str] | null | Only search these domains | +| `exclude_domains` | list[str] | null | Exclude these domains | + +## Response Format + +```json +{ + "success": true, + "query": "What is quantum computing?", + "answer": "Quantum computing is a type of computing that uses...", + "results": [ + { + "title": "Quantum Computing Explained", + "url": "https://example.com/quantum", + "content": "Quantum computing leverages...", + "score": 0.95, + "raw_content": null + } + ], + "images": ["https://example.com/image.jpg"], + "response_time": "1.67", + "usage": { + "credits": 1 + } +} +``` + +## Use Cases & Best Practices + +### When to Use Tavily + +1. **Research tasks**: Comprehensive information gathering +2. **Current events**: News-focused queries with `topic="news"` +3. **Domain-specific search**: Use `include_domains` for trusted sources +4. **Visual content**: Enable `include_images` for visual context +5. 
**LLM consumption**: Results are pre-formatted for AI processing + +### Search Depth Comparison + +| Depth | Speed | Results Quality | Use Case | +|-------|-------|-----------------|----------| +| `basic` | 1-2s | Good | Quick lookups, simple facts | +| `advanced` | 5-10s | Excellent | Research, complex topics, comprehensive analysis | + +**Recommendation**: Start with `basic`, use `advanced` for research tasks. + +### Domain Filtering + +**Include domains** (allowlist): +```python +include_domains=["python.org", "github.com", "stackoverflow.com"] +``` +Only search these specific domains - useful for trusted sources. + +**Exclude domains** (denylist): +```python +exclude_domains=["pinterest.com", "quora.com"] +``` +Remove unwanted or low-quality sources. + +### Topic Selection + +**General** (`topic="general"`): +- Default mode +- Broader web search +- Historical and evergreen content +- Best for most queries + +**News** (`topic="news"`): +- Last 7 days only +- News-focused sources +- Current events and developments +- Best for "latest", "recent", "current" queries + +## Cost & Rate Limits + +- **Credits**: Each search consumes credits (1 credit for basic search) +- **Free tier**: Check https://tavily.com/pricing for current limits +- **Rate limits**: Varies by plan tier + +## Error Handling + +Common errors: + +1. **Missing API key** + ```json + { + "error": "Tavily API key required", + "setup_instructions": "Set TAVILY_API_KEY environment variable" + } + ``` + +2. **Package not installed** + ```json + { + "error": "tavily-python package not installed", + "install_command": "pip install tavily-python" + } + ``` + +3. **Invalid API key** + ```json + { + "error": "Invalid API key" + } + ``` + +4. 
#!/usr/bin/env python3
"""
Tavily AI Search - Optimized search for LLMs and AI applications
Requires: pip install tavily-python
"""

import argparse
import json
import os
import sys
from typing import Any, Dict, List, Optional

# Longest content snippet (in characters) shown per result in the text report.
SNIPPET_LIMIT = 200
# Number of image URLs listed in the text report.
IMAGE_PREVIEW_LIMIT = 5


def search(
    query: str,
    api_key: Optional[str],
    search_depth: str = "basic",
    topic: str = "general",
    max_results: int = 5,
    include_answer: bool = True,
    include_raw_content: bool = False,
    include_images: bool = False,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
) -> dict:
    """
    Execute a Tavily search query.

    This function never raises: every failure is reported as a dict that
    contains an "error" key, so callers can branch on `"error" in result`
    (or on the truthiness of `result.get("success")`).

    Args:
        query: Search query string; must contain non-whitespace characters
        api_key: Tavily API key (tvly-...); None or "" yields an error dict
        search_depth: "basic" (fast) or "advanced" (comprehensive)
        topic: "general" (default) or "news" (current events)
        max_results: Number of results to request; forwarded to the API
            unchanged (no range check is done here)
        include_answer: Include AI-generated answer summary
        include_raw_content: Include raw content of sources
        include_images: Include relevant images in results
        include_domains: List of domains to specifically include
        exclude_domains: List of domains to exclude

    Returns:
        dict: On success, the keys "success" (True), "query", "answer",
        "results", "images", "response_time" and "usage". On failure, an
        "error" message plus, depending on the cause, "query",
        "install_command" or "setup_instructions".
    """
    # Reject blank queries locally instead of sending a request that cannot
    # produce useful results.
    if not query or not query.strip():
        return {
            "error": "Search query must not be empty",
            "query": query
        }

    # Imported lazily so that a missing dependency becomes a readable error
    # dict instead of an ImportError when this module is loaded.
    try:
        from tavily import TavilyClient
    except ImportError:
        return {
            "error": "tavily-python package not installed. Run: pip install tavily-python",
            "install_command": "pip install tavily-python"
        }

    if not api_key:
        return {
            "error": "Tavily API key required. Get one at https://tavily.com",
            "setup_instructions": "Set TAVILY_API_KEY environment variable or pass --api-key"
        }

    try:
        client = TavilyClient(api_key=api_key)

        # Build search parameters
        search_params = {
            "query": query,
            "search_depth": search_depth,
            "topic": topic,
            "max_results": max_results,
            "include_answer": include_answer,
            "include_raw_content": include_raw_content,
            "include_images": include_images,
        }

        # Domain filters are only sent when the caller actually supplied some.
        if include_domains:
            search_params["include_domains"] = include_domains
        if exclude_domains:
            search_params["exclude_domains"] = exclude_domains

        response = client.search(**search_params)

        return {
            "success": True,
            "query": query,
            "answer": response.get("answer"),
            "results": response.get("results", []),
            "images": response.get("images", []),
            "response_time": response.get("response_time"),
            "usage": response.get("usage", {}),
        }

    except Exception as e:
        # Deliberately broad: this is the boundary between the SDK and the
        # CLI, and any client-side failure is surfaced as an error dict.
        return {
            "error": str(e),
            "query": query
        }


def _format_score(score: Any) -> str:
    """Return the score with three decimals, or "N/A" when it is not a number."""
    if isinstance(score, (int, float)) and not isinstance(score, bool):
        return f"{score:.3f}"
    return "N/A"


def format_report(result: Dict[str, Any]) -> str:
    """
    Render a successful search() result as the human-readable CLI report.

    Missing or null fields are shown as "N/A" rather than raising, so a
    result without a score, response time or usage block still prints.

    Args:
        result: A dict shaped like the success value returned by search()

    Returns:
        str: The multi-line report, without a trailing newline
    """
    response_time = result.get("response_time")
    # Only append the unit when there is a value; "N/As" is not a duration.
    time_text = "N/A" if response_time is None else f"{response_time}s"
    # "usage" may be present but null, so .get("usage", {}) is not enough.
    usage = result.get("usage") or {}

    lines = [
        f"Query: {result.get('query', '')}",
        f"Response time: {time_text}",
        f"Credits used: {usage.get('credits', 'N/A')}",
        "",
    ]

    if result.get("answer"):
        lines += ["=== AI ANSWER ===", result["answer"], ""]

    if result.get("results"):
        lines.append("=== RESULTS ===")
        for position, item in enumerate(result["results"], 1):
            lines.append("")
            lines.append(f"{position}. {item.get('title', 'No title')}")
            lines.append(f"   URL: {item.get('url', 'N/A')}")
            lines.append(f"   Score: {_format_score(item.get('score'))}")
            snippet = item.get("content")
            if snippet:
                if len(snippet) > SNIPPET_LIMIT:
                    snippet = snippet[:SNIPPET_LIMIT] + "..."
                lines.append(f"   {snippet}")

    if result.get("images"):
        lines.append("")
        lines.append(f"=== IMAGES ({len(result['images'])}) ===")
        # The header reports the full count; only a preview is listed.
        for image_url in result["images"][:IMAGE_PREVIEW_LIMIT]:
            lines.append(f"  {image_url}")

    return "\n".join(lines)


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for the search script."""
    parser = argparse.ArgumentParser(
        description="Tavily AI Search - Optimized search for LLMs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Basic search
  %(prog)s "What is quantum computing?"

  # Advanced search with more results
  %(prog)s "Climate change solutions" --depth advanced --max-results 10

  # News-focused search
  %(prog)s "AI developments" --topic news

  # Domain filtering
  %(prog)s "Python tutorials" --include-domains python.org --exclude-domains w3schools.com

  # Include images in results
  %(prog)s "Eiffel Tower" --images

Environment Variables:
  TAVILY_API_KEY    Your Tavily API key (get one at https://tavily.com)
        """
    )

    parser.add_argument(
        "query",
        help="Search query"
    )

    parser.add_argument(
        "--api-key",
        help="Tavily API key (or set TAVILY_API_KEY env var)"
    )

    parser.add_argument(
        "--depth",
        choices=["basic", "advanced"],
        default="basic",
        help="Search depth: 'basic' (fast) or 'advanced' (comprehensive)"
    )

    parser.add_argument(
        "--topic",
        choices=["general", "news"],
        default="general",
        help="Search topic: 'general' or 'news' (current events)"
    )

    parser.add_argument(
        "--max-results",
        type=int,
        default=5,
        help="Maximum number of results (1-10)"
    )

    parser.add_argument(
        "--no-answer",
        action="store_true",
        help="Exclude AI-generated answer summary"
    )

    parser.add_argument(
        "--raw-content",
        action="store_true",
        help="Include raw HTML content of sources"
    )

    parser.add_argument(
        "--images",
        action="store_true",
        help="Include relevant images in results"
    )

    parser.add_argument(
        "--include-domains",
        nargs="+",
        help="List of domains to specifically include"
    )

    parser.add_argument(
        "--exclude-domains",
        nargs="+",
        help="List of domains to exclude"
    )

    parser.add_argument(
        "--json",
        action="store_true",
        help="Output raw JSON response"
    )

    return parser


def main():
    """
    Command-line entry point: parse arguments, run the search, print output.

    With --json the result dict (success or error) is printed as JSON.
    Otherwise an error is written to stderr and the process exits with
    status 1, and a successful result is printed as a text report.
    """
    args = build_parser().parse_args()

    # Get API key from args or environment
    api_key = args.api_key or os.getenv("TAVILY_API_KEY")

    result = search(
        query=args.query,
        api_key=api_key,
        search_depth=args.depth,
        topic=args.topic,
        max_results=args.max_results,
        include_answer=not args.no_answer,
        include_raw_content=args.raw_content,
        include_images=args.images,
        include_domains=args.include_domains,
        exclude_domains=args.exclude_domains,
    )

    if args.json:
        print(json.dumps(result, indent=2))
        return

    if "error" in result:
        print(f"Error: {result['error']}", file=sys.stderr)
        if "install_command" in result:
            print(f"\nTo install: {result['install_command']}", file=sys.stderr)
        if "setup_instructions" in result:
            print(f"\nSetup: {result['setup_instructions']}", file=sys.stderr)
        sys.exit(1)

    print(format_report(result))


if __name__ == "__main__":
    main()