From a99d29da330412108c857af6991b70ff4d5a8088 Mon Sep 17 00:00:00 2001 From: zlei9 Date: Sun, 29 Mar 2026 08:32:38 +0800 Subject: [PATCH] Initial commit with translated description --- SKILL.md | 42 +++++++++++++++++ _meta.json | 6 +++ references/api.md | 85 +++++++++++++++++++++++++++++++++ scripts/crawl.py | 117 ++++++++++++++++++++++++++++++++++++++++++++++ scripts/scrape.py | 89 +++++++++++++++++++++++++++++++++++ scripts/search.py | 77 ++++++++++++++++++++++++++++++ 6 files changed, 416 insertions(+) create mode 100644 SKILL.md create mode 100644 _meta.json create mode 100644 references/api.md create mode 100644 scripts/crawl.py create mode 100644 scripts/scrape.py create mode 100644 scripts/search.py diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..46d0792 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,42 @@ +--- +name: firecrawl +description: "通过Firecrawl API进行网络搜索和抓取。在需要搜索网页、抓取网站(包括JS重型页面)、爬取整个站点或从网页提取结构化数据时使用。需要FIRECRAWL_API_KEY环境变量。" +--- + +# Firecrawl + +Web search and scraping via Firecrawl API. + +## Prerequisites + +Set `FIRECRAWL_API_KEY` in your environment or `.env` file: +```bash +export FIRECRAWL_API_KEY=fc-xxxxxxxxxx +``` + +## Quick Start + +### Search the web +```bash +firecrawl_search "your search query" --limit 10 +``` + +### Scrape a single page +```bash +firecrawl_scrape "https://example.com" +``` + +### Crawl an entire site +```bash +firecrawl_crawl "https://example.com" --max-pages 50 +``` + +## API Reference + +See [references/api.md](references/api.md) for detailed API documentation and advanced options. 
+ +## Scripts + +- `scripts/search.py` - Search the web with Firecrawl +- `scripts/scrape.py` - Scrape a single URL +- `scripts/crawl.py` - Crawl an entire website diff --git a/_meta.json b/_meta.json new file mode 100644 index 0000000..b8a629d --- /dev/null +++ b/_meta.json @@ -0,0 +1,6 @@ +{ + "ownerId": "kn72sgg184w49a3mwz6fhdfc5s800zf0", + "slug": "firecrawl-search", + "version": "1.0.0", + "publishedAt": 1769544521483 +} \ No newline at end of file diff --git a/references/api.md b/references/api.md new file mode 100644 index 0000000..26d6953 --- /dev/null +++ b/references/api.md @@ -0,0 +1,85 @@ +# Firecrawl API Reference + +## Environment + +Set your API key: +```bash +export FIRECRAWL_API_KEY=fc-xxxxxxxxxx +``` + +## API Endpoints + +### Search +```bash +POST https://api.firecrawl.dev/v1/search +``` + +Request body: +```json +{ + "query": "search terms", + "limit": 10, + "lang": "en", + "country": "us" +} +``` + +### Scrape +```bash +POST https://api.firecrawl.dev/v1/scrape +``` + +Request body: +```json +{ + "url": "https://example.com", + "formats": ["markdown", "html", "screenshot"], + "onlyMainContent": true, + "includeTags": ["h1", "p", "article"], + "excludeTags": ["nav", "footer", "aside"] +} +``` + +### Crawl +```bash +POST https://api.firecrawl.dev/v1/crawl +``` + +Request body: +```json +{ + "url": "https://example.com", + "limit": 50, + "excludePaths": ["/blog", "/admin"], + "scrapeOptions": { + "formats": ["markdown"], + "onlyMainContent": true + } +} +``` + +Check status: +```bash +GET https://api.firecrawl.dev/v1/crawl/{job_id} +``` + +## Response Format + +All responses follow this structure: +```json +{ + "success": true, + "data": { ... }, + "status": "completed" +} +``` + +## Rate Limits + +- Search: Check your Firecrawl dashboard +- Scrape: Check your Firecrawl dashboard +- Crawl: Check your Firecrawl dashboard + +## Pricing + +See https://firecrawl.dev/pricing for current rates. 
#!/usr/bin/env python3
"""Firecrawl crawl script for crawling entire websites.

Starts a crawl job via the Firecrawl /v1/crawl API, optionally polls it to
completion, and prints either a per-page summary or the raw JSON response.

Requires the FIRECRAWL_API_KEY environment variable.
"""
import argparse
import json
import os
import sys
import time
import urllib.request
from urllib.error import HTTPError, URLError


def start_crawl(url: str, max_pages: int = 50, exclude_paths: list = None):
    """Start a crawl job.

    Args:
        url: URL to start crawling from.
        max_pages: Maximum number of pages to crawl (API "limit" field).
        exclude_paths: Optional list of path patterns to skip (e.g. ["/blog"]).

    Returns:
        Parsed JSON response from the API (contains the job "id" on success).

    Exits the process with status 1 when the API key is missing or the
    request fails.
    """
    api_key = os.environ.get("FIRECRAWL_API_KEY")
    if not api_key:
        print("Error: FIRECRAWL_API_KEY not set", file=sys.stderr)
        sys.exit(1)

    req_url = "https://api.firecrawl.dev/v1/crawl"

    payload = {
        "url": url,
        "limit": max_pages,
        "scrapeOptions": {
            "formats": ["markdown"],
            "onlyMainContent": True
        }
    }

    if exclude_paths:
        payload["excludePaths"] = exclude_paths

    data = json.dumps(payload).encode()

    req = urllib.request.Request(
        req_url,
        data=data,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        },
        method="POST"
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read().decode())
    except HTTPError as e:
        print(f"Error: {e.code} - {e.reason}", file=sys.stderr)
        sys.exit(1)
    except URLError as e:
        # Network-level failure (DNS, refused connection, timeout).
        print(f"Error: {e.reason}", file=sys.stderr)
        sys.exit(1)


def check_crawl_status(job_id: str):
    """Check crawl job status.

    Args:
        job_id: Job id returned by start_crawl().

    Returns:
        Parsed JSON status document ("status" plus, when finished, "data").

    Exits the process with status 1 when the API key is missing or the
    request fails (previously a missing key sent "Bearer None" and any
    HTTPError escaped as a raw traceback).
    """
    api_key = os.environ.get("FIRECRAWL_API_KEY")
    if not api_key:
        print("Error: FIRECRAWL_API_KEY not set", file=sys.stderr)
        sys.exit(1)

    req_url = f"https://api.firecrawl.dev/v1/crawl/{job_id}"

    req = urllib.request.Request(
        req_url,
        headers={"Authorization": f"Bearer {api_key}"}
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read().decode())
    except HTTPError as e:
        print(f"Error: {e.code} - {e.reason}", file=sys.stderr)
        sys.exit(1)
    except URLError as e:
        print(f"Error: {e.reason}", file=sys.stderr)
        sys.exit(1)


def main():
    """CLI entry point: start a crawl and optionally wait for completion."""
    parser = argparse.ArgumentParser(description="Crawl a website via Firecrawl")
    parser.add_argument("url", help="URL to start crawling from")
    parser.add_argument("--max-pages", type=int, default=50, help="Max pages to crawl")
    parser.add_argument("--exclude-paths", nargs="*", default=None,
                        help="Path patterns to exclude, e.g. /blog /admin")
    parser.add_argument("--wait", action="store_true", help="Wait for completion")
    parser.add_argument("--json", action="store_true", help="Output raw JSON")

    args = parser.parse_args()

    # Start crawl (exclude_paths was previously accepted by start_crawl but
    # never passed from the CLI).
    result = start_crawl(args.url, args.max_pages, args.exclude_paths)

    if not result.get("success"):
        print("Error: Failed to start crawl", file=sys.stderr)
        print(json.dumps(result, indent=2))
        sys.exit(1)

    job_id = result.get("id")
    print(f"Crawl started: {job_id}")

    if not args.wait:
        print(f"Check status with: firecrawl_crawl_status {job_id}")
        return

    # Poll every 2s until the job reaches a terminal state.
    print("Waiting for completion...")
    while True:
        status = check_crawl_status(job_id)

        if status.get("status") in ["completed", "failed", "cancelled"]:
            break

        print(f"Status: {status.get('status')}...")
        time.sleep(2)

    if args.json:
        print(json.dumps(status, indent=2))
    else:
        print(f"\nCrawl {status.get('status')}")
        if "data" in status:
            print(f"Pages crawled: {len(status['data'])}")
            for page in status["data"]:
                print(f"\n{'='*60}")
                print(f"URL: {page.get('metadata', {}).get('sourceURL', 'N/A')}")
                if "markdown" in page:
                    preview = page["markdown"][:300] + "..." if len(page["markdown"]) > 300 else page["markdown"]
                    print(f"Preview: {preview}")


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""Firecrawl scrape script for single URLs.

Scrapes one page via the Firecrawl /v1/scrape API and prints either a
human-readable summary (title, URL, markdown/html content) or raw JSON.

Requires the FIRECRAWL_API_KEY environment variable.
"""
import argparse
import json
import os
import sys
import urllib.request
from urllib.error import HTTPError, URLError


def scrape(url: str, formats: list = None, only_main: bool = True):
    """Scrape a URL using Firecrawl.

    Args:
        url: Page URL to scrape.
        formats: Output formats to request (defaults to ["markdown"]).
        only_main: Ask the API to strip navigation/boilerplate content.

    Returns:
        Parsed JSON response from the API.

    Exits the process with status 1 when the API key is missing or the
    request fails.
    """
    api_key = os.environ.get("FIRECRAWL_API_KEY")
    if not api_key:
        print("Error: FIRECRAWL_API_KEY not set", file=sys.stderr)
        sys.exit(1)

    formats = formats or ["markdown"]

    req_url = "https://api.firecrawl.dev/v1/scrape"

    data = json.dumps({
        "url": url,
        "formats": formats,
        "onlyMainContent": only_main
    }).encode()

    req = urllib.request.Request(
        req_url,
        data=data,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        },
        method="POST"
    )

    try:
        # Scraping JS-heavy pages can be slow; allow a generous timeout.
        with urllib.request.urlopen(req, timeout=60) as resp:
            return json.loads(resp.read().decode())
    except HTTPError as e:
        print(f"Error: {e.code} - {e.reason}", file=sys.stderr)
        print(e.read().decode(), file=sys.stderr)
        sys.exit(1)
    except URLError as e:
        # Network-level failure (DNS, refused connection, timeout) —
        # previously escaped as an unhandled traceback.
        print(f"Error: {e.reason}", file=sys.stderr)
        sys.exit(1)


def main():
    """CLI entry point: scrape one URL and print the result."""
    parser = argparse.ArgumentParser(description="Scrape a URL via Firecrawl")
    parser.add_argument("url", help="URL to scrape")
    parser.add_argument("--html", action="store_true", help="Include HTML format")
    # NOTE(review): store_true with default=True means markdown is ALWAYS
    # requested and the flag itself is a no-op; kept for CLI compatibility.
    parser.add_argument("--markdown", action="store_true", default=True, help="Include markdown format")
    parser.add_argument("--screenshot", action="store_true", help="Include screenshot")
    parser.add_argument("--json", action="store_true", help="Output raw JSON")

    args = parser.parse_args()

    formats = []
    if args.html:
        formats.append("html")
    if args.markdown:
        formats.append("markdown")
    if args.screenshot:
        formats.append("screenshot")

    result = scrape(args.url, formats)

    if args.json:
        print(json.dumps(result, indent=2))
        return

    # Pretty print results
    if result.get("success") and "data" in result:
        data = result["data"]
        print(f"Title: {data.get('metadata', {}).get('title', 'N/A')}")
        print(f"URL: {data.get('metadata', {}).get('sourceURL', args.url)}")
        print(f"\n{'='*60}\n")

        if "markdown" in data:
            print(data["markdown"])
        elif "html" in data:
            # Cap HTML output to keep terminal output manageable.
            print(data["html"][:5000])
    else:
        print("Error: Failed to scrape")
        print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""Firecrawl web search script.

Searches the web through the Firecrawl /v1/search API and prints either a
human-readable result list or raw JSON.

Requires the FIRECRAWL_API_KEY environment variable.
"""
import argparse
import json
import os
import sys
import urllib.request
from urllib.error import HTTPError, URLError


def search(query: str, limit: int = 10, lang: str = "en", country: str = "us"):
    """Search the web using Firecrawl.

    Args:
        query: Search terms.
        limit: Maximum number of results to request.
        lang: Search language code (default "en"; previously hard-coded).
        country: Search country code (default "us"; previously hard-coded).

    Returns:
        Parsed JSON response from the API.

    Exits the process with status 1 when the API key is missing or the
    request fails.
    """
    api_key = os.environ.get("FIRECRAWL_API_KEY")
    if not api_key:
        print("Error: FIRECRAWL_API_KEY not set", file=sys.stderr)
        sys.exit(1)

    url = "https://api.firecrawl.dev/v1/search"

    data = json.dumps({
        "query": query,
        "limit": limit,
        "lang": lang,
        "country": country
    }).encode()

    req = urllib.request.Request(
        url,
        data=data,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        },
        method="POST"
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read().decode())
    except HTTPError as e:
        print(f"Error: {e.code} - {e.reason}", file=sys.stderr)
        print(e.read().decode(), file=sys.stderr)
        sys.exit(1)
    except URLError as e:
        # Network-level failure (DNS, refused connection, timeout) —
        # previously escaped as an unhandled traceback.
        print(f"Error: {e.reason}", file=sys.stderr)
        sys.exit(1)


def main():
    """CLI entry point: run one search and print the results."""
    parser = argparse.ArgumentParser(description="Search the web via Firecrawl")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--limit", type=int, default=10, help="Max results")
    parser.add_argument("--lang", default="en", help="Search language code")
    parser.add_argument("--country", default="us", help="Search country code")
    parser.add_argument("--json", action="store_true", help="Output raw JSON")

    args = parser.parse_args()

    result = search(args.query, args.limit, args.lang, args.country)

    if args.json:
        print(json.dumps(result, indent=2))
        return

    # Pretty print results
    if result.get("success") and "data" in result:
        for item in result["data"]:
            print(f"\n{'='*60}")
            print(f"Title: {item.get('title', 'N/A')}")
            print(f"URL: {item.get('url', 'N/A')}")
            print(f"Description: {item.get('description', 'N/A')}")
            if "markdown" in item:
                content = item["markdown"][:500] + "..." if len(item["markdown"]) > 500 else item["markdown"]
                print(f"Content preview:\n{content}")
    else:
        print("No results found")
        print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()