From 8067ab9461dbcfdd8b41cba599195cb28ba38a49 Mon Sep 17 00:00:00 2001
From: zlei9 <zlei9@126.com>
Date: Sun, 29 Mar 2026 13:15:43 +0800
Subject: [PATCH] Initial commit with translated description

---
 EXAMPLES.md  | 308 +++++++++++++++++++++++++++++
 REFERENCE.md | 535 +++++++++++++++++++++++++++++++++++++++++++++++++++
 SKILL.md     |  73 +++++++
 _meta.json   |   6 +
 setup.json   |  35 ++++
 5 files changed, 957 insertions(+)
 create mode 100644 EXAMPLES.md
 create mode 100644 REFERENCE.md
 create mode 100644 SKILL.md
 create mode 100644 _meta.json
 create mode 100644 setup.json

diff --git a/EXAMPLES.md b/EXAMPLES.md
new file mode 100644
index 0000000..abc35fb
--- /dev/null
+++ b/EXAMPLES.md
@@ -0,0 +1,308 @@
+# Browser Automation Examples
+
+This document provides detailed examples of common browser automation tasks using the CLI tool.
+
+## Example 1: Extract Product Information from E-commerce
+
+**User request**: "Go to example.com/product/123 and extract the product details"
+
+**Workflow**:
+
+1. **Navigate** to the product page:
+   ```bash
+   browser navigate https://example.com/product/123
+   ```
+
+2. **Extract** product data with schema:
+   ```bash
+   browser extract "Extract the product information" '{"productName": "string", "price": "number", "currency": "string", "inStock": "boolean", "rating": "number", "reviewCount": "number"}'
+   ```
+
+3. **Close** the browser:
+   ```bash
+   browser close
+   ```
+
+**Expected result**: JSON object with product details that can be analyzed or stored.
+
+---
+
+## Example 2: Fill Out and Submit a Contact Form
+
+**User request**: "Fill out the contact form on example.com with my information"
+
+**Workflow**:
+
+1. **Navigate** to contact page:
+   ```bash
+   browser navigate https://example.com/contact
+   ```
+
+2. **Act**: Fill in name field:
+   ```bash
+   browser act "Fill in the name field with 'John Doe'"
+   ```
+
+3. **Act**: Fill in email field:
+   ```bash
+   browser act "Fill in the email field with 'john.doe@example.com'"
+   ```
+
+4. **Act**: Fill in message field:
+   ```bash
+   browser act "Fill in the message field with 'I would like to inquire about your services'"
+   ```
+
+5. **Act**: Submit the form:
+   ```bash
+   browser act "Click the Submit button"
+   ```
+
+6. **Screenshot** to capture confirmation:
+   ```bash
+   browser screenshot
+   ```
+
+7. **Close** the browser:
+   ```bash
+   browser close
+   ```
+
+---
+
+## Example 3: Research and Summarize News Articles
+
+**User request**: "Check the latest tech news on techcrunch.com and summarize the top stories"
+
+**Workflow**:
+
+1. **Navigate** to news site:
+   ```bash
+   browser navigate https://techcrunch.com
+   ```
+
+2. **Extract** article headlines and summaries:
+   ```bash
+   browser extract "Extract the top 5 article headlines and their summaries" '{"headlines": "string", "summary": "string", "author": "string", "publishedDate": "string"}'
+   ```
+
+3. **Close** the browser:
+   ```bash
+   browser close
+   ```
+
+4. Analyze and summarize the extracted data using Claude's text analysis capabilities.
+
+---
+
+## Example 4: Login and Navigate Authenticated Area
+
+**User request**: "Log into example.com and navigate to my dashboard"
+
+**Workflow**:
+
+1. **Navigate** to login page:
+   ```bash
+   browser navigate https://example.com/login
+   ```
+
+2. **Act**: Fill in username:
+   ```bash
+   browser act "Fill in the username field with 'myusername'"
+   ```
+
+3. **Act**: Fill in password:
+   ```bash
+   browser act "Fill in the password field with 'mypassword'"
+   ```
+
+4. **Act**: Click login button:
+   ```bash
+   browser act "Click the Login button"
+   ```
+
+5. **Act**: Wait for page load:
+   ```bash
+   browser act "Wait for the page to fully load"
+   ```
+
+6. **Navigate** to dashboard:
+   ```bash
+   browser navigate https://example.com/dashboard
+   ```
+
+7. **Screenshot** the dashboard:
+   ```bash
+   browser screenshot
+   ```
+
+8. **Close** the browser:
+   ```bash
+   browser close
+   ```
+
+**Note**: This example uses Chrome's user profile (`.chrome-profile/`) which may preserve session cookies between runs.
+
+---
+
+## Example 5: Search and Collect Results
+
+**User request**: "Search Google for 'best TypeScript practices' and get the top 5 results"
+
+**Workflow**:
+
+1. **Navigate** to Google:
+   ```bash
+   browser navigate https://www.google.com
+   ```
+
+2. **Act**: Perform search:
+   ```bash
+   browser act "Type 'best TypeScript practices' in the search box and press Enter"
+   ```
+
+3. **Act**: Wait for results:
+   ```bash
+   browser act "Wait for search results to load"
+   ```
+
+4. **Extract** search results:
+   ```bash
+   browser extract "Extract the top 5 search results" '{"title": "string", "url": "string", "snippet": "string"}'
+   ```
+
+5. **Close** the browser:
+   ```bash
+   browser close
+   ```
+
+---
+
+## Example 6: Download a File
+
+**User request**: "Download the PDF file from example.com/documents/report.pdf"
+
+**Workflow**:
+
+1. **Navigate** to the file URL:
+   ```bash
+   browser navigate https://example.com/documents/report.pdf
+   ```
+
+2. **Act**: Wait for the download to complete:
+   ```bash
+   browser act "Wait for 5 seconds for the download to complete"
+   ```
+
+3. **Close** the browser:
+   ```bash
+   browser close
+   ```
+
+**Note**: Files are automatically downloaded to the `./agent/downloads/` directory because of the CDP download configuration.
+
+---
+
+## Example 7: Debugging a Page Issue
+
+**User request**: "Check why the submit button isn't working on example.com/form"
+
+**Workflow**:
+
+1. **Navigate** to the form page:
+   ```bash
+   browser navigate https://example.com/form
+   ```
+
+2. **Screenshot** initial state:
+   ```bash
+   browser screenshot
+   ```
+
+3. **Observe** available elements:
+   ```bash
+   browser observe "Find all buttons and their states"
+   ```
+
+4. **Observe** form fields:
+   ```bash
+   browser observe "Find all form input fields and their required status"
+   ```
+
+5. **Act**: Try filling required fields:
+   ```bash
+   browser act "Fill in all required fields with test data"
+   ```
+
+6. **Screenshot** after filling:
+   ```bash
+   browser screenshot
+   ```
+
+7. **Observe** button state again:
+   ```bash
+   browser observe "Check if the submit button is now enabled"
+   ```
+
+8. **Close** the browser:
+   ```bash
+   browser close
+   ```
+
+Analyze the screenshots and observations to determine the issue.
+
+---
+
+## Example 8: Multi-Page Data Collection
+
+**User request**: "Extract product information from the first 3 pages of results on example.com/products"
+
+**Workflow**:
+
+1. **Navigate** to products page:
+   ```bash
+   browser navigate https://example.com/products
+   ```
+
+2. **Extract** products from page 1:
+   ```bash
+   browser extract "Extract all products on this page" '{"name": "string", "price": "number", "imageUrl": "string"}'
+   ```
+
+3. **Act**: Click next page:
+   ```bash
+   browser act "Click the Next Page button"
+   ```
+
+4. **Extract** products from page 2:
+   ```bash
+   browser extract "Extract all products on this page" '{"name": "string", "price": "number", "imageUrl": "string"}'
+   ```
+
+5. **Act**: Click next page:
+   ```bash
+   browser act "Click the Next Page button"
+   ```
+
+6. **Extract** products from page 3:
+   ```bash
+   browser extract "Extract all products on this page" '{"name": "string", "price": "number", "imageUrl": "string"}'
+   ```
+
+7. **Close** the browser:
+   ```bash
+   browser close
+   ```
+
+Combine and process all extracted data.
+
+---
+
+## Tips for Success
+
+- **Be specific with natural language**: "Click the blue Submit button in the footer" is better than "click submit". This is **extremely important** because elements on many websites are ambiguous.
+- **Wait when needed**: After navigation or actions that trigger page changes, explicitly wait
+- **Use observe for discovery**: When unsure what elements exist, use observe first
+- **Take screenshots for debugging**: Visual confirmation helps understand what the browser sees
+- **Handle errors gracefully**: If an action fails, try breaking it into smaller steps
+- **Clean up resources**: Always close the browser when done to free up system resources
diff --git a/REFERENCE.md b/REFERENCE.md
new file mode 100644
index 0000000..7f3dff9
--- /dev/null
+++ b/REFERENCE.md
@@ -0,0 +1,535 @@
+# Browser Automation CLI Reference
+
+This document provides detailed technical reference for the CLI browser automation tool.
+
+## Architecture Overview
+
+The browser automation system consists of:
+
+- **Stagehand**: TypeScript library wrapping Playwright for AI-driven browser control. It uses an AI model to find and interact with the right elements, so be specific in your instructions
+- **Chrome CDP**: Chrome DevTools Protocol connection on port 9222
+- **CLI Tool**: Command-line interface in `src/cli.ts` for browser automation
+- **Local Chrome**: Chrome browser launched with remote debugging enabled
+
+### File Locations
+
+- **Chrome Profile**: `.chrome-profile/` - Persistent browser profile directory
+- **Screenshots**: `./agent/browser_screenshots/` - Screenshot output directory
+- **Downloads**: `./agent/downloads/` - File download directory
+
+## CLI Command Reference
+
+### navigate
+
+Navigate to a URL in the browser.
+
+**Usage**:
+```bash
+browser navigate <url>
+```
+
+**Parameters**:
+- `url` (string, required): The URL to navigate to. Must include protocol (http:// or https://)
+
+**Returns**:
+JSON output:
+```json
+{
+  "success": true,
+  "message": "Successfully navigated to <url>",
+  "screenshot": "/path/to/screenshot.png"
+}
+```
+
+**Implementation Details**:
+- Uses Playwright's `page.goto()` under the hood
+- Waits for network idle and DOM content loaded
+- Automatically takes a screenshot after navigation
+- Supports HTTPS upgrade for HTTP URLs
+
+**Example**:
+```bash
+browser navigate https://example.com
+```
+
+**Error Handling**:
+- Invalid URLs return error with `success: false`
+- Network timeouts return timeout error
+- SSL certificate errors may fail navigation
+
+---
+
+### act
+
+Perform an action on the page using natural language.
+
+**Usage**:
+```bash
+browser act "<action>"
+```
+
+**Parameters**:
+- `action` (string, required): Natural language description of the action to perform
+
+**Returns**:
+JSON output:
+```json
+{
+  "success": true,
+  "message": "Successfully performed action: <action>",
+  "screenshot": "/path/to/screenshot.png"
+}
+```
+
+Note: Without specificity it might succeed on the wrong element!
+
+**Implementation Details**:
+- Uses Stagehand's `page.act()` which leverages Claude Haiku 4.5
+- AI model interprets natural language and executes corresponding browser actions
+- Supports: clicking, typing, selecting, scrolling, waiting, hovering, and more
+- Automatically handles element location and interaction
+- Automatically takes a screenshot after the action
+
+**Natural Language Examples**:
+```bash
+browser act "Click the login button"
+browser act "Fill in email field with test@example.com"
+browser act "Scroll to the bottom of the page"
+browser act "Select 'California' from the state dropdown"
+browser act "Hover over the menu icon"
+browser act "Wait for 3 seconds"
+browser act "Press the Enter key"
+browser act "Double-click the file icon"
+```
+
+**Best Practices**:
+- Be **specific** about which element to interact with
+- Include visual descriptors ("button next to the form", "top menu", "form at bottom")
+- For ambiguous elements, mention nearby context
+- Break complex actions into multiple simple actions
+
+**Error Handling**:
+- Element not found errors indicate that the described element couldn't be resolved on the page
+- Timeout errors occur when action takes too long
+- Action not possible errors indicate element state prevents action
+- All errors return JSON with `success: false`
+
+---
+
+### extract
+
+Extract structured data from the current page using a schema.
+
+**Usage**:
+```bash
+browser extract "<instruction>" '{"field": "type"}'
+```
+
+**Parameters**:
+- `instruction` (string, required): Natural language description of what to extract
+- `schema` (JSON string, optional): Schema definition mapping field names to types
+
+**Schema Types**:
+- `"string"`: Text content
+- `"number"`: Numeric values (integers or floats)
+- `"boolean"`: True/false values
+
+**Returns**:
+JSON output:
+```json
+{
+  "success": true,
+  "data": {
+    "field1": "value",
+    "field2": 123
+  }
+}
+```
+
+**Implementation Details**:
+- Uses Stagehand's `page.extract()` with Zod schema validation
+- AI model (Claude Haiku 4.5) identifies relevant page elements
+- Automatically handles pagination and dynamic content
+- Validates extracted data against schema
+
+**Schema Example**:
+```bash
+browser extract "Extract the product information" '{"productName": "string", "price": "number", "inStock": "boolean", "description": "string", "rating": "number"}'
+```
+
+**Complex Extraction Example**:
+```bash
+browser extract "Extract all items from the shopping cart" '{"itemName": "string", "quantity": "number", "unitPrice": "number", "totalPrice": "number", "imageUrl": "string"}'
+```
+
+**Best Practices**:
+- Use clear, descriptive field names
+- Match schema types to expected data types
+- Provide specific extraction instructions
+- Handle missing data by checking result properties
+
+**Error Handling**:
+- Schema validation errors indicate type mismatch
+- Extraction failures occur when data not found on page
+- Timeout errors for pages that take too long to analyze
+- All errors return JSON with `success: false`
+
+---
+
+### observe
+
+Discover available actions on the page.
+
+**Usage**:
+```bash
+browser observe "<query>"
+```
+
+**Parameters**:
+- `query` (string, required): Natural language query to discover elements
+
+**Returns**:
+JSON output:
+```json
+{
+  "success": true,
+  "data": [
+    {
+      "selector": "button.submit-btn",
+      "text": "Submit Form",
+      "type": "button",
+      "visible": true,
+      "enabled": true
+    }
+  ]
+}
+```
+
+**Implementation Details**:
+- Uses Stagehand's `page.observe()` to scan page elements
+- Returns actionable elements matching the query
+- Provides element properties, states, and available actions
+
+**Query Examples**:
+```bash
+browser observe "Find all buttons"
+browser observe "Find clickable links in the navigation"
+browser observe "Find form input fields"
+browser observe "Find all submit buttons"
+browser observe "Find elements with text 'Login'"
+browser observe "Find all images"
+```
+
+**Use Cases**:
+- Page exploration and discovery
+- Debugging action failures
+- Understanding page structure
+- Finding dynamic element selectors
+
+**Error Handling**:
+- Empty array returned when no elements match
+- Timeout for pages that take too long to scan
+- All errors return JSON with `success: false`
+
+---
+
+### screenshot
+
+Take a screenshot of the current page.
+
+**Usage**:
+```bash
+browser screenshot
+```
+
+**Parameters**: None
+
+**Returns**:
+JSON output:
+```json
+{
+  "success": true,
+  "screenshot": "/path/to/screenshot.png"
+}
+```
+
+**Implementation Details**:
+- Captures full viewport at current scroll position
+- Saves as PNG format with timestamp in filename
+- Automatically resizes images larger than 2000x2000 pixels using Sharp
+- Uses lossless PNG compression
+
+**Screenshot Path Format**:
+```
+./agent/browser_screenshots/screenshot-YYYY-MM-DDTHH-MM-SS-mmmZ.png
+```
+
+**Example**:
+```bash
+browser screenshot
+```
+
+**Image Processing**:
+- Original resolution preserved if ≤ 2000x2000
+- Larger images resized to fit within 2000x2000 while maintaining aspect ratio
+- Uses Sharp library for high-quality image processing
+
+**Best Practices**:
+- Take screenshots before and after important actions
+- Use for visual debugging and verification
+- Screenshot after navigation to confirm page loaded
+- Capture error states for troubleshooting
+
+**Error Handling**:
+- Directory creation errors if screenshots folder can't be created
+- CDP errors if Chrome DevTools Protocol connection fails
+- File write errors if disk space insufficient
+- All errors return JSON with `success: false`
+
+---
+
+### close
+
+Close the browser and cleanup resources.
+
+**Usage**:
+```bash
+browser close
+```
+
+**Parameters**: None
+
+**Returns**:
+JSON output:
+```json
+{
+  "success": true,
+  "message": "Browser closed"
+}
+```
+
+**Implementation Details**:
+- Calls `stagehand.close()` to clean up Playwright resources
+- Kills Chrome process if it was started by the CLI tool
+- Clears internal state variables
+- Does NOT delete `.chrome-profile/` directory (preserved for reuse)
+
+**Resource Cleanup**:
+- Closes all browser tabs and windows
+- Terminates Chrome process (only if started by this tool)
+- Releases CDP connection
+- Clears Stagehand instance
+
+**Best Practices**:
+- Always call at the end of browser automation tasks
+- Call even if errors occurred during automation
+- Don't call mid-workflow unless explicitly needed
+
+**Error Handling**:
+- Continues cleanup even if some steps fail
+- Safe to call multiple times
+- Gracefully handles already-closed browser
+- All errors return JSON with `success: false`
+
+---
+
+## Configuration Details
+
+### Stagehand Initialization
+
+The Stagehand instance is configured in `src/cli.ts` with:
+
+```typescript
+new Stagehand({
+  env: "LOCAL",
+  verbose: 0,
+  enableCaching: true,
+  model: "anthropic/claude-haiku-4-5-20251001",
+  localBrowserLaunchOptions: {
+    cdpUrl: wsUrl,
+  },
+})
+```
+
+**Configuration Options**:
+- `env: "LOCAL"`: Uses local Chrome instead of remote browser
+- `verbose: 0`: Minimal logging output
+- `enableCaching: true`: Caches page analysis for better performance
+- `model`: Claude Haiku 4.5 for AI-driven actions and extraction
+- `cdpUrl`: Chrome DevTools Protocol endpoint
+
+### Chrome Launch Arguments
+
+Chrome is launched by `src/cli.ts` with:
+
+```bash
+--remote-debugging-port=9222
+--user-data-dir=.chrome-profile
+--window-position=-9999,-9999
+--window-size=1280,720
+```
+
+**Arguments**:
+- `--remote-debugging-port`: Enables CDP on port 9222
+- `--user-data-dir`: Persistent profile directory for session/cookie persistence
+- `--window-position`: Places the window off-screen (at -9999,-9999) so it stays out of view
+- `--window-size`: Default window size
+
+### Download Configuration
+
+Downloads are configured via CDP:
+
+```typescript
+await client.send("Browser.setDownloadBehavior", {
+  behavior: "allow",
+  downloadPath: "./agent/downloads",
+  eventsEnabled: true,
+})
+```
+
+**Behavior**:
+- Downloads start automatically (no dialog)
+- Files saved to `./agent/downloads/`
+- Download events can be monitored via CDP
+
+---
+
+## Error Messages Reference
+
+### Common Errors
+
+**"Could not find local Chrome installation"**
+- Cause: Chrome/Chromium not installed or not in standard locations
+- Solution: Install Chrome from https://www.google.com/chrome/
+
+**"Chrome failed to start with remote debugging on port 9222"**
+- Cause: Port 9222 already in use or Chrome can't bind to port
+- Solution: Close other Chrome instances or change CDP port
+
+**"Browser failed to become ready within timeout"**
+- Cause: Chrome launched but page context not ready
+- Solution: Check Chrome version compatibility, restart system
+
+**"Error performing action: element not found"**
+- Cause: Natural language description didn't match any page element
+- Solution: Use more specific description or use observe to find elements
+
+**"Error extracting data: schema validation failed"**
+- Cause: Extracted data type doesn't match schema
+- Solution: Verify schema types match actual page data
+
+**"Error taking screenshot: directory not writable"**
+- Cause: Insufficient permissions for screenshots directory
+- Solution: Check file permissions on `./agent/browser_screenshots/`
+
+---
+
+## Performance Considerations
+
+### Caching
+
+Stagehand caches page analysis to improve performance on repeated actions. Cache is maintained for:
+- Element selectors
+- Page structure analysis
+- Vision model results
+
+### Timeouts
+
+Default timeouts:
+- Navigation: 30 seconds
+- Action execution: 30 seconds
+- Extraction: 60 seconds
+- CDP connection: 15 seconds (50 retries × 300ms)
+
+### Resource Usage
+
+Browser automation consumes:
+- Memory: ~200-500MB for Chrome process
+- CPU: Variable based on page complexity
+- Disk: ~50-200MB for Chrome profile
+- Network: Depends on pages visited
+
+---
+
+## Security Considerations
+
+### Credential Handling
+
+- Browser uses persistent profile (`.chrome-profile/`)
+- Saved passwords and cookies persist between sessions
+- Consider using isolated profiles for sensitive operations
+
+### Download Safety
+
+- Downloads automatically saved to `./agent/downloads/`
+- No file type restrictions enforced
+- Verify downloaded file integrity before use
+
+### Network Access
+
+- Browser has full network access
+- Respects system proxy settings
+- Can access localhost and internal networks
+
+---
+
+## Debugging Tips
+
+### Enable Verbose Logging
+
+Edit `src/cli.ts` and change verbose level in Stagehand configuration:
+
+```typescript
+// Change verbose: 0 to verbose: 1 or 2
+verbose: 2,  // Maximum verbosity
+```
+
+### View Chrome Console
+
+Connect to Chrome DevTools manually:
+1. Open Chrome
+2. Navigate to `chrome://inspect`
+3. Click "inspect" under Remote Target
+
+### Check CDP Connection
+
+Test CDP endpoint:
+```bash
+curl http://localhost:9222/json/version
+```
+
+### Monitor Browser Process
+
+Check Chrome process:
+```bash
+ps aux | grep chrome
+```
+
+### View Screenshots
+
+Screenshots provide visual debugging:
+```bash
+ls -lh ./agent/browser_screenshots/
+open ./agent/browser_screenshots/screenshot-*.png
+```
+
+### Test CLI Commands
+
+Test individual commands:
+```bash
+browser navigate https://example.com
+browser screenshot
+browser close
+```
+
+---
+
+## Version Information
+
+- **Stagehand**: Uses `@browserbasehq/stagehand` package v2.5.2+
+- **Model**: Claude Haiku 4.5 (claude-haiku-4-5-20251001) for browser actions
+- **CLI Tool**: TypeScript CLI in `src/cli.ts`
+- **Agent SDK**: `@anthropic-ai/claude-agent-sdk` for conversation framework
+- **Browser**: Local Chrome/Chromium installation
+
+For updates and changelog, see the main project repository.
diff --git a/SKILL.md b/SKILL.md
new file mode 100644
index 0000000..33dc713
--- /dev/null
+++ b/SKILL.md
@@ -0,0 +1,73 @@
+---
+name: browser
+description: "使用自然语言自动化网页浏览器交互。"
+allowed-tools: Bash
+---
+
+# Browser Automation
+
+Automate browser interactions using Stagehand CLI with Claude.
+
+## Environment Selection (Local vs Remote)
+
+The skill automatically selects between local and remote browser environments:
+- **If Browserbase API keys exist** (BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID in .env file): Uses remote Browserbase environment
+- **If no Browserbase API keys**: Falls back to local Chrome browser
+- **No user prompting**: The selection happens automatically based on available configuration
+
+## Setup (First Time Only)
+
+Check `setup.json` in this directory. If `setupComplete` is `false`, run:
+
+```bash
+npm install    # Install dependencies
+npm link       # Create global 'browser' command
+```
+
+## Commands
+
+All commands work identically in both modes:
+
+```bash
+browser navigate <url>                    # Go to URL
+browser act "<action>"                    # Natural language action
+browser extract "<instruction>" ['{}']    # Extract data (optional schema)
+browser observe "<query>"                 # Discover elements
+browser screenshot                        # Take screenshot
+browser close                             # Close browser
+```
+
+## Quick Example
+
+```bash
+browser navigate https://example.com
+browser act "click the Sign In button"
+browser extract "get the page title"
+browser close
+```
+
+## Mode Comparison
+
+| Feature | Local | Browserbase |
+|---------|-------|-------------|
+| Speed | Faster | Slightly slower |
+| Setup | Chrome required | API key required |
+| Stealth mode | No | Yes |
+| Proxy/CAPTCHA | No | Yes |
+| Best for | Development | Production/scraping |
+
+## Best Practices
+
+1. **Always navigate first** before interacting
+2. **View screenshots** after each command to verify
+3. **Be specific** in action descriptions
+4. **Close browser** when done
+
+## Troubleshooting
+
+- **Chrome not found**: Install Chrome or use Browserbase mode
+- **Action fails**: Use `browser observe` to discover available elements
+- **Browserbase fails**: Verify API key and project ID are set
+
+For detailed examples, see [EXAMPLES.md](EXAMPLES.md).
+For API reference, see [REFERENCE.md](REFERENCE.md).
diff --git a/_meta.json b/_meta.json
new file mode 100644
index 0000000..997dabf
--- /dev/null
+++ b/_meta.json
@@ -0,0 +1,6 @@
+{
+  "ownerId": "kn7ccvk3zbxj1yzzsxne9tfayn801hks",
+  "slug": "browser-automation",
+  "version": "1.0.1",
+  "publishedAt": 1770307243177
+}
\ No newline at end of file
diff --git a/setup.json b/setup.json
new file mode 100644
index 0000000..d9ac900
--- /dev/null
+++ b/setup.json
@@ -0,0 +1,35 @@
+{
+  "setupComplete": false,
+  "prerequisites": {
+    "chrome": {
+      "required": true,
+      "installed": false,
+      "description": "Google Chrome browser"
+    },
+    "dependencies": {
+      "required": true,
+      "installed": false,
+      "description": "Node.js dependencies (npm install completed)"
+    },
+    "apiKey": {
+      "required": true,
+      "configured": false,
+      "description": "ANTHROPIC_API_KEY exported (i.e., $ANTHROPIC_API_KEY) or in .env file"
+    },
+    "browserCommand": {
+      "required": true,
+      "installed": false,
+      "description": "Browser CLI command globally linked (npm link)"
+    }
+  },
+  "setupInstructions": [
+    "1. Run: npm install (this will automatically build TypeScript)",
+    "2. Run: npm link (this creates the global 'browser' command)",
+    "3. (RECOMMENDED) Export ANTHROPIC_API_KEY: export ANTHROPIC_API_KEY='your-api-key-here' (check if already exported)",
+    "   OR alternatively create .env file: cp .env.example .env and edit it to add your API key",
+    "4. Ensure Google Chrome is installed on your system",
+    "5. Test installation: browser navigate https://example.com",
+    "6. Update this setup.json file: set all 'installed'/'configured' to true and 'setupComplete' to true"
+  ],
+  "verifySetup": "Run 'browser navigate https://example.com' from any directory to verify installation"
+}