From 4ca4dbf3969252d3b1a3449621e183c9718e603b Mon Sep 17 00:00:00 2001 From: zlei9 Date: Sun, 29 Mar 2026 13:18:25 +0800 Subject: [PATCH] Initial commit with translated description --- SKILL.md | 215 +++++++++++++++++++++++++++++++++++++ _meta.json | 6 ++ scripts/click.sh | 46 ++++++++ scripts/cursor_position.sh | 9 ++ scripts/drag.sh | 23 ++++ scripts/hold_key.sh | 29 +++++ scripts/key.sh | 19 ++++ scripts/minimal-desktop.sh | 53 +++++++++ scripts/mouse_down.sh | 7 ++ scripts/mouse_move.sh | 16 +++ scripts/mouse_up.sh | 11 ++ scripts/screenshot.sh | 24 +++++ scripts/scroll.sh | 49 +++++++++ scripts/setup-vnc.sh | 144 +++++++++++++++++++++++++ scripts/type_text.sh | 30 ++++++ scripts/vnc_start.sh | 33 ++++++ scripts/vnc_stop.sh | 16 +++ scripts/wait.sh | 24 +++++ scripts/zoom.sh | 49 +++++++++ 19 files changed, 803 insertions(+) create mode 100644 SKILL.md create mode 100644 _meta.json create mode 100644 scripts/click.sh create mode 100644 scripts/cursor_position.sh create mode 100644 scripts/drag.sh create mode 100644 scripts/hold_key.sh create mode 100644 scripts/key.sh create mode 100644 scripts/minimal-desktop.sh create mode 100644 scripts/mouse_down.sh create mode 100644 scripts/mouse_move.sh create mode 100644 scripts/mouse_up.sh create mode 100644 scripts/screenshot.sh create mode 100644 scripts/scroll.sh create mode 100644 scripts/setup-vnc.sh create mode 100644 scripts/type_text.sh create mode 100644 scripts/vnc_start.sh create mode 100644 scripts/vnc_stop.sh create mode 100644 scripts/wait.sh create mode 100644 scripts/zoom.sh diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..7d615dd --- /dev/null +++ b/SKILL.md @@ -0,0 +1,215 @@ +--- +name: computer-use +description: "用于无头Linux服务器的完整桌面计算机使用。" +version: 1.2.1 +--- + +# Computer Use Skill + +Full desktop GUI control for headless Linux servers. Creates a virtual display (Xvfb + XFCE) so you can run and control desktop applications on VPS/cloud instances without a physical monitor. 
+ +## Environment + +- **Display**: `:99` +- **Resolution**: 1024x768 (XGA, Anthropic recommended) +- **Desktop**: XFCE4 (minimal — xfwm4 + panel only) + +## Quick Setup + +Run the setup script to install everything (systemd services, flicker-free VNC): + +```bash +./scripts/setup-vnc.sh +``` + +This installs: +- Xvfb virtual display on `:99` +- Minimal XFCE desktop (xfwm4 + panel, no xfdesktop) +- x11vnc with stability flags +- noVNC for browser access + +All services auto-start on boot and auto-restart on crash. + +## Actions Reference + +| Action | Script | Arguments | Description | +|--------|--------|-----------|-------------| +| screenshot | `screenshot.sh` | — | Capture screen → base64 PNG | +| cursor_position | `cursor_position.sh` | — | Get current mouse X,Y | +| mouse_move | `mouse_move.sh` | x y | Move mouse to coordinates | +| left_click | `click.sh` | x y left | Left click at coordinates | +| right_click | `click.sh` | x y right | Right click | +| middle_click | `click.sh` | x y middle | Middle click | +| double_click | `click.sh` | x y double | Double click | +| triple_click | `click.sh` | x y triple | Triple click (select line) | +| left_click_drag | `drag.sh` | x1 y1 x2 y2 | Drag from start to end | +| left_mouse_down | `mouse_down.sh` | — | Press mouse button | +| left_mouse_up | `mouse_up.sh` | — | Release mouse button | +| type | `type_text.sh` | "text" | Type text (50 char chunks, 12ms delay) | +| key | `key.sh` | "combo" | Press key (Return, ctrl+c, alt+F4) | +| hold_key | `hold_key.sh` | "key" secs | Hold key for duration | +| scroll | `scroll.sh` | dir amt [x y] | Scroll up/down/left/right | +| wait | `wait.sh` | seconds | Wait then screenshot | +| zoom | `zoom.sh` | x1 y1 x2 y2 | Cropped region screenshot | + +## Usage Examples + +```bash +export DISPLAY=:99 + +# Take screenshot +./scripts/screenshot.sh + +# Click at coordinates +./scripts/click.sh 512 384 left + +# Type text +./scripts/type_text.sh "Hello world" + +# Press key combo 
+./scripts/key.sh "ctrl+s" + +# Scroll down +./scripts/scroll.sh down 5 +``` + +## Workflow Pattern + +1. **Screenshot** — Always start by seeing the screen +2. **Analyze** — Identify UI elements and coordinates +3. **Act** — Click, type, scroll +4. **Screenshot** — Verify result +5. **Repeat** + +## Tips + +- Screen is 1024x768, origin (0,0) at top-left +- Click to focus before typing in text fields +- Use `ctrl+End` to jump to page bottom in browsers +- Most actions auto-screenshot after 2 sec delay +- Long text is chunked (50 chars) with 12ms keystroke delay + +## Live Desktop Viewing (VNC) + +Watch the desktop in real-time via browser or VNC client. + +### Connect via Browser + +```bash +# SSH tunnel (run on your local machine) +ssh -L 6080:localhost:6080 your-server + +# Open in browser +http://localhost:6080/vnc.html +``` + +### Connect via VNC Client + +```bash +# SSH tunnel +ssh -L 5900:localhost:5900 your-server + +# Connect VNC client to localhost:5900 +``` + +### SSH Config (recommended) + +Add to `~/.ssh/config` for automatic tunneling: + +``` +Host your-server + HostName your.server.ip + User your-user + LocalForward 6080 127.0.0.1:6080 + LocalForward 5900 127.0.0.1:5900 +``` + +Then just `ssh your-server` and VNC is available. + +## System Services + +```bash +# Check status +systemctl status xvfb xfce-minimal x11vnc novnc + +# Restart if needed +sudo systemctl restart xvfb xfce-minimal x11vnc novnc +``` + +### Service Chain + +``` +xvfb → xfce-minimal → x11vnc → novnc +``` + +- **xvfb**: Virtual display :99 (1024x768x24) +- **xfce-minimal**: Watchdog that runs xfwm4+panel, kills xfdesktop +- **x11vnc**: VNC server with `-noxdamage` for stability +- **novnc**: WebSocket proxy with heartbeat for connection stability + +## Opening Applications + +```bash +export DISPLAY=:99 + +# Chrome — only use --no-sandbox if the kernel lacks user namespace support. 
+# Check: cat /proc/sys/kernel/unprivileged_userns_clone +# 1 = sandbox works, do NOT use --no-sandbox +# 0 = sandbox fails, --no-sandbox required as fallback +# Using --no-sandbox when unnecessary causes instability and crashes. +if [ "$(cat /proc/sys/kernel/unprivileged_userns_clone 2>/dev/null)" = "0" ]; then + google-chrome --no-sandbox & +else + google-chrome & +fi + +xfce4-terminal & # Terminal +thunar & # File manager +``` + +**Note**: Snap browsers (Firefox, Chromium) have sandbox issues on headless servers. Use Chrome `.deb` instead: + +```bash +wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb +sudo dpkg -i google-chrome-stable_current_amd64.deb +sudo apt-get install -f +``` + +## Manual Setup + +To install the packages yourself and then let `setup-vnc.sh` configure the services: + +```bash +# Install packages +sudo apt install -y xvfb xfce4 xfce4-terminal xdotool scrot imagemagick dbus-x11 x11vnc novnc websockify + +# Run the setup script (generates systemd services, masks xfdesktop, starts everything) +./scripts/setup-vnc.sh +``` + +For a fully manual setup without running the script, read `setup-vnc.sh` itself: it generates all systemd service files inline, so it contains the exact service definitions. 
+ +## Troubleshooting + +### VNC shows black screen +- Check if xfwm4 is running: `pgrep xfwm4` +- Restart desktop: `sudo systemctl restart xfce-minimal` + +### VNC flickering/flashing +- Ensure xfdesktop is masked (check `/usr/bin/xfdesktop`) +- xfdesktop causes flicker due to clear→draw cycles on Xvfb + +### VNC disconnects frequently +- Check noVNC has `--heartbeat 30` flag +- Check x11vnc has `-noxdamage` flag + +### x11vnc crashes (SIGSEGV) +- Add `-noxdamage -noxfixes` flags +- The DAMAGE extension causes crashes on Xvfb + +## Requirements + +Installed by `setup-vnc.sh`: +```bash +xvfb xfce4 xfce4-terminal xdotool scrot imagemagick dbus-x11 x11vnc novnc websockify +``` diff --git a/_meta.json b/_meta.json new file mode 100644 index 0000000..72f6366 --- /dev/null +++ b/_meta.json @@ -0,0 +1,6 @@ +{ + "ownerId": "kn7cew7yks7cgeynqqjn8asxvx80axjx", + "slug": "computer-use", + "version": "1.2.1", + "publishedAt": 1771195222594 +} \ No newline at end of file diff --git a/scripts/click.sh b/scripts/click.sh new file mode 100644 index 0000000..a14dbf9 --- /dev/null +++ b/scripts/click.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# click.sh - Click at coordinates +# Usage: click.sh X Y [left|right|middle|double|triple] + +export DISPLAY=:99 + +X=$1 +Y=$2 +BUTTON=${3:-left} + +if [ -z "$X" ] || [ -z "$Y" ]; then + echo "ERROR: Usage: click.sh X Y [left|right|middle|double|triple]" >&2 + exit 1 +fi + +# Move to position first +xdotool mousemove --sync "$X" "$Y" + +# Click based on button type +case "$BUTTON" in + left) + xdotool click 1 + ;; + right) + xdotool click 3 + ;; + middle) + xdotool click 2 + ;; + double) + xdotool click --repeat 2 --delay 100 1 + ;; + triple) + xdotool click --repeat 3 --delay 100 1 + ;; + *) + echo "ERROR: Unknown button type: $BUTTON" >&2 + exit 1 + ;; +esac + +echo "Clicked $BUTTON at $X,$Y" + +# Auto-screenshot after action (2 sec delay) +sleep 2 +exec "$(dirname "$0")/screenshot.sh" diff --git a/scripts/cursor_position.sh 
b/scripts/cursor_position.sh new file mode 100644 index 0000000..7065943 --- /dev/null +++ b/scripts/cursor_position.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# cursor_position.sh - Get current mouse coordinates + +export DISPLAY=:99 + +# Get mouse location +eval $(xdotool getmouselocation --shell 2>/dev/null) + +echo "X=$X,Y=$Y" diff --git a/scripts/drag.sh b/scripts/drag.sh new file mode 100644 index 0000000..9a18b92 --- /dev/null +++ b/scripts/drag.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# drag.sh - Drag from start to end coordinates +# Usage: drag.sh X1 Y1 X2 Y2 + +export DISPLAY=:99 + +X1=$1 +Y1=$2 +X2=$3 +Y2=$4 + +if [ -z "$X1" ] || [ -z "$Y1" ] || [ -z "$X2" ] || [ -z "$Y2" ]; then + echo "ERROR: Usage: drag.sh X1 Y1 X2 Y2" >&2 + exit 1 +fi + +xdotool mousemove --sync "$X1" "$Y1" mousedown 1 mousemove --sync "$X2" "$Y2" mouseup 1 + +echo "Dragged from $X1,$Y1 to $X2,$Y2" + +# Auto-screenshot after action +sleep 2 +exec "$(dirname "$0")/screenshot.sh" diff --git a/scripts/hold_key.sh b/scripts/hold_key.sh new file mode 100644 index 0000000..9fc0a52 --- /dev/null +++ b/scripts/hold_key.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# hold_key.sh - Hold a key for specified duration +# Usage: hold_key.sh "key" duration_seconds + +export DISPLAY=:99 + +KEY="$1" +DURATION="$2" + +if [ -z "$KEY" ] || [ -z "$DURATION" ]; then + echo "ERROR: Usage: hold_key.sh \"key\" duration_seconds" >&2 + exit 1 +fi + +# Validate duration is reasonable +if (( $(echo "$DURATION > 100" | bc -l) )); then + echo "ERROR: Duration too long (max 100 seconds)" >&2 + exit 1 +fi + +xdotool keydown "$KEY" +sleep "$DURATION" +xdotool keyup "$KEY" + +echo "Held $KEY for $DURATION seconds" + +# Auto-screenshot after action +sleep 2 +exec "$(dirname "$0")/screenshot.sh" diff --git a/scripts/key.sh b/scripts/key.sh new file mode 100644 index 0000000..5e7c50a --- /dev/null +++ b/scripts/key.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# key.sh - Press key or key combination +# Usage: key.sh "Return" or key.sh "ctrl+c" or key.sh 
"alt+F4" + +export DISPLAY=:99 + +KEY="$1" + +if [ -z "$KEY" ]; then + echo "ERROR: Usage: key.sh \"key_combo\"" >&2 + exit 1 +fi + +xdotool key -- "$KEY" +echo "Pressed key: $KEY" + +# Auto-screenshot after action +sleep 2 +exec "$(dirname "$0")/screenshot.sh" diff --git a/scripts/minimal-desktop.sh b/scripts/minimal-desktop.sh new file mode 100644 index 0000000..c60540f --- /dev/null +++ b/scripts/minimal-desktop.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Minimal XFCE desktop without xfdesktop (prevents VNC flickering) +# Runs as a watchdog: starts xfwm4+panel, kills xfdesktop if it respawns + +export DISPLAY=:99 + +# Wait for X server +while ! xdpyinfo -display :99 >/dev/null 2>&1; do + sleep 0.5 +done + +# Kill any existing session/desktop that causes flickering +pkill -f xfce4-session 2>/dev/null +pkill -f xfdesktop 2>/dev/null +sleep 1 + +# Set static background (no redraw cycles = no flicker) +xsetroot -solid "#2d3436" + +# Disable screen blanking +xset s off +xset s noblank +xset -dpms 2>/dev/null + +# Start window manager (if not running) +pgrep -x xfwm4 || xfwm4 & + +# Start panel (if not running) +pgrep -x xfce4-panel || xfce4-panel & + +# Watchdog loop: kill flickering processes, respawn essentials +while true; do + # Kill unwanted processes that cause flickering + if pgrep -x xfdesktop >/dev/null; then + pkill -f xfdesktop + xsetroot -solid "#2d3436" + fi + if pgrep -x xfce4-session >/dev/null; then + pkill -f xfce4-session + fi + + # Respawn xfwm4 if it died + if ! pgrep -x xfwm4 >/dev/null; then + xfwm4 & + fi + + # Respawn panel if it died + if ! 
pgrep -x xfce4-panel >/dev/null; then + xfce4-panel & + fi + + sleep 1 +done diff --git a/scripts/mouse_down.sh b/scripts/mouse_down.sh new file mode 100644 index 0000000..70a3efb --- /dev/null +++ b/scripts/mouse_down.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# mouse_down.sh - Press left mouse button (no release) + +export DISPLAY=:99 + +xdotool mousedown 1 +echo "Mouse button pressed" diff --git a/scripts/mouse_move.sh b/scripts/mouse_move.sh new file mode 100644 index 0000000..ad9e4e5 --- /dev/null +++ b/scripts/mouse_move.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# mouse_move.sh - Move mouse to coordinates +# Usage: mouse_move.sh X Y + +export DISPLAY=:99 + +X=$1 +Y=$2 + +if [ -z "$X" ] || [ -z "$Y" ]; then + echo "ERROR: Usage: mouse_move.sh X Y" >&2 + exit 1 +fi + +xdotool mousemove --sync "$X" "$Y" +echo "Moved mouse to $X,$Y" diff --git a/scripts/mouse_up.sh b/scripts/mouse_up.sh new file mode 100644 index 0000000..2693837 --- /dev/null +++ b/scripts/mouse_up.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# mouse_up.sh - Release left mouse button + +export DISPLAY=:99 + +xdotool mouseup 1 +echo "Mouse button released" + +# Auto-screenshot after action +sleep 2 +exec "$(dirname "$0")/screenshot.sh" diff --git a/scripts/screenshot.sh b/scripts/screenshot.sh new file mode 100644 index 0000000..6861975 --- /dev/null +++ b/scripts/screenshot.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# screenshot.sh - Capture screen and return base64 PNG +# Resolution: 1024x768 (XGA) + +export DISPLAY=:99 +OUTPUT_DIR="/tmp/computer-use" +mkdir -p "$OUTPUT_DIR" + +TIMESTAMP=$(date +%s%N) +FILE="$OUTPUT_DIR/screenshot_$TIMESTAMP.png" + +# Take screenshot +scrot -o "$FILE" 2>/dev/null + +if [ ! 
-f "$FILE" ]; then + echo "ERROR: Failed to take screenshot" >&2 + exit 1 +fi + +# Output base64 +base64 -w0 "$FILE" + +# Cleanup +rm -f "$FILE" diff --git a/scripts/scroll.sh b/scripts/scroll.sh new file mode 100644 index 0000000..a61568d --- /dev/null +++ b/scripts/scroll.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# scroll.sh - Scroll in a direction +# Usage: scroll.sh direction amount [x y] +# direction: up, down, left, right +# amount: number of scroll units + +export DISPLAY=:99 + +DIRECTION="$1" +AMOUNT="${2:-3}" +X="$3" +Y="$4" + +if [ -z "$DIRECTION" ]; then + echo "ERROR: Usage: scroll.sh direction [amount] [x y]" >&2 + exit 1 +fi + +# Move to position if specified +if [ -n "$X" ] && [ -n "$Y" ]; then + xdotool mousemove --sync "$X" "$Y" +fi + +# Map direction to button +case "$DIRECTION" in + up) + BUTTON=4 + ;; + down) + BUTTON=5 + ;; + left) + BUTTON=6 + ;; + right) + BUTTON=7 + ;; + *) + echo "ERROR: Unknown direction: $DIRECTION (use up/down/left/right)" >&2 + exit 1 + ;; +esac + +xdotool click --repeat "$AMOUNT" "$BUTTON" +echo "Scrolled $DIRECTION $AMOUNT times" + +# Auto-screenshot after action +sleep 2 +exec "$(dirname "$0")/screenshot.sh" diff --git a/scripts/setup-vnc.sh b/scripts/setup-vnc.sh new file mode 100644 index 0000000..267a577 --- /dev/null +++ b/scripts/setup-vnc.sh @@ -0,0 +1,144 @@ +#!/bin/bash +# Setup stable VNC for computer-use skill +# Run once to install systemd services for flicker-free VNC desktop + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SKILL_DIR="$(dirname "$SCRIPT_DIR")" +USER=$(whoami) +HOME_DIR=$(eval echo "~$USER") + +echo "=== Computer Use VNC Setup ===" +echo "User: $USER" +echo "Skill dir: $SKILL_DIR" +echo "" + +# Install packages +echo "[1/6] Installing packages..." +sudo apt update -qq +sudo apt install -y xvfb xfce4 xfce4-terminal xdotool scrot imagemagick dbus-x11 x11vnc novnc websockify + +# Copy minimal-desktop.sh to a stable location +echo "[2/6] Installing watchdog script..." 
+sudo mkdir -p /opt/computer-use +sudo cp "$SCRIPT_DIR/minimal-desktop.sh" /opt/computer-use/ +sudo chmod +x /opt/computer-use/minimal-desktop.sh + +# Install systemd services (generated inline: unquoted EOF so $USER and $HOME_DIR expand now; piped to sudo tee so the write runs as root) +echo "[3/6] Installing systemd services..." + +cat <<EOF | sudo tee /etc/systemd/system/xvfb.service > /dev/null +[Unit] +Description=Xvfb Virtual Display :99 +After=graphical.target +Wants=graphical.target + +[Service] +Type=simple +ExecStart=/usr/bin/Xvfb :99 -screen 0 1024x768x24 -nolisten tcp -dpi 96 +Restart=always +RestartSec=1 +User=$USER +Environment="HOME=$HOME_DIR" + +[Install] +WantedBy=multi-user.target +EOF + +cat <<EOF | sudo tee /etc/systemd/system/xfce-minimal.service > /dev/null +[Unit] +Description=XFCE Minimal Desktop for Computer Use +After=xvfb.service +Requires=xvfb.service + +[Service] +Type=simple +ExecStart=/opt/computer-use/minimal-desktop.sh +Restart=always +RestartSec=3 +User=$USER +Environment="HOME=$HOME_DIR" +Environment="DISPLAY=:99" + +[Install] +WantedBy=multi-user.target +EOF + +cat <<EOF | sudo tee /etc/systemd/system/x11vnc.service > /dev/null +[Unit] +Description=x11vnc VNC Server +After=xfce-minimal.service +Requires=xfce-minimal.service + +[Service] +Type=simple +ExecStart=/usr/bin/x11vnc -display :99 -forever -shared -rfbport 5900 -noxdamage -noxfixes -noclipboard +Restart=always +RestartSec=2 +User=$USER +Environment="HOME=$HOME_DIR" +Environment="DISPLAY=:99" + +[Install] +WantedBy=multi-user.target +EOF + +cat <<EOF | sudo tee /etc/systemd/system/novnc.service > /dev/null +[Unit] +Description=noVNC WebSocket Proxy +After=x11vnc.service +Requires=x11vnc.service + +[Service] +Type=simple +ExecStart=/usr/share/novnc/utils/novnc_proxy --vnc localhost:5900 --listen 6080 --heartbeat 30 +Restart=always +RestartSec=2 +User=$USER +Environment="HOME=$HOME_DIR" + +[Install] +WantedBy=multi-user.target +EOF + +# Mask xfdesktop to prevent flickering +echo "[4/6] Masking xfdesktop (prevents flicker)..." +if [ -f /usr/bin/xfdesktop ] && [ ! 
-f /usr/bin/xfdesktop.real ]; then + sudo mv /usr/bin/xfdesktop /usr/bin/xfdesktop.real + echo '#!/bin/bash +# Masked - xfdesktop causes VNC flickering on Xvfb +exit 0' | sudo tee /usr/bin/xfdesktop > /dev/null + sudo chmod +x /usr/bin/xfdesktop + echo " xfdesktop masked (original at /usr/bin/xfdesktop.real)" +else + echo " xfdesktop already masked or not found" +fi + +# Enable and start services +echo "[5/6] Enabling services..." +sudo systemctl daemon-reload +sudo systemctl enable xvfb xfce-minimal x11vnc novnc + +echo "[6/6] Starting services..." +sudo systemctl start xvfb +sleep 2 +sudo systemctl start xfce-minimal +sleep 3 +sudo systemctl start x11vnc +sleep 1 +sudo systemctl start novnc + +echo "" +echo "=== Setup Complete ===" +echo "" +echo "Services running:" +systemctl is-active xvfb xfce-minimal x11vnc novnc | paste - - - - | awk '{print " xvfb: "$1" xfce-minimal: "$2" x11vnc: "$3" novnc: "$4}' +echo "" +echo "Access VNC:" +echo " 1. SSH tunnel: ssh -L 6080:localhost:6080 $(hostname)" +echo " 2. 
Open: http://localhost:6080/vnc.html" +echo "" +echo "Or add to ~/.ssh/config:" +echo " Host $(hostname)" +echo " LocalForward 6080 127.0.0.1:6080" diff --git a/scripts/type_text.sh b/scripts/type_text.sh new file mode 100644 index 0000000..5432aad --- /dev/null +++ b/scripts/type_text.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# type_text.sh - Type text with realistic delays +# Usage: type_text.sh "text to type" +# Types in 50 character chunks with 12ms delay between keystrokes + +export DISPLAY=:99 + +TEXT="$1" + +if [ -z "$TEXT" ]; then + echo "ERROR: Usage: type_text.sh \"text to type\"" >&2 + exit 1 +fi + +# Type in chunks of 50 characters +CHUNK_SIZE=50 +LENGTH=${#TEXT} +OFFSET=0 + +while [ $OFFSET -lt $LENGTH ]; do + CHUNK="${TEXT:$OFFSET:$CHUNK_SIZE}" + xdotool type --delay 12 -- "$CHUNK" + OFFSET=$((OFFSET + CHUNK_SIZE)) +done + +echo "Typed ${#TEXT} characters" + +# Auto-screenshot after action +sleep 2 +exec "$(dirname "$0")/screenshot.sh" diff --git a/scripts/vnc_start.sh b/scripts/vnc_start.sh new file mode 100644 index 0000000..19e1c64 --- /dev/null +++ b/scripts/vnc_start.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Start VNC services for live desktop viewing + +DISPLAY_NUM="${DISPLAY_NUM:-:99}" + +# Kill existing instances +pkill -f "x11vnc.*display $DISPLAY_NUM" 2>/dev/null +pkill -f "websockify.*6080" 2>/dev/null +sleep 1 + +# Start x11vnc (VNC server) +echo "Starting x11vnc on $DISPLAY_NUM..." +x11vnc -display "$DISPLAY_NUM" -forever -shared -nopw -listen localhost & +sleep 2 + +# Start websockify (noVNC web bridge) +echo "Starting noVNC on port 6080..." +websockify --web=/usr/share/novnc 6080 localhost:5900 & +sleep 1 + +# Verify +if pgrep -f x11vnc > /dev/null && pgrep -f websockify > /dev/null; then + echo "" + echo "✓ VNC services started" + echo "" + echo "To connect:" + echo " 1. SSH tunnel: ssh -L 6080:localhost:6080 your-server" + echo " 2. 
Open: http://localhost:6080/vnc.html?autoconnect=true" + echo "" +else + echo "✗ Failed to start VNC services" + exit 1 +fi diff --git a/scripts/vnc_stop.sh b/scripts/vnc_stop.sh new file mode 100644 index 0000000..7e9c536 --- /dev/null +++ b/scripts/vnc_stop.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Stop VNC services + +echo "Stopping VNC services..." + +pkill -f x11vnc 2>/dev/null +pkill -f websockify 2>/dev/null + +sleep 1 + +if ! pgrep -f x11vnc > /dev/null && ! pgrep -f websockify > /dev/null; then + echo "✓ VNC services stopped" +else + echo "✗ Some processes may still be running" + ps aux | grep -E "(x11vnc|websockify)" | grep -v grep +fi diff --git a/scripts/wait.sh b/scripts/wait.sh new file mode 100644 index 0000000..6ddbf52 --- /dev/null +++ b/scripts/wait.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# wait.sh - Wait for specified duration then screenshot +# Usage: wait.sh seconds + +export DISPLAY=:99 + +DURATION="$1" + +if [ -z "$DURATION" ]; then + echo "ERROR: Usage: wait.sh seconds" >&2 + exit 1 +fi + +# Validate duration is reasonable +if (( $(echo "$DURATION > 100" | bc -l) )); then + echo "ERROR: Duration too long (max 100 seconds)" >&2 + exit 1 +fi + +sleep "$DURATION" +echo "Waited $DURATION seconds" + +# Screenshot after waiting +exec "$(dirname "$0")/screenshot.sh" diff --git a/scripts/zoom.sh b/scripts/zoom.sh new file mode 100644 index 0000000..609d024 --- /dev/null +++ b/scripts/zoom.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# zoom.sh - Capture cropped region of screen +# Usage: zoom.sh X1 Y1 X2 Y2 +# Returns base64 of the cropped region + +export DISPLAY=:99 +OUTPUT_DIR="/tmp/computer-use" +mkdir -p "$OUTPUT_DIR" + +X1=$1 +Y1=$2 +X2=$3 +Y2=$4 + +if [ -z "$X1" ] || [ -z "$Y1" ] || [ -z "$X2" ] || [ -z "$Y2" ]; then + echo "ERROR: Usage: zoom.sh X1 Y1 X2 Y2" >&2 + exit 1 +fi + +TIMESTAMP=$(date +%s%N) +FULL_FILE="$OUTPUT_DIR/full_$TIMESTAMP.png" +CROP_FILE="$OUTPUT_DIR/crop_$TIMESTAMP.png" + +# Take full screenshot +scrot -o "$FULL_FILE" 2>/dev/null + +if [ ! 
-f "$FULL_FILE" ]; then + echo "ERROR: Failed to take screenshot" >&2 + exit 1 +fi + +# Calculate crop dimensions +WIDTH=$((X2 - X1)) +HEIGHT=$((Y2 - Y1)) + +# Crop using ImageMagick +convert "$FULL_FILE" -crop "${WIDTH}x${HEIGHT}+${X1}+${Y1}" +repage "$CROP_FILE" + +if [ ! -f "$CROP_FILE" ]; then + echo "ERROR: Failed to crop screenshot" >&2 + rm -f "$FULL_FILE" + exit 1 +fi + +# Output base64 +base64 -w0 "$CROP_FILE" + +# Cleanup +rm -f "$FULL_FILE" "$CROP_FILE"