Initial commit with translated description

This commit is contained in:
2026-03-29 13:18:25 +08:00
commit 4ca4dbf396
19 changed files with 803 additions and 0 deletions

46
scripts/click.sh Normal file
View File

@@ -0,0 +1,46 @@
#!/bin/bash
# click.sh - Move the pointer to (X, Y) on display :99 and click there.
#
# Usage: click.sh X Y [left|right|middle|double|triple]
#   X, Y    target coordinates, handed to `xdotool mousemove`
#   button  click type, defaults to "left"; "double" and "triple" repeat
#           button 1 with a 100 ms delay between clicks
#
# On success prints "Clicked <button> at <X>,<Y>", waits 2 seconds and then
# replaces itself with screenshot.sh from the same directory.
# Exits 1 when a coordinate is missing, the button type is unknown, or an
# xdotool call fails.
export DISPLAY=:99

X=$1
Y=$2
BUTTON=${3:-left}

if [[ -z "$X" || -z "$Y" ]]; then
  echo "ERROR: Usage: click.sh X Y [left|right|middle|double|triple]" >&2
  exit 1
fi

# Resolve the button type BEFORE touching the pointer so that a bad argument
# is rejected without any side effect on the desktop.
case "$BUTTON" in
  left)   click_args=(1) ;;
  right)  click_args=(3) ;;
  middle) click_args=(2) ;;
  double) click_args=(--repeat 2 --delay 100 1) ;;
  triple) click_args=(--repeat 3 --delay 100 1) ;;
  *)
    echo "ERROR: Unknown button type: $BUTTON" >&2
    exit 1
    ;;
esac

# Move to position first; stop on failure so a click is never reported that
# did not actually happen.
if ! xdotool mousemove --sync "$X" "$Y"; then
  echo "ERROR: Failed to move mouse to $X,$Y" >&2
  exit 1
fi

if ! xdotool click "${click_args[@]}"; then
  echo "ERROR: Failed to click $BUTTON at $X,$Y" >&2
  exit 1
fi

echo "Clicked $BUTTON at $X,$Y"

# Auto-screenshot after action (2 sec delay)
sleep 2
exec "$(dirname "$0")/screenshot.sh"

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# cursor_position.sh - Get current mouse coordinates
#
# Prints "X=<x>,Y=<y>" for the pointer on display :99.
# Exits 1 if xdotool fails or its output contains no X/Y values.
export DISPLAY=:99

# `--shell` makes xdotool print KEY=VALUE lines. They are parsed explicitly
# instead of being passed to eval, so nothing in that output is ever executed.
if ! location=$(xdotool getmouselocation --shell 2>/dev/null); then
  echo "ERROR: Failed to get mouse location" >&2
  exit 1
fi

X=
Y=
while IFS='=' read -r key value; do
  case "$key" in
    X) X=$value ;;
    Y) Y=$value ;;
  esac
done <<<"$location"

if [[ -z "$X" || -z "$Y" ]]; then
  echo "ERROR: Failed to get mouse location" >&2
  exit 1
fi

echo "X=$X,Y=$Y"

23
scripts/drag.sh Normal file
View File

@@ -0,0 +1,23 @@
#!/bin/bash
# drag.sh - Press button 1 at a start point, move to an end point, release
# Usage: drag.sh X1 Y1 X2 Y2
export DISPLAY=:99

start_x=$1
start_y=$2
end_x=$3
end_y=$4

# All four coordinates are mandatory.
for coord in "$start_x" "$start_y" "$end_x" "$end_y"; do
  if [[ -z "$coord" ]]; then
    echo "ERROR: Usage: drag.sh X1 Y1 X2 Y2" >&2
    exit 1
  fi
done

# The whole gesture is a single chained xdotool invocation.
drag_steps=(
  mousemove --sync "$start_x" "$start_y"
  mousedown 1
  mousemove --sync "$end_x" "$end_y"
  mouseup 1
)
xdotool "${drag_steps[@]}"

printf 'Dragged from %s,%s to %s,%s\n' "$start_x" "$start_y" "$end_x" "$end_y"

# Wait two seconds, then hand over to the screenshot script next to this one
sleep 2
script_dir="$(dirname "$0")"
exec "$script_dir/screenshot.sh"

29
scripts/hold_key.sh Normal file
View File

@@ -0,0 +1,29 @@
#!/bin/bash
# hold_key.sh - Hold a key for specified duration
#
# Usage: hold_key.sh "key" duration_seconds
#   key               key name handed to `xdotool keydown` / `xdotool keyup`
#   duration_seconds  non-negative decimal number, at most 100 (e.g. 2, 0.5)
#
# On success prints "Held <key> for <duration> seconds", waits 2 seconds and
# then replaces itself with screenshot.sh from the same directory.
# Exits 1 on missing arguments, an invalid or too long duration, or when the
# key cannot be pressed.
export DISPLAY=:99

KEY="$1"
DURATION="$2"

if [[ -z "$KEY" || -z "$DURATION" ]]; then
  echo "ERROR: Usage: hold_key.sh \"key\" duration_seconds" >&2
  exit 1
fi

# Validate duration is reasonable. Done in pure bash (no bc dependency) and
# restricted to plain decimal numbers, so values that sleep would accept but a
# numeric comparison cannot judge (e.g. "2m") cannot slip past the cap.
numeric_re='^([0-9]+(\.[0-9]*)?|\.[0-9]+)$'
if [[ ! "$DURATION" =~ $numeric_re ]]; then
  echo "ERROR: Duration must be a non-negative number of seconds" >&2
  exit 1
fi

whole=${DURATION%%.*}                 # integer part (empty for ".5")
frac=
if [[ "$DURATION" == *.* ]]; then
  frac=${DURATION#*.}                 # digits after the decimal point
fi
whole=${whole#"${whole%%[!0]*}"}      # drop leading zeros (avoids octal parsing)
whole=${whole:-0}

# More than 3 digits is certainly > 100; checking the length first also keeps
# the arithmetic below free of overflow.
if (( ${#whole} > 3 )) || (( whole > 100 )) ||
  { (( whole == 100 )) && [[ "$frac" =~ [1-9] ]]; }; then
  echo "ERROR: Duration too long (max 100 seconds)" >&2
  exit 1
fi

# Release the key on every exit path, so an interrupted script does not leave
# the key held down on the display.
key_is_down=0
release_key() {
  if (( key_is_down )); then
    key_is_down=0
    xdotool keyup -- "$KEY"
  fi
}
trap release_key EXIT
trap 'exit 1' INT TERM

# `--` stops option parsing so a key name starting with '-' is not taken as
# an option.
if ! xdotool keydown -- "$KEY"; then
  echo "ERROR: Failed to press key: $KEY" >&2
  exit 1
fi
key_is_down=1

sleep "$DURATION"
release_key
trap - EXIT INT TERM

echo "Held $KEY for $DURATION seconds"

# Auto-screenshot after action
sleep 2
exec "$(dirname "$0")/screenshot.sh"

19
scripts/key.sh Normal file
View File

@@ -0,0 +1,19 @@
#!/bin/bash
# key.sh - Send one key press or key combination through xdotool
# Usage: key.sh "Return" or key.sh "ctrl+c" or key.sh "alt+F4"
export DISPLAY=:99

key_combo="$1"

# A key name is mandatory.
[[ -n "$key_combo" ]] || {
  echo "ERROR: Usage: key.sh \"key_combo\"" >&2
  exit 1
}

# `--` ends option parsing before the key name.
xdotool key -- "$key_combo"
printf 'Pressed key: %s\n' "$key_combo"

# Wait two seconds, then hand over to the screenshot script next to this one
sleep 2
script_dir="$(dirname "$0")"
exec "$script_dir/screenshot.sh"

View File

@@ -0,0 +1,53 @@
#!/bin/bash
# Minimal XFCE desktop without xfdesktop (prevents VNC flickering)
# Runs as a watchdog: starts xfwm4 and xfce4-panel, then loops forever killing
# xfdesktop / xfce4-session whenever they appear and restarting the window
# manager and panel whenever they are gone.
export DISPLAY=:99

readonly BACKGROUND_COLOR="#2d3436"

# Launch the named program in the background unless a process with exactly
# that name is already running.
respawn_if_missing() {
  if ! pgrep -x "$1" >/dev/null; then
    "$1" &
  fi
}

# Block until the X server on :99 answers
until xdpyinfo -display :99 >/dev/null 2>&1; do
  sleep 0.5
done

# Remove any session/desktop process left over from before
pkill -f xfce4-session 2>/dev/null
pkill -f xfdesktop 2>/dev/null
sleep 1

# Static root-window colour (no redraw cycles = no flicker)
xsetroot -solid "$BACKGROUND_COLOR"

# Turn off the screen saver, blanking and DPMS
xset s off
xset s noblank
xset -dpms 2>/dev/null

# Initial start of window manager and panel (skipped when already running)
pgrep -x xfwm4 || xfwm4 &
pgrep -x xfce4-panel || xfce4-panel &

# Watchdog: one pass per second
while :; do
  # xfdesktop came back: kill it and repaint the solid background
  if pgrep -x xfdesktop >/dev/null; then
    pkill -f xfdesktop
    xsetroot -solid "$BACKGROUND_COLOR"
  fi

  # xfce4-session came back: kill it
  pgrep -x xfce4-session >/dev/null && pkill -f xfce4-session

  # Bring back the essentials if they died
  respawn_if_missing xfwm4
  respawn_if_missing xfce4-panel

  sleep 1
done

7
scripts/mouse_down.sh Normal file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
# mouse_down.sh - Press mouse button 1 (left) and leave it pressed
readonly LEFT_BUTTON=1
export DISPLAY=:99

xdotool mousedown "$LEFT_BUTTON"
printf '%s\n' "Mouse button pressed"

16
scripts/mouse_move.sh Normal file
View File

@@ -0,0 +1,16 @@
#!/bin/bash
# mouse_move.sh - Move the pointer to the given coordinates
# Usage: mouse_move.sh X Y
export DISPLAY=:99

target_x=$1
target_y=$2

# Both coordinates are mandatory.
for coord in "$target_x" "$target_y"; do
  if [[ -z "$coord" ]]; then
    echo "ERROR: Usage: mouse_move.sh X Y" >&2
    exit 1
  fi
done

xdotool mousemove --sync "$target_x" "$target_y"
printf 'Moved mouse to %s,%s\n' "$target_x" "$target_y"

11
scripts/mouse_up.sh Normal file
View File

@@ -0,0 +1,11 @@
#!/bin/bash
# mouse_up.sh - Release mouse button 1 (left), then take a screenshot
readonly LEFT_BUTTON=1
export DISPLAY=:99

xdotool mouseup "$LEFT_BUTTON"
printf '%s\n' "Mouse button released"

# Wait two seconds, then hand over to the screenshot script next to this one
sleep 2
script_dir="$(dirname "$0")"
exec "$script_dir/screenshot.sh"

24
scripts/screenshot.sh Normal file
View File

@@ -0,0 +1,24 @@
#!/bin/bash
# screenshot.sh - Capture screen and return base64 PNG
#
# Takes a screenshot of display :99 with scrot and writes it to stdout as a
# single base64 line. The image lives only in a temporary file under
# /tmp/computer-use, which is removed on every exit path.
# Exits 1 if the temporary file cannot be created or the capture fails;
# otherwise the exit status is that of base64.
export DISPLAY=:99

OUTPUT_DIR="/tmp/computer-use"

if ! mkdir -p "$OUTPUT_DIR"; then
  echo "ERROR: Failed to create $OUTPUT_DIR" >&2
  exit 1
fi

# mktemp gives an unpredictable, exclusively created name; the .png suffix is
# kept because the file is handed to scrot as an image path.
if ! FILE=$(mktemp --suffix=.png "$OUTPUT_DIR/screenshot_XXXXXXXX"); then
  echo "ERROR: Failed to create temporary file" >&2
  exit 1
fi

# Cleanup happens here no matter how the script ends.
trap 'rm -f -- "$FILE"' EXIT

# Take screenshot. -o lets scrot overwrite the empty file mktemp just created;
# since that file always exists, success is judged by scrot's exit status and
# a non-empty result rather than by mere existence.
if ! scrot -o "$FILE" 2>/dev/null || [[ ! -s "$FILE" ]]; then
  echo "ERROR: Failed to take screenshot" >&2
  exit 1
fi

# Output base64
base64 -w0 "$FILE"

49
scripts/scroll.sh Normal file
View File

@@ -0,0 +1,49 @@
#!/bin/bash
# scroll.sh - Scroll in a direction
#
# Usage: scroll.sh direction [amount] [x y]
#   direction  up, down, left or right
#   amount     number of scroll units (non-negative integer, default 3)
#   x y        optional position to move the pointer to before scrolling;
#              used only when both are given
#
# On success prints "Scrolled <direction> <amount> times", waits 2 seconds and
# then replaces itself with screenshot.sh from the same directory.
# Exits 1 on a missing/unknown direction, an invalid amount, or when an
# xdotool call fails.
export DISPLAY=:99

DIRECTION="$1"
AMOUNT="${2:-3}"
X="$3"
Y="$4"

if [[ -z "$DIRECTION" ]]; then
  echo "ERROR: Usage: scroll.sh direction [amount] [x y]" >&2
  exit 1
fi

# Map direction to the button number passed to `xdotool click`. All arguments
# are validated before the pointer is moved, so a bad call has no side effect.
case "$DIRECTION" in
  up)    BUTTON=4 ;;
  down)  BUTTON=5 ;;
  left)  BUTTON=6 ;;
  right) BUTTON=7 ;;
  *)
    echo "ERROR: Unknown direction: $DIRECTION (use up/down/left/right)" >&2
    exit 1
    ;;
esac

amount_re='^[0-9]+$'
if [[ ! "$AMOUNT" =~ $amount_re ]]; then
  echo "ERROR: Amount must be a non-negative integer: $AMOUNT" >&2
  exit 1
fi

# Move to position if specified
if [[ -n "$X" && -n "$Y" ]]; then
  if ! xdotool mousemove --sync "$X" "$Y"; then
    echo "ERROR: Failed to move mouse to $X,$Y" >&2
    exit 1
  fi
fi

if ! xdotool click --repeat "$AMOUNT" "$BUTTON"; then
  echo "ERROR: Failed to scroll $DIRECTION" >&2
  exit 1
fi

echo "Scrolled $DIRECTION $AMOUNT times"

# Auto-screenshot after action
sleep 2
exec "$(dirname "$0")/screenshot.sh"

144
scripts/setup-vnc.sh Normal file
View File

@@ -0,0 +1,144 @@
#!/bin/bash
# Setup stable VNC for computer-use skill
# Run once to install systemd services for flicker-free VNC desktop
#
# Steps: install packages with apt, copy minimal-desktop.sh to
# /opt/computer-use, write four systemd units (xvfb, xfce-minimal, x11vnc,
# novnc) that run as the invoking user, replace /usr/bin/xfdesktop with a
# no-op stub, then enable and start the services.
# Uses sudo for every privileged step; `set -e` aborts on the first failure.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(dirname "$SCRIPT_DIR")"
USER=$(whoami)

# Look the home directory up in the passwd database instead of eval-ing a
# tilde expression; fall back to $HOME if the lookup yields nothing.
HOME_DIR=$(getent passwd "$USER" | cut -d: -f6)
HOME_DIR=${HOME_DIR:-$HOME}

echo "=== Computer Use VNC Setup ==="
echo "User: $USER"
echo "Skill dir: $SKILL_DIR"
echo ""

# Install packages
echo "[1/6] Installing packages..."
sudo apt update -qq
sudo apt install -y xvfb xfce4 xfce4-terminal xdotool scrot imagemagick dbus-x11 x11vnc novnc websockify

# Copy minimal-desktop.sh to a stable location
echo "[2/6] Installing watchdog script..."
sudo mkdir -p /opt/computer-use
sudo cp "$SCRIPT_DIR/minimal-desktop.sh" /opt/computer-use/
sudo chmod +x /opt/computer-use/minimal-desktop.sh

# Install systemd services (generated inline). The heredocs are unquoted on
# purpose: $USER and $HOME_DIR are expanded now, at generation time.
echo "[3/6] Installing systemd services..."

cat <<EOF | sudo tee /etc/systemd/system/xvfb.service > /dev/null
[Unit]
Description=Xvfb Virtual Display :99
After=graphical.target
Wants=graphical.target
[Service]
Type=simple
ExecStart=/usr/bin/Xvfb :99 -screen 0 1024x768x24 -nolisten tcp -dpi 96
Restart=always
RestartSec=1
User=$USER
Environment="HOME=$HOME_DIR"
[Install]
WantedBy=multi-user.target
EOF

cat <<EOF | sudo tee /etc/systemd/system/xfce-minimal.service > /dev/null
[Unit]
Description=XFCE Minimal Desktop for Computer Use
After=xvfb.service
Requires=xvfb.service
[Service]
Type=simple
ExecStart=/opt/computer-use/minimal-desktop.sh
Restart=always
RestartSec=3
User=$USER
Environment="HOME=$HOME_DIR"
Environment="DISPLAY=:99"
[Install]
WantedBy=multi-user.target
EOF

# NOTE(review): the x11vnc command below sets neither a password nor a
# -localhost restriction — confirm port 5900 is firewalled if access is meant
# to go through the SSH tunnel only.
cat <<EOF | sudo tee /etc/systemd/system/x11vnc.service > /dev/null
[Unit]
Description=x11vnc VNC Server
After=xfce-minimal.service
Requires=xfce-minimal.service
[Service]
Type=simple
ExecStart=/usr/bin/x11vnc -display :99 -forever -shared -rfbport 5900 -noxdamage -noxfixes -noclipboard
Restart=always
RestartSec=2
User=$USER
Environment="HOME=$HOME_DIR"
Environment="DISPLAY=:99"
[Install]
WantedBy=multi-user.target
EOF

cat <<EOF | sudo tee /etc/systemd/system/novnc.service > /dev/null
[Unit]
Description=noVNC WebSocket Proxy
After=x11vnc.service
Requires=x11vnc.service
[Service]
Type=simple
ExecStart=/usr/share/novnc/utils/novnc_proxy --vnc localhost:5900 --listen 6080 --heartbeat 30
Restart=always
RestartSec=2
User=$USER
Environment="HOME=$HOME_DIR"
[Install]
WantedBy=multi-user.target
EOF

# Mask xfdesktop to prevent flickering: the real binary is moved aside and a
# stub that exits immediately takes its place. Skipped when the .real copy
# already exists, so running this script again does not clobber the original.
echo "[4/6] Masking xfdesktop (prevents flicker)..."
if [ -f /usr/bin/xfdesktop ] && [ ! -f /usr/bin/xfdesktop.real ]; then
  sudo mv /usr/bin/xfdesktop /usr/bin/xfdesktop.real
  echo '#!/bin/bash
# Masked - xfdesktop causes VNC flickering on Xvfb
exit 0' | sudo tee /usr/bin/xfdesktop > /dev/null
  sudo chmod +x /usr/bin/xfdesktop
  echo " xfdesktop masked (original at /usr/bin/xfdesktop.real)"
else
  echo " xfdesktop already masked or not found"
fi

# Enable and start services
echo "[5/6] Enabling services..."
sudo systemctl daemon-reload
sudo systemctl enable xvfb xfce-minimal x11vnc novnc

# Started one by one, in dependency order, with a short pause after each.
echo "[6/6] Starting services..."
sudo systemctl start xvfb
sleep 2
sudo systemctl start xfce-minimal
sleep 3
sudo systemctl start x11vnc
sleep 1
sudo systemctl start novnc

echo ""
echo "=== Setup Complete ==="
echo ""
echo "Services running:"
# is-active prints one state per line; paste joins the four lines into one row
# for awk. Without pipefail the pipeline's status is awk's, so an inactive
# unit does not trip `set -e` here.
systemctl is-active xvfb xfce-minimal x11vnc novnc | paste - - - - | awk '{print " xvfb: "$1" xfce-minimal: "$2" x11vnc: "$3" novnc: "$4}'
echo ""
echo "Access VNC:"
echo " 1. SSH tunnel: ssh -L 6080:localhost:6080 $(hostname)"
echo " 2. Open: http://localhost:6080/vnc.html"
echo ""
echo "Or add to ~/.ssh/config:"
echo " Host $(hostname)"
echo " LocalForward 6080 127.0.0.1:6080"

30
scripts/type_text.sh Normal file
View File

@@ -0,0 +1,30 @@
#!/bin/bash
# type_text.sh - Type text through xdotool, piece by piece
# Usage: type_text.sh "text to type"
# The text is sent in slices of 50 characters, each typed with
# `xdotool type --delay 12`.
export DISPLAY=:99

readonly CHUNK_LEN=50

message="$1"

# Text is mandatory.
[[ -n "$message" ]] || {
  echo "ERROR: Usage: type_text.sh \"text to type\"" >&2
  exit 1
}

total=${#message}

# Walk through the text one slice at a time; `--` keeps a slice that starts
# with '-' from being read as an option.
for (( pos = 0; pos < total; pos += CHUNK_LEN )); do
  xdotool type --delay 12 -- "${message:pos:CHUNK_LEN}"
done

echo "Typed ${total} characters"

# Wait two seconds, then hand over to the screenshot script next to this one
sleep 2
script_dir="$(dirname "$0")"
exec "$script_dir/screenshot.sh"

33
scripts/vnc_start.sh Normal file
View File

@@ -0,0 +1,33 @@
#!/bin/bash
# Start VNC services for live desktop viewing
#
# Launches x11vnc on the display named by DISPLAY_NUM (default :99) and a
# websockify bridge serving /usr/share/novnc on port 6080, both in the
# background, after killing earlier instances matching the same patterns.
# Exits 1 if either of the two processes started here is no longer alive at
# the end.
DISPLAY_NUM="${DISPLAY_NUM:-:99}"

# Kill existing instances
pkill -f "x11vnc.*display $DISPLAY_NUM" 2>/dev/null
pkill -f "websockify.*6080" 2>/dev/null
sleep 1

# Start x11vnc (VNC server)
echo "Starting x11vnc on $DISPLAY_NUM..."
x11vnc -display "$DISPLAY_NUM" -forever -shared -nopw -listen localhost &
x11vnc_pid=$!
sleep 2

# Start websockify (noVNC web bridge)
echo "Starting noVNC on port 6080..."
websockify --web=/usr/share/novnc 6080 localhost:5900 &
websockify_pid=$!
sleep 1

# Verify the exact processes launched above. A name-based search would also
# match unrelated x11vnc/websockify processes and report success even when
# the instances started here had already died.
if kill -0 "$x11vnc_pid" 2>/dev/null && kill -0 "$websockify_pid" 2>/dev/null; then
  echo ""
  echo "✓ VNC services started"
  echo ""
  echo "To connect:"
  echo " 1. SSH tunnel: ssh -L 6080:localhost:6080 your-server"
  echo " 2. Open: http://localhost:6080/vnc.html?autoconnect=true"
  echo ""
else
  echo "✗ Failed to start VNC services"
  exit 1
fi

16
scripts/vnc_stop.sh Normal file
View File

@@ -0,0 +1,16 @@
#!/bin/bash
# Stop VNC services
# Sends the default pkill signal to every process whose command line mentions
# x11vnc or websockify, then reports whether any such process is left.
echo "Stopping VNC services..."

for pattern in x11vnc websockify; do
  pkill -f "$pattern" 2>/dev/null
done
sleep 1

# Anything still matching means the stop was incomplete; list the survivors.
if pgrep -f x11vnc >/dev/null || pgrep -f websockify >/dev/null; then
  echo "✗ Some processes may still be running"
  ps aux | grep -E "(x11vnc|websockify)" | grep -v grep
else
  echo "✓ VNC services stopped"
fi

24
scripts/wait.sh Normal file
View File

@@ -0,0 +1,24 @@
#!/bin/bash
# wait.sh - Wait for specified duration then screenshot
#
# Usage: wait.sh seconds
#   seconds  non-negative decimal number, at most 100 (e.g. 2, 0.5)
#
# On success prints "Waited <seconds> seconds" and then replaces itself with
# screenshot.sh from the same directory.
# Exits 1 on a missing, invalid or too long duration.
export DISPLAY=:99

DURATION="$1"

if [[ -z "$DURATION" ]]; then
  echo "ERROR: Usage: wait.sh seconds" >&2
  exit 1
fi

# Validate duration is reasonable. Done in pure bash (no bc dependency) and
# restricted to plain decimal numbers, so values that sleep would accept but a
# numeric comparison cannot judge (e.g. "2m") cannot slip past the cap.
numeric_re='^([0-9]+(\.[0-9]*)?|\.[0-9]+)$'
if [[ ! "$DURATION" =~ $numeric_re ]]; then
  echo "ERROR: Duration must be a non-negative number of seconds" >&2
  exit 1
fi

whole=${DURATION%%.*}                 # integer part (empty for ".5")
frac=
if [[ "$DURATION" == *.* ]]; then
  frac=${DURATION#*.}                 # digits after the decimal point
fi
whole=${whole#"${whole%%[!0]*}"}      # drop leading zeros (avoids octal parsing)
whole=${whole:-0}

# More than 3 digits is certainly > 100; checking the length first also keeps
# the arithmetic below free of overflow.
if (( ${#whole} > 3 )) || (( whole > 100 )) ||
  { (( whole == 100 )) && [[ "$frac" =~ [1-9] ]]; }; then
  echo "ERROR: Duration too long (max 100 seconds)" >&2
  exit 1
fi

sleep "$DURATION"
echo "Waited $DURATION seconds"

# Screenshot after waiting
exec "$(dirname "$0")/screenshot.sh"

49
scripts/zoom.sh Normal file
View File

@@ -0,0 +1,49 @@
#!/bin/bash
# zoom.sh - Capture cropped region of screen
#
# Usage: zoom.sh X1 Y1 X2 Y2
#   X1 Y1  offset of the region inside the full screenshot
#   X2 Y2  opposite corner; must be strictly greater than X1 / Y1
#   All four values must be non-negative integers.
#
# Returns base64 of the cropped region on stdout. Both the full and the
# cropped image live only in temporary files under /tmp/computer-use, which
# are removed on every exit path.
# Exits 1 on missing/invalid coordinates, an empty region, or when the
# capture or the crop fails; otherwise the exit status is that of base64.
export DISPLAY=:99

OUTPUT_DIR="/tmp/computer-use"

X1=$1
Y1=$2
X2=$3
Y2=$4

if [[ -z "$X1" || -z "$Y1" || -z "$X2" || -z "$Y2" ]]; then
  echo "ERROR: Usage: zoom.sh X1 Y1 X2 Y2" >&2
  exit 1
fi

# The values are used in shell arithmetic below, which would evaluate
# arbitrary expressions; accept nothing but digits.
int_re='^[0-9]+$'
for coord in "$X1" "$Y1" "$X2" "$Y2"; do
  if [[ ! "$coord" =~ $int_re ]]; then
    echo "ERROR: Coordinates must be non-negative integers" >&2
    exit 1
  fi
done

# Force base 10 so a leading zero is not read as octal.
X1=$((10#$X1))
Y1=$((10#$Y1))
X2=$((10#$X2))
Y2=$((10#$Y2))

# Calculate crop dimensions
WIDTH=$((X2 - X1))
HEIGHT=$((Y2 - Y1))
if (( WIDTH <= 0 || HEIGHT <= 0 )); then
  echo "ERROR: X2 must be greater than X1 and Y2 greater than Y1" >&2
  exit 1
fi

if ! mkdir -p "$OUTPUT_DIR"; then
  echo "ERROR: Failed to create $OUTPUT_DIR" >&2
  exit 1
fi

# Unpredictable, exclusively created temp files; removed by the EXIT trap.
FULL_FILE=
CROP_FILE=
trap 'rm -f -- "$FULL_FILE" "$CROP_FILE"' EXIT
if ! FULL_FILE=$(mktemp --suffix=.png "$OUTPUT_DIR/full_XXXXXXXX") ||
  ! CROP_FILE=$(mktemp --suffix=.png "$OUTPUT_DIR/crop_XXXXXXXX"); then
  echo "ERROR: Failed to create temporary file" >&2
  exit 1
fi

# Take full screenshot. The temp file already exists, so success is judged by
# scrot's exit status and a non-empty result.
if ! scrot -o "$FULL_FILE" 2>/dev/null || [[ ! -s "$FULL_FILE" ]]; then
  echo "ERROR: Failed to take screenshot" >&2
  exit 1
fi

# Crop using ImageMagick
if ! convert "$FULL_FILE" -crop "${WIDTH}x${HEIGHT}+${X1}+${Y1}" +repage "$CROP_FILE" ||
  [[ ! -s "$CROP_FILE" ]]; then
  echo "ERROR: Failed to crop screenshot" >&2
  exit 1
fi

# Output base64
base64 -w0 "$CROP_FILE"