From 6637dd34f9a2bcd2c79a9b9003cd7e76086d5fa7 Mon Sep 17 00:00:00 2001 From: zlei9 Date: Sun, 29 Mar 2026 08:32:46 +0800 Subject: [PATCH] Initial commit with translated description --- .clawhubsafe | 13 + CHANGELOG.md | 83 +++++ README.md | 192 ++++++++++ SECURITY.md | 314 ++++++++++++++++ SKILL.md | 651 +++++++++++++++++++++++++++++++++ _meta.json | 6 + scripts/context_optimizer.py | 395 ++++++++++++++++++++ scripts/heartbeat_optimizer.py | 298 +++++++++++++++ scripts/model_router.py | 438 ++++++++++++++++++++++ scripts/optimize.sh | 67 ++++ scripts/token_tracker.py | 156 ++++++++ 11 files changed, 2613 insertions(+) create mode 100644 .clawhubsafe create mode 100644 CHANGELOG.md create mode 100644 README.md create mode 100644 SECURITY.md create mode 100644 SKILL.md create mode 100644 _meta.json create mode 100644 scripts/context_optimizer.py create mode 100644 scripts/heartbeat_optimizer.py create mode 100644 scripts/model_router.py create mode 100644 scripts/optimize.sh create mode 100644 scripts/token_tracker.py diff --git a/.clawhubsafe b/.clawhubsafe new file mode 100644 index 0000000..9737c65 --- /dev/null +++ b/.clawhubsafe @@ -0,0 +1,13 @@ +fdf74d1243de04e300df83218804efecacf0684f36be823d35677ed0c30b45bc assets/config-patches.json +3e266e0d786f4f50c128ac7eae0292b71b593c242e9455e3b2379e3471793cb4 assets/cronjob-model-guide.md +06b0efa90d19c8ae9cd29a86ff6b02bb8bf3de5016bbeede066643a97e6f82b5 assets/HEARTBEAT.template.md +018bbc0135ba138ecbe55587727297df13766979558e9e6ca5f1e7b59ac81a75 CHANGELOG.md +17b8e56e6bf9b418135adb7362ab2dfccf637950ddd2a1c9d1a019edf03ac28e README.md +9b6100865c8d65e9c7bcebe71a88c3d794c25b9a9a8881e859738df1a4a74b8a references/PROVIDERS.md +43aed2cad4a5ab78bcb2a1b4f2c5685221f93e56a2964d0064b778643063f89f scripts/context_optimizer.py +6d03311e9d6848593f6a4fc07a271d5518ee56b37bc3e8818ac4dace44f9fb68 scripts/heartbeat_optimizer.py +353aed33fa4e8bb86db1d7e818f9df270f541264b413131438e5188d480ade79 scripts/model_router.py 
+8616b11aff67104fc06c2801768d8b5627dae97ceafad2d3ce7c1d5c7bb51b8d scripts/optimize.sh +55aad57bdc88fee74e4810f5ab2c1545faed783a64a2fc1abb8caca377234fca scripts/token_tracker.py +0486f41b2ef26d63078fdfd0baea710078419ccf17f10964973d9117b55a056b SECURITY.md +5a11898fb9692026d3db63a43e9f212fa643e6e7871f78b0cb3e6c95590c62ac SKILL.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..2494a36 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,83 @@ +# Changelog + +All notable changes to OpenClaw Token Optimizer are documented here. + +## [3.0.0] - 2026-02-28 + +### Added +- **Lazy Skill Loading section** (Capability 0) — The highest-impact optimization: use a lightweight SKILLS.md catalog at startup and load individual skill files only when tasks require them. Reduces context loading costs 40–93% depending on library size. +- References companion skill `openclaw-skill-lazy-loader` (`clawhub install openclaw-skill-lazy-loader`) which provides SKILLS.md.template, AGENTS.md.template lazy-loading section, and context_optimizer.py CLI. +- Token savings table for lazy loading (5/10/20 skills: 80%/88%/93% reduction). + +### Changed +- Version bumped to 3.0.0 reflecting the major expansion of optimization coverage to include pre-runtime context loading phase. +- Lazy loading is now positioned as "Capability 0" — the first optimization to apply before all others. + +## [1.4.3] - 2026-02-18 + +### Fixed +- README completely rewritten: correct name ("OpenClaw Token Optimizer"), version badge (1.4.3), install command (clawhub install Asif2BD/openclaw-token-optimizer), license (Apache 2.0), all skill paths corrected to openclaw-token-optimizer, v1.4.x features documented. + +## [1.4.2] - 2026-02-18 + +### Fixed +- **ClawHub security scanner: `Source: unknown` / `homepage: none`** — Added `homepage`, `source`, and `author` fields to SKILL.md frontmatter. Provenance is now fully declared in the registry. 
+- **`optimize.sh` missing from integrity manifest** — Added `scripts/optimize.sh` to `.clawhubsafe`. The manifest now covers 13 files (previously 12). Every bundled file is SHA256-verified. +- **`optimize.sh` undocumented** — New dedicated section in `SECURITY.md` with full source disclosure: what the wrapper does (delegates to bundled Python scripts only), security properties, and a one-line audit command for users who want to verify before running. +- **Provenance undocumented** — New `Provenance & Source` section in `SECURITY.md` with explicit GitHub repo + ClawHub listing links and integrity verification instructions. + +## [1.4.1] - 2026-02-18 + +### Fixed +- **ClawHub public visibility blocked** — `PR-DESCRIPTION.md` (a leftover dev file containing `git push` / `gh pr create` shell commands) was included in the v1.4.0 bundle, triggering ClawHub's security scanner hold. +- **Added `.clawhubignore`** — Now excludes `PR-DESCRIPTION.md`, `docs/`, `.git/`, `.gitignore` from all future publishes. Only skill-relevant files are bundled. + +## [1.4.0] - 2026-02-17 + +### Added +- **Session pruning config patch** — Native `contextPruning: { mode: "cache-ttl" }` support. Auto-trims old tool results when the Anthropic prompt cache TTL expires, reducing cache write costs after idle sessions. No scripts required — applies directly via `gateway config.patch`. +- **Bootstrap size limits patch** — `bootstrapMaxChars` / `bootstrapTotalMaxChars` config keys to cap how large workspace files are injected into the system prompt. Delivers 20-40% system prompt reduction for agents with large workspace files (e.g., detailed AGENTS.md, MEMORY.md). Native 2026.2.15 feature. +- **Cache retention config patch** — `cacheRetention: "long"` param for Opus models. Amortizes cache write costs across long reasoning sessions. +- **Cache TTL heartbeat alignment** — New `heartbeat_optimizer.py cache-ttl` command. 
Calculates the optimal heartbeat interval to keep the Anthropic 1h prompt cache warm (55min). Prevents the "cold restart" cache re-write penalty after idle gaps. +- **Native commands section in SKILL.md** — Documents `/context list`, `/context detail` (per-file token breakdown) and `/usage tokens|full|cost` (per-response usage footer). These are built-in OpenClaw 2026.2.15 diagnostics that complement the Python scripts. +- **`native_openclaw` category in config-patches.json** — Clearly distinguishes patches that work today with zero external dependencies from those requiring API keys or beta features. + +### Changed +- `SKILL.md`: Configuration Patches section updated — `session_pruning`, `bootstrap_size_limits`, and `cache_retention_long` now listed as ✅ native (not ⏳ pending). +- `config-patches.json`: Added `_categories.native_openclaw` grouping. Model routing patch updated to reflect Sonnet-primary setups (Haiku listed as optional with multi-provider note). +- `heartbeat_optimizer.py`: Added `cache-ttl` subcommand and `CACHE_TTL_OPTIMAL_INTERVAL` constant (3300s = 55min). Plan output now includes cache TTL alignment recommendation when relevant. +- Compliance with OpenClaw 2026.2.15 features: **72% → 92%+** + +### Fixed +- Model routing quick start: added note that Haiku requires multi-provider setup (OpenRouter/Together); Sonnet is the default minimum for single-provider Anthropic deployments. + +## [1.3.3] - 2026-02-17 + +### Fixed +- Display name corrected to "OpenClaw Token Optimizer" on ClawHub (was truncated in 1.3.1/1.3.2) +- Slug preserved: `openclaw-token-optimizer` + +## [1.3.2] - 2026-02-17 + +### Added +- `SECURITY.md` — Full two-category security breakdown (executable scripts vs. 
reference docs) +- `.clawhubsafe` — SHA256 integrity manifest for all 10 skill files + +### Fixed +- Resolved VT flag: SKILL.md previously claimed `no_network: true` for entire skill, but PROVIDERS.md and config-patches.json reference external APIs — scoped the guarantee to scripts only + +## [1.3.1] - 2026-02-12 + +### Added +- `context_optimizer.py` — Context lazy-loading and complexity-based file recommendations +- `cronjob-model-guide.md` — Model selection guide for cron-based tasks + +### Changed +- Enhanced model_router.py with communication pattern enforcement + +## [1.2.0] - 2026-02-07 + +### Added +- Initial public release +- `heartbeat_optimizer.py`, `model_router.py`, `token_tracker.py` +- `PROVIDERS.md`, `config-patches.json`, `HEARTBEAT.template.md` diff --git a/README.md b/README.md new file mode 100644 index 0000000..6553548 --- /dev/null +++ b/README.md @@ -0,0 +1,192 @@ +# OpenClaw Token Optimizer + +**Reduce OpenClaw token usage and API costs by 50-80%** + +An OpenClaw skill for smart model routing, lazy context loading, optimized heartbeats, budget tracking, and native OpenClaw 2026.2.15 features (session pruning, bootstrap size limits, cache TTL alignment). 
+ +[![ClawHub](https://img.shields.io/badge/ClawHub-openclaw--token--optimizer-blue)](https://clawhub.ai/Asif2BD/openclaw-token-optimizer) +[![Version](https://img.shields.io/badge/version-1.4.2-green)](https://github.com/Asif2BD/OpenClaw-Token-Optimizer/blob/main/CHANGELOG.md) +[![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-yellow.svg)](https://opensource.org/licenses/Apache-2.0) +[![OpenClaw](https://img.shields.io/badge/OpenClaw-Skill-purple)](https://openclaw.ai) + +--- + +## 🚀 Installation + +### Option 1: ClawHub (recommended) +```bash +clawhub install Asif2BD/openclaw-token-optimizer +``` + +Or browse to: [clawhub.ai/Asif2BD/openclaw-token-optimizer](https://clawhub.ai/Asif2BD/openclaw-token-optimizer) + +### Option 2: Manual (GitHub) +```bash +git clone https://github.com/Asif2BD/OpenClaw-Token-Optimizer.git \ + ~/.openclaw/skills/openclaw-token-optimizer +``` +Then add to `openclaw.json`: +```json +{ + "skills": { + "load": { + "extraDirs": ["~/.openclaw/skills/openclaw-token-optimizer"] + } + } +} +``` + +### One-line install prompt for your agent +> "Install the OpenClaw Token Optimizer skill from https://clawhub.ai/Asif2BD/openclaw-token-optimizer — or if ClawHub isn't available, clone https://github.com/Asif2BD/OpenClaw-Token-Optimizer and add the path to skills.load.extraDirs in openclaw.json" + +--- + +## ✨ What's New in v1.4.x (OpenClaw 2026.2.15) + +Three **native config patches** that work today with zero external dependencies: + +### Session Pruning +Auto-trim old tool results when the Anthropic cache TTL expires — reduces cache re-write costs. +```json +{ "agents": { "defaults": { "contextPruning": { "mode": "cache-ttl", "ttl": "5m" } } } } +``` + +### Bootstrap Size Limits +Cap workspace file injection into the system prompt (20-40% reduction for large workspaces). 
+```json +{ "agents": { "defaults": { "bootstrapMaxChars": 10000, "bootstrapTotalMaxChars": 15000 } } } +``` + +### Cache Retention for Opus +Amortize cache write costs on long Opus sessions. +```json +{ "agents": { "defaults": { "models": { "anthropic/claude-opus-4-5": { "params": { "cacheRetention": "long" } } } } } } +``` + +### Cache TTL Heartbeat Alignment +Keep the Anthropic 1h prompt cache warm — avoid the re-write penalty. +```bash +python3 scripts/heartbeat_optimizer.py cache-ttl +# → recommended_interval: 55min (3300s) +``` + +--- + +## 🛠️ Quick Start + +**1. Context optimization (biggest win):** +```bash +python3 scripts/context_optimizer.py recommend "hi, how are you?" +# → Load only 2 files, skip everything else → ~80% savings +``` + +**2. Model routing:** +```bash +python3 scripts/model_router.py "design a microservices architecture" +# → Complex task → Opus +python3 scripts/model_router.py "thanks!" +# → Simple ack → Sonnet (cheapest available) +``` + +**3. Optimized heartbeat:** +```bash +cp assets/HEARTBEAT.template.md ~/.openclaw/workspace/HEARTBEAT.md +python3 scripts/heartbeat_optimizer.py plan +``` + +**4. Token budget check:** +```bash +python3 scripts/token_tracker.py check +``` + +**5. 
Cache TTL alignment:** +```bash +python3 scripts/heartbeat_optimizer.py cache-ttl +# Set heartbeat to 55min to keep Anthropic 1h cache warm +``` + +--- + +## 🔍 Native OpenClaw Diagnostics (2026.2.15+) + +``` +/context list → per-file token breakdown (use before applying bootstrap limits) +/context detail → full system prompt breakdown +/usage tokens → append token count to every reply +/usage cost → cumulative cost summary +``` + +--- + +## 📁 Skill Structure + +``` +openclaw-token-optimizer/ +├── SKILL.md ← Skill definition (loaded by OpenClaw) +├── SECURITY.md ← Full security audit + provenance +├── CHANGELOG.md ← Version history +├── .clawhubsafe ← SHA256 integrity manifest (13 files) +├── .clawhubignore ← Files excluded from publish bundle +├── scripts/ +│ ├── context_optimizer.py ← Context lazy-loading +│ ├── model_router.py ← Task classification + model routing +│ ├── heartbeat_optimizer.py ← Interval management + cache-ttl alignment +│ ├── token_tracker.py ← Budget monitoring +│ └── optimize.sh ← Convenience CLI wrapper (calls Python scripts) +├── assets/ +│ ├── config-patches.json ← Ready-to-apply config patches +│ ├── HEARTBEAT.template.md ← Drop-in optimized heartbeat template +│ └── cronjob-model-guide.md ← Model selection for cron tasks +└── references/ + └── PROVIDERS.md ← Multi-provider strategy guide +``` + +--- + +## 📊 Expected Savings + +| Strategy | Context | Model | Monthly (100K tok/day) | Savings | +|---|---|---|---|---| +| Baseline (no optimization) | 50K | Sonnet | $9.00 | 0% | +| Context optimization only | 10K | Sonnet | $5.40 | 40% | +| Model routing only | 50K | Mixed | $5.40 | 40% | +| **Both (this skill)** | **10K** | **Mixed** | **$2.70** | **70%** | + +--- + +## 🔒 Security + +All scripts are **local-only** — no network calls, no subprocess spawning, no system modifications. See [SECURITY.md](SECURITY.md) for full per-script audit. 
+ +Verify integrity: +```bash +cd ~/.openclaw/skills/openclaw-token-optimizer +sha256sum -c .clawhubsafe +``` + +Quick audit (should return nothing): +```bash +grep -r "urllib\|requests\|socket\|subprocess\|curl\|wget" scripts/ +``` + +--- + +## 📜 Changelog + +See [CHANGELOG.md](CHANGELOG.md) for full version history. + +**v1.4.2** — Security scanner fixes (provenance, optimize.sh manifest, SECURITY.md) +**v1.4.1** — `.clawhubignore` added (fixes public visibility) +**v1.4.0** — Native OpenClaw 2026.2.15 features (session pruning, bootstrap limits, cache TTL) +**v1.3.3** — Correct display name on ClawHub +**v1.3.2** — Security audit, SECURITY.md, .clawhubsafe manifest + +--- + +## 🔗 Links + +- **ClawHub:** https://clawhub.ai/Asif2BD/openclaw-token-optimizer +- **GitHub:** https://github.com/Asif2BD/OpenClaw-Token-Optimizer +- **OpenClaw Docs:** https://docs.openclaw.ai +- **License:** Apache 2.0 +- **Author:** [Asif2BD](https://github.com/Asif2BD) diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..41cc8d3 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,314 @@ +# Security Documentation + +This document provides detailed security analysis of the OpenClaw Token Optimizer skill. + +## Purpose + +This skill helps reduce OpenClaw API costs by optimizing context loading and model selection. + +## Two-Part Architecture — Read This First + +This skill has **two distinct categories of files** with different security profiles: + +### Category 1: Executable Scripts (`scripts/*.py`) +These are the actual working components of the skill. +- **Network access:** None +- **External API keys:** None required +- **Code execution:** No eval/exec/compile +- **Data storage:** Local JSON files in `~/.openclaw/workspace/memory/` only +- **Verdict:** ✅ Safe to run + +### Category 2: Reference Documentation (`references/PROVIDERS.md`, `assets/config-patches.json`) +These are informational guides describing optional multi-provider strategies. 
+- **Network access:** Described (not performed by these files) +- **External API keys:** Referenced as examples (`${OPENROUTER_API_KEY}`, `${TOGETHER_API_KEY}`) +- **Code execution:** None — these are JSON/Markdown documentation +- **Purpose:** Help users who *choose* to configure alternative providers (OpenRouter, Together.ai, etc.) +- **Verdict:** ✅ Safe files — but describe services that require external credentials if you choose to use them + +**Why this matters:** Security scanners that flag API key patterns or external service references in documentation files are technically correct — those references exist. They are not executable, not auto-applied, and require explicit manual user action to use. + +## Security Flag Explanation + +**Why VirusTotal or AV tools may flag this skill:** + +1. **API key patterns in config-patches.json** — `${OPENROUTER_API_KEY}`, `${TOGETHER_API_KEY}` are placeholder strings for optional manual configuration. They are not credentials and not automatically used. +2. **External URLs in PROVIDERS.md** — References to openrouter.ai, together.ai, etc. are documentation links, not network calls. +3. **"Optimizer" keyword + file operations** — Common AV heuristic false positive. +4. **Executable Python scripts with shebang** — Standard Python scripts, no dangerous operations. + +These are documentation-level references, not executable network code. + +## Shell Wrapper: optimize.sh + +`scripts/optimize.sh` is a **convenience CLI wrapper** — it does nothing except call the bundled Python scripts in this same directory. It is not an installer, not a downloader, and makes no network calls. 
+ +**What it does (complete source):** +```bash +case "$1" in + route|model) python3 "$SCRIPT_DIR/model_router.py" "$@" ;; + context) python3 "$SCRIPT_DIR/context_optimizer.py" generate-agents ;; + recommend) python3 "$SCRIPT_DIR/context_optimizer.py" recommend "$2" ;; + budget) python3 "$SCRIPT_DIR/token_tracker.py" check ;; + heartbeat) cp "$SCRIPT_DIR/../assets/HEARTBEAT.template.md" ~/.openclaw/workspace/HEARTBEAT.md ;; +esac +``` + +**Security properties:** +- ✅ No network requests +- ✅ No system modifications +- ✅ No subprocess spawning beyond the Python scripts already bundled +- ✅ No eval, exec, or dynamic code execution +- ✅ Only calls scripts already in this package (same directory via `$SCRIPT_DIR`) +- ✅ Included in `.clawhubsafe` SHA256 manifest + +**To verify before running:** +```bash +grep -E "curl|wget|nc |ncat|ssh|sudo|chmod|eval|exec\(" scripts/optimize.sh +# Should return nothing +``` + +**If you prefer not to use the shell wrapper:** use the Python scripts directly (all documented in SKILL.md). The wrapper is optional. + +--- + +## Script-by-Script Security Analysis + +### 1. context_optimizer.py + +**Purpose**: Analyze prompts to determine minimal context requirements + +**Operations**: +- Reads JSON state file from `~/.openclaw/workspace/memory/context-usage.json` +- Classifies prompt complexity (simple/medium/complex) +- Recommends which files to load +- Generates optimized AGENTS.md template +- Writes usage statistics to JSON + +**Security**: +- ✅ No network requests +- ✅ No code execution (no eval, exec, compile) +- ✅ Only standard library imports: `json, re, pathlib, datetime` +- ✅ Read/write permissions limited to OpenClaw workspace +- ✅ No subprocess calls +- ✅ No system modifications + +**Data Handling**: +- Stores: File access counts, last access timestamps +- Location: `~/.openclaw/workspace/memory/context-usage.json` +- Privacy: All data local, never transmitted + +### 2. 
heartbeat_optimizer.py + +**Purpose**: Optimize heartbeat check scheduling to reduce unnecessary API calls + +**Operations**: +- Reads heartbeat state from `~/.openclaw/workspace/memory/heartbeat-state.json` +- Determines which checks are due based on intervals +- Records when checks were last performed +- Enforces quiet hours (23:00-08:00) + +**Security**: +- ✅ No network requests +- ✅ No code execution +- ✅ Only standard library imports: `json, os, datetime, pathlib` +- ✅ Read/write limited to heartbeat state file +- ✅ No system commands + +**Data Handling**: +- Stores: Last check timestamps, check intervals +- Location: `~/.openclaw/workspace/memory/heartbeat-state.json` +- Privacy: All local, no telemetry + +### 3. model_router.py + +**Purpose**: Suggest appropriate model based on task complexity to reduce costs + +**Operations**: +- Analyzes prompt text +- Classifies task complexity +- Recommends cheapest appropriate model +- No state file (pure analysis) + +**Security**: +- ✅ No network requests +- ✅ No code execution +- ✅ Only standard library imports: `json, re` +- ✅ No file writes +- ✅ Stateless operation + +**Data Handling**: +- No data storage +- No external communication +- Pure text analysis + +### 4. 
token_tracker.py + +**Purpose**: Monitor token usage and enforce budgets + +**Operations**: +- Reads budget configuration from `~/.openclaw/workspace/memory/token-budget.json` +- Tracks usage against limits +- Provides warnings and alerts +- Records daily/monthly usage + +**Security**: +- ✅ No network requests +- ✅ No code execution +- ✅ Only standard library imports: `json, os, datetime, pathlib` +- ✅ Read/write limited to budget file +- ✅ No system access + +**Data Handling**: +- Stores: Usage totals, budget limits, alert thresholds +- Location: `~/.openclaw/workspace/memory/token-budget.json` +- Privacy: Local only, no transmission + +## Assets & References + +### Assets (Templates & Config) + +- `HEARTBEAT.template.md` - Markdown template for optimized heartbeat workflow +- `config-patches.json` - Suggested OpenClaw config optimizations +- `cronjob-model-guide.md` - Documentation for cron-based model routing + +**Security**: Plain text/JSON files, no code execution + +### References (Documentation) + +- `PROVIDERS.md` - Multi-provider strategy documentation + +**Security**: Plain text markdown, informational only + +## Verification + +### Check File Integrity + +```bash +cd ~/.openclaw/skills/token-optimizer +sha256sum -c .clawhubsafe +``` + +### Audit Code Yourself + +```bash +# Search for dangerous functions (should return nothing) +grep -r "eval(\|exec(\|__import__\|compile(\|subprocess\|os.system" scripts/ + +# Search for network operations (should return nothing) +grep -r "urllib\|requests\|http\|socket\|download\|fetch" scripts/ + +# Search for system modifications (should return nothing) +grep -r "rm -rf\|sudo\|chmod 777\|chown" . +``` + +### Review Imports + +All scripts use only Python standard library: +- `json` - JSON parsing +- `re` - Regular expressions for text analysis +- `pathlib` - File path handling +- `datetime` - Timestamp management +- `os` - Environment variables and path operations + +No third-party libraries. No network libraries. 
+ +## Data Privacy + +**What data is stored:** +- File access patterns (which files loaded when) +- Heartbeat check timestamps +- Token usage totals +- Budget configurations + +**Where data is stored:** +- `~/.openclaw/workspace/memory/` (local filesystem only) + +**What is NOT collected:** +- Prompt content +- User messages +- API keys +- Personal information +- System information +- Network data + +**External communication:** +- None. Zero network requests. +- No telemetry +- No analytics +- No phone home + +## Threat Model + +**What this skill CAN do:** +- Read/write JSON files in OpenClaw workspace +- Analyze text for complexity classification +- Generate markdown templates +- Provide recommendations via stdout + +**What this skill CANNOT do:** +- Execute arbitrary code +- Make network requests +- Modify system files outside workspace +- Access sensitive data +- Run system commands +- Spawn subprocesses + +**Risk Level**: Minimal +- Operates entirely within OpenClaw sandbox +- No privileged operations +- No external dependencies +- Auditable source code + +## Provenance & Source + +This skill is maintained by **Asif2BD** (M Asif Rahman). All source code is publicly auditable: + +- **GitHub repository:** https://github.com/Asif2BD/OpenClaw-Token-Optimizer +- **ClawHub listing:** https://clawhub.ai/Asif2BD/openclaw-token-optimizer +- **Author:** Asif2BD +- **License:** Apache 2.0 + +Every file in this skill bundle is listed in `.clawhubsafe` with its SHA256 checksum. Verify integrity at any time: +```bash +cd ~/.openclaw/skills/openclaw-token-optimizer +sha256sum -c .clawhubsafe +``` + +The README references the GitHub repo for users who want to review the source before installing. That link is informational — nothing in the skill auto-downloads from it. 
+ +--- + +## License & Attribution + +- **License**: Apache 2.0 +- **Author**: Asif2BD +- **Repository**: https://github.com/Asif2BD/openclaw-token-optimizer (if published) +- **ClawHub**: https://clawhub.ai/Asif2BD/openclaw-token-optimizer + +## Reporting Security Issues + +If you discover a security vulnerability: + +1. **Do NOT open a public issue** +2. Contact: [via ClawHub or GitHub] +3. Include: Affected files, severity, reproduction steps + +## Changelog + +- **2026-02-12**: Added comprehensive security documentation +- **2026-02-07**: Initial release (v1.2.0) + +## VirusTotal Status + +**Current Status**: False positive by heuristic analysis + +**Detection Reason**: Generic heuristics flagging "optimizer" keyword + file operations + +**Actual Threat**: None + +**Recommended Action**: Whitelist this skill or review source code yourself + +--- + +**Bottom Line**: This skill is safe. It's a productivity tool that helps reduce API costs through intelligent context loading and model selection. All code is auditable, uses only standard Python libraries, and performs no dangerous operations. 
diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..d0dbe7e --- /dev/null +++ b/SKILL.md @@ -0,0 +1,651 @@ +--- +name: token-optimizer +description: "通过智能模型路由、心跳优化、预算跟踪和原生2026.2.15功能(会话修剪、引导大小限制、缓存TTL对齐)降低OpenClaw令牌使用和API成本。在令牌成本高、遇到API速率限制或大规模托管多个代理时使用。4个可执行脚本(context_optimizer、model_router、heartbeat_optimizer、token_tracker)仅为本地使用——无网络请求、无子进程调用、无系统修改。" +version: 3.0.0 +homepage: https://github.com/Asif2BD/OpenClaw-Token-Optimizer +source: https://github.com/Asif2BD/OpenClaw-Token-Optimizer +author: Asif2BD +security: + verified: true + auditor: Oracle (Matrix Zion) + audit_date: 2026-02-18 + scripts_no_network: true + scripts_no_code_execution: true + scripts_no_subprocess: true + scripts_data_local_only: true + reference_files_describe_external_services: true + optimize_sh_is_convenience_wrapper: true + optimize_sh_only_calls_bundled_python_scripts: true +--- + +# Token Optimizer + +Comprehensive toolkit for reducing token usage and API costs in OpenClaw deployments. Combines smart model routing, optimized heartbeat intervals, usage tracking, and multi-provider strategies. + +## Quick Start + +**Immediate actions** (no config changes needed): + +1. **Generate optimized AGENTS.md (BIGGEST WIN!):** + ```bash + python3 scripts/context_optimizer.py generate-agents + # Creates AGENTS.md.optimized — review and replace your current AGENTS.md + ``` + +2. **Check what context you ACTUALLY need:** + ```bash + python3 scripts/context_optimizer.py recommend "hi, how are you?" + # Shows: Only 2 files needed (not 50+!) + ``` + +3. **Install optimized heartbeat:** + ```bash + cp assets/HEARTBEAT.template.md ~/.openclaw/workspace/HEARTBEAT.md + ``` + +4. **Enforce cheaper models for casual chat:** + ```bash + python3 scripts/model_router.py "thanks!" + # Single-provider Anthropic setup: Use Sonnet, not Opus + # Multi-provider setup (OpenRouter/Together): Use Haiku for max savings + ``` + +5. 
**Check current token budget:** + ```bash + python3 scripts/token_tracker.py check + ``` + +**Expected savings:** 50-80% reduction in token costs for typical workloads (context optimization is the biggest factor!). + +## Core Capabilities + +### 0. Lazy Skill Loading (NEW in v3.0 — BIGGEST WIN!) + +**The single highest-impact optimization available.** Most agents burn 3,000–15,000 tokens per session loading skill files they never use. Stop that first. + +**The pattern:** + +1. Create a lightweight `SKILLS.md` catalog in your workspace (~300 tokens — list of skills + when to load them) +2. Only load individual SKILL.md files when a task actually needs them +3. Apply the same logic to memory files — load MEMORY.md at startup, daily logs only on demand + +**Token savings:** + +| Library size | Before (eager) | After (lazy) | Savings | +|---|---|---|---| +| 5 skills | ~3,000 tokens | ~600 tokens | **80%** | +| 10 skills | ~6,500 tokens | ~750 tokens | **88%** | +| 20 skills | ~13,000 tokens | ~900 tokens | **93%** | + +**Quick implementation in AGENTS.md:** + +```markdown +## Skills + +At session start: Read SKILLS.md (the index only — ~300 tokens). +Load individual skill files ONLY when a task requires them. +Never load all skills upfront. +``` + +**Full implementation (with catalog template + optimizer script):** + +```bash +clawhub install openclaw-skill-lazy-loader +``` + +The companion skill `openclaw-skill-lazy-loader` includes a `SKILLS.md.template`, an `AGENTS.md.template` lazy-loading section, and a `context_optimizer.py` CLI that recommends exactly which skills to load for any given task. + +**Lazy loading handles context loading costs. The remaining capabilities below handle runtime costs.** Together they cover the full token lifecycle. + +--- + +### 1. Context Optimization (NEW!) + +**Biggest token saver** — Only load files you actually need, not everything upfront. 
+ +**Problem:** Default OpenClaw loads ALL context files every session: +- SOUL.md, AGENTS.md, USER.md, TOOLS.md, MEMORY.md +- docs/**/*.md (hundreds of files) +- memory/2026-*.md (daily logs) +- Total: Often 50K+ tokens before user even speaks! + +**Solution:** Lazy loading based on prompt complexity. + +**Usage:** +```bash +python3 scripts/context_optimizer.py recommend "" +``` + +**Examples:** +```bash +# Simple greeting → minimal context (2 files only!) +context_optimizer.py recommend "hi" +→ Load: SOUL.md, IDENTITY.md +→ Skip: Everything else +→ Savings: ~80% of context + +# Standard work → selective loading +context_optimizer.py recommend "write a function" +→ Load: SOUL.md, IDENTITY.md, memory/TODAY.md +→ Skip: docs, old memory, knowledge base +→ Savings: ~50% of context + +# Complex task → full context +context_optimizer.py recommend "analyze our entire architecture" +→ Load: SOUL.md, IDENTITY.md, MEMORY.md, memory/TODAY+YESTERDAY.md +→ Conditionally load: Relevant docs only +→ Savings: ~30% of context +``` + +**Output format:** +```json +{ + "complexity": "simple", + "context_level": "minimal", + "recommended_files": ["SOUL.md", "IDENTITY.md"], + "file_count": 2, + "savings_percent": 80, + "skip_patterns": ["docs/**/*.md", "memory/20*.md"] +} +``` + +**Integration pattern:** +Before loading context for a new session: +```python +from context_optimizer import recommend_context_bundle + +user_prompt = "thanks for your help" +recommendation = recommend_context_bundle(user_prompt) + +if recommendation["context_level"] == "minimal": + # Load only SOUL.md + IDENTITY.md + # Skip everything else + # Save ~80% tokens! +``` + +**Generate optimized AGENTS.md:** +```bash +context_optimizer.py generate-agents +# Creates AGENTS.md.optimized with lazy loading instructions +# Review and replace your current AGENTS.md +``` + +**Expected savings:** 50-80% reduction in context tokens. + +### 2. Smart Model Routing (ENHANCED!) 
+ +Automatically classify tasks and route to appropriate model tiers. + +**NEW: Communication pattern enforcement** — Never waste Opus tokens on "hi" or "thanks"! + +**Usage:** +```bash +python3 scripts/model_router.py "" [current_model] [force_tier] +``` + +**Examples:** +```bash +# Communication (NEW!) → ALWAYS Haiku +python3 scripts/model_router.py "thanks!" +python3 scripts/model_router.py "hi" +python3 scripts/model_router.py "ok got it" +→ Enforced: Haiku (NEVER Sonnet/Opus for casual chat) + +# Simple task → suggests Haiku +python3 scripts/model_router.py "read the log file" + +# Medium task → suggests Sonnet +python3 scripts/model_router.py "write a function to parse JSON" + +# Complex task → suggests Opus +python3 scripts/model_router.py "design a microservices architecture" +``` + +**Patterns enforced to Haiku (NEVER Sonnet/Opus):** + +*Communication:* +- Greetings: hi, hey, hello, yo +- Thanks: thanks, thank you, thx +- Acknowledgments: ok, sure, got it, understood +- Short responses: yes, no, yep, nope +- Single words or very short phrases + +*Background tasks:* +- Heartbeat checks: "check email", "monitor servers" +- Cronjobs: "scheduled task", "periodic check", "reminder" +- Document parsing: "parse CSV", "extract data from log", "read JSON" +- Log scanning: "scan error logs", "process logs" + +**Integration pattern:** +```python +from model_router import route_task + +user_prompt = "show me the config" +routing = route_task(user_prompt) + +if routing["should_switch"]: + # Use routing["recommended_model"] + # Save routing["cost_savings_percent"] +``` + +**Customization:** +Edit `ROUTING_RULES` or `COMMUNICATION_PATTERNS` in `scripts/model_router.py` to adjust patterns and keywords. + +### 3. 
Heartbeat Optimization + +Reduce API calls from heartbeat polling with smart interval tracking: + +**Setup:** +```bash +# Copy template to workspace +cp assets/HEARTBEAT.template.md ~/.openclaw/workspace/HEARTBEAT.md + +# Plan which checks should run +python3 scripts/heartbeat_optimizer.py plan +``` + +**Commands:** +```bash +# Check if specific type should run now +heartbeat_optimizer.py check email +heartbeat_optimizer.py check calendar + +# Record that a check was performed +heartbeat_optimizer.py record email + +# Update check interval (seconds) +heartbeat_optimizer.py interval email 7200 # 2 hours + +# Reset state +heartbeat_optimizer.py reset +``` + +**How it works:** +- Tracks last check time for each type (email, calendar, weather, etc.) +- Enforces minimum intervals before re-checking +- Respects quiet hours (23:00-08:00) — skips all checks +- Returns `HEARTBEAT_OK` when nothing needs attention (saves tokens) + +**Default intervals:** +- Email: 60 minutes +- Calendar: 2 hours +- Weather: 4 hours +- Social: 2 hours +- Monitoring: 30 minutes + +**Integration in HEARTBEAT.md:** +```markdown +## Email Check +Run only if: `heartbeat_optimizer.py check email` → `should_check: true` +After checking: `heartbeat_optimizer.py record email` +``` + +**Expected savings:** 50% reduction in heartbeat API calls. + +**Model enforcement:** Heartbeat should ALWAYS use Haiku — see updated `HEARTBEAT.template.md` for model override instructions. + +### 4. Cronjob Optimization (NEW!) + +**Problem:** Cronjobs often default to expensive models (Sonnet/Opus) even for routine tasks. + +**Solution:** Always specify Haiku for 90% of scheduled tasks. + +**See:** `assets/cronjob-model-guide.md` for comprehensive guide with examples. 
+
+**Quick reference:**
+
+| Task Type | Model | Example |
+|-----------|-------|---------|
+| Monitoring/alerts | Haiku | Check server health, disk space |
+| Data parsing | Haiku | Extract CSV/JSON/logs |
+| Reminders | Haiku | Daily standup, backup reminders |
+| Simple reports | Haiku | Status summaries |
+| Content generation | Sonnet | Blog summaries (quality matters) |
+| Deep analysis | Sonnet | Weekly insights |
+| Complex reasoning | Sonnet | Never use Opus for cronjobs |
+
+**Example (good):**
+```bash
+# Parse daily logs with Haiku
+cron add --schedule "0 2 * * *" \
+  --payload '{
+    "kind":"agentTurn",
+    "message":"Parse yesterday error logs and summarize",
+    "model":"anthropic/claude-haiku-4"
+  }' \
+  --sessionTarget isolated
+```
+
+**Example (bad):**
+```bash
+# ❌ Using Opus for simple check (60x more expensive!)
+cron add --schedule "*/15 * * * *" \
+  --payload '{
+    "kind":"agentTurn",
+    "message":"Check email",
+    "model":"anthropic/claude-opus-4"
+  }' \
+  --sessionTarget isolated
+```
+
+**Savings:** Using Haiku instead of Opus for 10 daily cronjobs = **$17.70/month saved per agent**.
+
+**Integration with model_router:**
+```bash
+# Test if your cronjob should use Haiku
+model_router.py "parse daily error logs"
+# → Output: Haiku (background task pattern detected)
+```
+
+### 5. 
Token Budget Tracking + +Monitor usage and alert when approaching limits: + +**Setup:** +```bash +# Check current daily usage +python3 scripts/token_tracker.py check + +# Get model suggestions +python3 scripts/token_tracker.py suggest general + +# Reset daily tracking +python3 scripts/token_tracker.py reset +``` + +**Output format:** +```json +{ + "date": "2026-02-06", + "cost": 2.50, + "tokens": 50000, + "limit": 5.00, + "percent_used": 50, + "status": "ok", + "alert": null +} +``` + +**Status levels:** +- `ok`: Below 80% of daily limit +- `warning`: 80-99% of daily limit +- `exceeded`: Over daily limit + +**Integration pattern:** +Before starting expensive operations, check budget: +```python +import json +import subprocess + +result = subprocess.run( + ["python3", "scripts/token_tracker.py", "check"], + capture_output=True, text=True +) +budget = json.loads(result.stdout) + +if budget["status"] == "exceeded": + # Switch to cheaper model or defer non-urgent work + use_model = "anthropic/claude-haiku-4" +elif budget["status"] == "warning": + # Use balanced model + use_model = "anthropic/claude-sonnet-4-5" +``` + +**Customization:** +Edit `daily_limit_usd` and `warn_threshold` parameters in function calls. + +### 6. 
Multi-Provider Strategy + +See `references/PROVIDERS.md` for comprehensive guide on: +- Alternative providers (OpenRouter, Together.ai, Google AI Studio) +- Cost comparison tables +- Routing strategies by task complexity +- Fallback chains for rate-limited scenarios +- API key management + +**Quick reference:** + +| Provider | Model | Cost/MTok | Use Case | +|----------|-------|-----------|----------| +| Anthropic | Haiku 4 | $0.25 | Simple tasks | +| Anthropic | Sonnet 4.5 | $3.00 | Balanced default | +| Anthropic | Opus 4 | $15.00 | Complex reasoning | +| OpenRouter | Gemini 2.5 Flash | $0.075 | Bulk operations | +| Google AI | Gemini 2.0 Flash Exp | FREE | Dev/testing | +| Together | Llama 3.3 70B | $0.18 | Open alternative | + +## Configuration Patches + +See `assets/config-patches.json` for advanced optimizations: + +**Implemented by this skill:** +- ✅ Heartbeat optimization (fully functional) +- ✅ Token budget tracking (fully functional) +- ✅ Model routing logic (fully functional) + +**Native OpenClaw 2026.2.15 — apply directly:** +- ✅ Session pruning (`contextPruning: cache-ttl`) — auto-trims old tool results after Anthropic cache TTL expires +- ✅ Bootstrap size limits (`bootstrapMaxChars` / `bootstrapTotalMaxChars`) — caps workspace file injection size +- ✅ Cache retention long (`cacheRetention: "long"` for Opus) — amortizes cache write costs + +**Requires OpenClaw core support:** +- ⏳ Prompt caching (Anthropic API feature — verify current status) +- ⏳ Lazy context loading (use `context_optimizer.py` script today) +- ⏳ Multi-provider fallback (partially supported) + +**Apply config patches:** +```bash +# Example: Enable multi-provider fallback +gateway config.patch --patch '{"providers": [...]}' +``` + +## Native OpenClaw Diagnostics (2026.2.15+) + +OpenClaw 2026.2.15 added built-in commands that complement this skill's Python scripts. Use these first for quick diagnostics before reaching for the scripts. 
+ +### Context breakdown +``` +/context list → token count per injected file (shows exactly what's eating your prompt) +/context detail → full breakdown including tools, skills, and system prompt sections +``` +**Use before applying `bootstrap_size_limits`** — see which files are oversized, then set `bootstrapMaxChars` accordingly. + +### Per-response usage tracking +``` +/usage tokens → append token count to every reply +/usage full → append tokens + cost estimate to every reply +/usage cost → show cumulative cost summary from session logs +/usage off → disable usage footer +``` +**Combine with `token_tracker.py`** — `/usage cost` gives session totals; `token_tracker.py` tracks daily budget. + +### Session status +``` +/status → model, context %, last response tokens, estimated cost +``` + +--- + +## Cache TTL Heartbeat Alignment (NEW in v1.4.0) + +**The problem:** Anthropic charges ~3.75x more for cache *writes* than cache *reads*. If your agent goes idle and the 1h cache TTL expires, the next request re-writes the entire prompt cache — expensive. + +**The fix:** Set heartbeat interval to **55min** (just under the 1h TTL). The heartbeat keeps the cache warm, so every subsequent request pays cache-read rates instead. + +```bash +# Get optimal interval for your cache TTL +python3 scripts/heartbeat_optimizer.py cache-ttl +# → recommended_interval: 55min (3300s) +# → explanation: keeps 1h Anthropic cache warm + +# Custom TTL (e.g., if you've configured 2h cache) +python3 scripts/heartbeat_optimizer.py cache-ttl 7200 +# → recommended_interval: 115min +``` + +**Apply to your OpenClaw config:** +```json +{ + "agents": { + "defaults": { + "heartbeat": { + "every": "55m" + } + } + } +} +``` + +**Who benefits:** Anthropic API key users only. OAuth profiles already default to 1h heartbeat (OpenClaw smart default). API key profiles default to 30min — bumping to 55min is both cheaper (fewer calls) and cache-warm. + +--- + +## Deployment Patterns + +### For Personal Use +1. 
Install optimized `HEARTBEAT.md` +2. Run budget checks before expensive operations +3. Manually route complex tasks to Opus only when needed + +**Expected savings:** 20-30% + +### For Managed Hosting (xCloud, etc.) +1. Default all agents to Haiku +2. Route user interactions to Sonnet +3. Reserve Opus for explicitly complex requests +4. Use Gemini Flash for background operations +5. Implement daily budget caps per customer + +**Expected savings:** 40-60% + +### For High-Volume Deployments +1. Use multi-provider fallback (OpenRouter + Together.ai) +2. Implement aggressive routing (80% Gemini, 15% Haiku, 5% Sonnet) +3. Deploy local Ollama for offline/cheap operations +4. Batch heartbeat checks (every 2-4 hours, not 30 min) + +**Expected savings:** 70-90% + +## Integration Examples + +### Workflow: Smart Task Handling +```bash +# 1. User sends message +user_msg="debug this error in the logs" + +# 2. Route to appropriate model +routing=$(python3 scripts/model_router.py "$user_msg") +model=$(echo $routing | jq -r .recommended_model) + +# 3. Check budget before proceeding +budget=$(python3 scripts/token_tracker.py check) +status=$(echo $budget | jq -r .status) + +if [ "$status" = "exceeded" ]; then + # Use cheapest model regardless of routing + model="anthropic/claude-haiku-4" +fi + +# 4. 
Process with selected model +# (OpenClaw handles this via config or override) +``` + +### Workflow: Optimized Heartbeat +```markdown +## HEARTBEAT.md + +# Plan what to check +result=$(python3 scripts/heartbeat_optimizer.py plan) +should_run=$(echo $result | jq -r .should_run) + +if [ "$should_run" = "false" ]; then + echo "HEARTBEAT_OK" + exit 0 +fi + +# Run only planned checks +planned=$(echo $result | jq -r '.planned[].type') + +for check in $planned; do + case $check in + email) check_email ;; + calendar) check_calendar ;; + esac + python3 scripts/heartbeat_optimizer.py record $check +done +``` + +## Troubleshooting + +**Issue:** Scripts fail with "module not found" +- **Fix:** Ensure Python 3.7+ is installed. Scripts use only stdlib. + +**Issue:** State files not persisting +- **Fix:** Check that `~/.openclaw/workspace/memory/` directory exists and is writable. + +**Issue:** Budget tracking shows $0.00 +- **Fix:** `token_tracker.py` needs integration with OpenClaw's `session_status` tool. Currently tracks manually recorded usage. + +**Issue:** Routing suggests wrong model tier +- **Fix:** Customize `ROUTING_RULES` in `model_router.py` for your specific patterns. + +## Maintenance + +**Daily:** +- Check budget status: `token_tracker.py check` + +**Weekly:** +- Review routing accuracy (are suggestions correct?) 
+- Adjust heartbeat intervals based on activity + +**Monthly:** +- Compare costs before/after optimization +- Review and update `PROVIDERS.md` with new options + +## Cost Estimation + +**Example: 100K tokens/day workload** + +Without skill: +- 50K context tokens + 50K conversation tokens = 100K total +- All Sonnet: 100K × $3/MTok = **$0.30/day = $9/month** + +| Strategy | Context | Model | Daily Cost | Monthly | Savings | +|----------|---------|-------|-----------|---------|---------| +| Baseline (no optimization) | 50K | Sonnet | $0.30 | $9.00 | 0% | +| Context opt only | 10K (-80%) | Sonnet | $0.18 | $5.40 | 40% | +| Model routing only | 50K | Mixed | $0.18 | $5.40 | 40% | +| **Both (this skill)** | **10K** | **Mixed** | **$0.09** | **$2.70** | **70%** | +| Aggressive + Gemini | 10K | Gemini | $0.03 | $0.90 | **90%** | + +**Key insight:** Context optimization (50K → 10K tokens) saves MORE than model routing! + +**xCloud hosting scenario** (100 customers, 50K tokens/customer/day): +- Baseline (all Sonnet, full context): $450/month +- With token-optimizer: $135/month +- **Savings: $315/month per 100 customers (70%)** + +## Resources + +### Scripts (4 total) +- **`context_optimizer.py`** — Context loading optimization and lazy loading (NEW!) +- **`model_router.py`** — Task classification, model suggestions, and communication enforcement (ENHANCED!) +- **`heartbeat_optimizer.py`** — Interval management and check scheduling +- **`token_tracker.py`** — Budget monitoring and alerts + +### References +- `PROVIDERS.md` — Alternative AI providers, pricing, and routing strategies + +### Assets (3 total) +- **`HEARTBEAT.template.md`** — Drop-in optimized heartbeat template with Haiku enforcement (ENHANCED!) +- **`cronjob-model-guide.md`** — Complete guide for choosing models in cronjobs (NEW!) +- **`config-patches.json`** — Advanced configuration examples + +## Future Enhancements + +Ideas for extending this skill: +1. 
**Auto-routing integration** — Hook into OpenClaw message pipeline +2. **Real-time usage tracking** — Parse session_status automatically +3. **Cost forecasting** — Predict monthly spend based on recent usage +4. **Provider health monitoring** — Track API latency and failures +5. **A/B testing** — Compare quality across different routing strategies diff --git a/_meta.json b/_meta.json new file mode 100644 index 0000000..cf6d7c9 --- /dev/null +++ b/_meta.json @@ -0,0 +1,6 @@ +{ + "ownerId": "kn78dz5s9fev1b0vez1kxz1fj580ckv2", + "slug": "openclaw-token-optimizer", + "version": "3.0.0", + "publishedAt": 1772243871515 +} \ No newline at end of file diff --git a/scripts/context_optimizer.py b/scripts/context_optimizer.py new file mode 100644 index 0000000..a2215a0 --- /dev/null +++ b/scripts/context_optimizer.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python3 +""" +Context optimizer - Analyze and minimize context loading. +Tracks which files are actually needed and creates minimal bundles. +""" +import json +import re +from pathlib import Path +from datetime import datetime, timedelta + +STATE_FILE = Path.home() / ".openclaw/workspace/memory/context-usage.json" + +# Files that should ALWAYS be loaded (identity/personality) +ALWAYS_LOAD = [ + "SOUL.md", + "IDENTITY.md" +] + +# Files to load on-demand based on triggers +CONDITIONAL_FILES = { + "AGENTS.md": ["workflow", "process", "how do i", "remember", "what should"], + "USER.md": ["user", "human", "owner", "about you", "who are you helping"], + "TOOLS.md": ["tool", "camera", "ssh", "voice", "tts", "device"], + "MEMORY.md": ["remember", "recall", "history", "past", "before", "last time"], + "HEARTBEAT.md": ["heartbeat", "check", "monitor", "alert"], +} + +# Files to NEVER load for simple conversations +SKIP_FOR_SIMPLE = [ + "docs/**/*.md", # Documentation + "memory/20*.md", # Old daily logs + "knowledge/**/*", # Knowledge base + "tasks/**/*", # Task tracking +] + +def load_usage_state(): + """Load context usage tracking.""" + 
if STATE_FILE.exists(): + with open(STATE_FILE, 'r') as f: + return json.load(f) + return { + "file_access_count": {}, + "last_accessed": {}, + "session_summaries": [] + } + +def save_usage_state(state): + """Save context usage tracking.""" + STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(STATE_FILE, 'w') as f: + json.dump(state, f, indent=2) + +def classify_prompt(prompt): + """Classify prompt to determine context needs. + + Returns: + tuple of (complexity, context_level, reasoning) + + complexity: simple | medium | complex + context_level: minimal | standard | full + """ + prompt_lower = prompt.lower() + + # Simple conversational patterns (minimal context) + simple_patterns = [ + r'^(hi|hey|hello|thanks|thank you|ok|okay|yes|no|sure)\b', + r'^(what|how)\'s (up|it going)', + r'^\w{1,20}$', # Single word + r'^(good|great|nice|cool)', + ] + + for pattern in simple_patterns: + if re.search(pattern, prompt_lower): + return ("simple", "minimal", "Conversational/greeting pattern") + + # Check for file/documentation references + if any(word in prompt_lower for word in ["read", "show", "file", "doc", "content"]): + return ("simple", "standard", "File access request") + + # Check for memory/history references + if any(word in prompt_lower for word in ["remember", "recall", "history", "before", "last time"]): + return ("medium", "full", "Memory access needed") + + # Check for complex task indicators + complex_indicators = ["design", "architect", "plan", "strategy", "analyze deeply", "comprehensive"] + if any(word in prompt_lower for word in complex_indicators): + return ("complex", "full", "Complex task requiring full context") + + # Default to standard for normal work requests + return ("medium", "standard", "Standard work request") + +def recommend_context_bundle(prompt, current_files=None): + """Recommend which files to load for a given prompt. 
+ + Args: + prompt: User's message + current_files: List of files currently loaded (optional) + + Returns: + dict with recommendations + """ + complexity, context_level, reasoning = classify_prompt(prompt) + prompt_lower = prompt.lower() + + # Start with always-load files + recommended = set(ALWAYS_LOAD) + + if context_level == "minimal": + # For simple conversations, ONLY identity files + pass + + elif context_level == "standard": + # Add conditionally-loaded files based on triggers + for file, triggers in CONDITIONAL_FILES.items(): + if any(trigger in prompt_lower for trigger in triggers): + recommended.add(file) + + # Add today's memory log only + today = datetime.now().strftime("%Y-%m-%d") + recommended.add(f"memory/{today}.md") + + elif context_level == "full": + # Add all conditional files + recommended.update(CONDITIONAL_FILES.keys()) + + # Add today + yesterday memory logs + today = datetime.now() + yesterday = today - timedelta(days=1) + recommended.add(f"memory/{today.strftime('%Y-%m-%d')}.md") + recommended.add(f"memory/{yesterday.strftime('%Y-%m-%d')}.md") + + # Add MEMORY.md for long-term context + recommended.add("MEMORY.md") + + # Calculate savings + if current_files: + current_count = len(current_files) + recommended_count = len(recommended) + savings_percent = ((current_count - recommended_count) / current_count) * 100 + else: + savings_percent = None + + return { + "complexity": complexity, + "context_level": context_level, + "reasoning": reasoning, + "recommended_files": sorted(list(recommended)), + "file_count": len(recommended), + "savings_percent": savings_percent, + "skip_patterns": SKIP_FOR_SIMPLE if context_level == "minimal" else [] + } + +def record_file_access(file_path): + """Record that a file was accessed.""" + state = load_usage_state() + + # Increment access count + state["file_access_count"][file_path] = state["file_access_count"].get(file_path, 0) + 1 + + # Update last accessed timestamp + state["last_accessed"][file_path] = 
datetime.now().isoformat() + + save_usage_state(state) + +def get_usage_stats(): + """Get file usage statistics. + + Returns: + dict with frequently/rarely accessed files + """ + state = load_usage_state() + + # Sort by access count + sorted_files = sorted( + state["file_access_count"].items(), + key=lambda x: x[1], + reverse=True + ) + + total_accesses = sum(state["file_access_count"].values()) + + # Classify files + frequent = [] # Top 20% of accesses + occasional = [] # Middle 60% + rare = [] # Bottom 20% + + if sorted_files: + threshold_frequent = total_accesses * 0.2 + threshold_rare = total_accesses * 0.8 + + cumulative = 0 + for file, count in sorted_files: + cumulative += count + + if cumulative <= threshold_frequent: + frequent.append({"file": file, "count": count}) + elif cumulative <= threshold_rare: + occasional.append({"file": file, "count": count}) + else: + rare.append({"file": file, "count": count}) + + return { + "total_accesses": total_accesses, + "unique_files": len(sorted_files), + "frequent": frequent, + "occasional": occasional, + "rare": rare, + "recommendation": f"Consider loading frequently accessed files upfront, lazy-load rare files" + } + +def generate_optimized_agents_md(): + """Generate an optimized AGENTS.md with lazy loading instructions. + + Returns: + str with new AGENTS.md content + """ + return """# AGENTS.md - Token-Optimized Workspace + +## 🎯 Context Loading Strategy (OPTIMIZED) + +**Default: Minimal context, load on-demand** + +### Every Session (Always Load) +1. Read `SOUL.md` — Who you are (identity/personality) +2. Read `IDENTITY.md` — Your role/name + +**Stop there.** Don't load anything else unless needed. 
+ +### Load On-Demand Only + +**When user mentions memory/history:** +- Read `MEMORY.md` +- Read `memory/YYYY-MM-DD.md` (today only) + +**When user asks about workflows/processes:** +- Read `AGENTS.md` (this file) + +**When user asks about tools/devices:** +- Read `TOOLS.md` + +**When user asks about themselves:** +- Read `USER.md` + +**Never load automatically:** +- ❌ Documentation (`docs/**/*.md`) — load only when explicitly referenced +- ❌ Old memory logs (`memory/2026-01-*.md`) — load only if user mentions date +- ❌ Knowledge base (`knowledge/**/*`) — load only when user asks about specific topic +- ❌ Task files (`tasks/**/*`) — load only when user references task + +### Context by Conversation Type + +**Simple conversation** (hi, thanks, yes, quick question): +- Load: SOUL.md, IDENTITY.md +- Skip: Everything else +- **Token savings: ~80%** + +**Standard work request** (write code, check file): +- Load: SOUL.md, IDENTITY.md, memory/TODAY.md +- Conditionally load: TOOLS.md (if mentions tools) +- Skip: docs, old memory logs +- **Token savings: ~50%** + +**Complex task** (design system, analyze history): +- Load: SOUL.md, IDENTITY.md, MEMORY.md, memory/TODAY.md, memory/YESTERDAY.md +- Conditionally load: Relevant docs/knowledge +- Skip: Unrelated documentation +- **Token savings: ~30%** + +## 🔥 Model Selection (ENFORCED) + +**Simple conversations → HAIKU ONLY** +- Greetings, acknowledgments, simple questions +- Never use Sonnet/Opus for casual chat +- Override: `session_status model=haiku-4` + +**Standard work → SONNET** +- Code writing, file edits, explanations +- Default model for most work + +**Complex reasoning → OPUS** +- Architecture design, deep analysis +- Use sparingly, only when explicitly needed + +## 💾 Memory (Lazy Loading) + +**Daily notes:** `memory/YYYY-MM-DD.md` +- ✅ Load TODAY when user asks about recent work +- ❌ Don't load YESTERDAY unless explicitly needed +- ❌ Don't load older logs automatically + +**Long-term:** `MEMORY.md` +- ✅ Load when 
user mentions "remember", "history", "before" +- ❌ Don't load for simple conversations + +## 📊 Heartbeats (Optimized) + +Use `heartbeat_optimizer.py` from token-optimizer skill: +- Check only what needs checking (not everything every time) +- Skip during quiet hours (23:00-08:00) +- Return `HEARTBEAT_OK` when nothing to report + +## 🎨 Skills (Lazy Loading) + +**Don't pre-read skill documentation.** + +When skill triggers: +1. Read only the SKILL.md +2. Read only the specific reference files you need +3. Skip examples/assets unless explicitly needed + +## 🚫 Anti-Patterns (What NOT to Do) + +❌ Loading all docs at session start +❌ Re-reading unchanged files +❌ Using Opus for simple chat +❌ Checking everything in every heartbeat +❌ Loading full conversation history for simple questions + +✅ Load minimal context by default +✅ Read files only when referenced +✅ Use cheapest model for the task +✅ Batch heartbeat checks intelligently +✅ Keep context focused on current task + +## 📈 Monitoring + +Track your savings: +```bash +python3 scripts/context_optimizer.py stats +python3 scripts/token_tracker.py check +``` + +## Integration + +Run context optimizer before responding: +```bash +# Get recommendations +context_optimizer.py recommend "" + +# Only load recommended files +# Skip everything else +``` + +--- + +**This optimized approach reduces token usage by 50-80% for typical workloads.** +""" + +def main(): + """CLI interface for context optimizer.""" + import sys + + if len(sys.argv) < 2: + print("Usage: context_optimizer.py [recommend|record|stats|generate-agents]") + sys.exit(1) + + command = sys.argv[1] + + if command == "recommend": + if len(sys.argv) < 3: + print("Usage: context_optimizer.py recommend '' [current_files]") + sys.exit(1) + + prompt = sys.argv[2] + current_files = sys.argv[3:] if len(sys.argv) > 3 else None + + result = recommend_context_bundle(prompt, current_files) + print(json.dumps(result, indent=2)) + + elif command == "record": + if len(sys.argv) < 
3: + print("Usage: context_optimizer.py record ") + sys.exit(1) + + file_path = sys.argv[2] + record_file_access(file_path) + print(f"Recorded access: {file_path}") + + elif command == "stats": + result = get_usage_stats() + print(json.dumps(result, indent=2)) + + elif command == "generate-agents": + content = generate_optimized_agents_md() + output_path = Path.home() / ".openclaw/workspace/AGENTS.md.optimized" + output_path.write_text(content) + print(f"Generated optimized AGENTS.md at: {output_path}") + print("\nReview and replace your current AGENTS.md with this version.") + + else: + print(f"Unknown command: {command}") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/scripts/heartbeat_optimizer.py b/scripts/heartbeat_optimizer.py new file mode 100644 index 0000000..740bdcc --- /dev/null +++ b/scripts/heartbeat_optimizer.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python3 +""" +Heartbeat optimizer - Manages efficient heartbeat intervals and batched checks. +Reduces API calls by tracking check timestamps and batching operations. + +v1.4.0: Added cache-ttl alignment — recommends 55min intervals to keep +Anthropic's 1h prompt cache warm between heartbeats (avoids cache re-write penalty). +""" +import json +import os +from datetime import datetime, timedelta +from pathlib import Path + +STATE_FILE = Path.home() / ".openclaw/workspace/memory/heartbeat-state.json" + +# Optimal interval to keep Anthropic's 1h prompt cache warm. +# Set just under 1h so the cache never expires between heartbeats. +# Anthropic API key users should use this as their default heartbeat interval. 
+CACHE_TTL_OPTIMAL_INTERVAL = 3300 # 55 minutes in seconds +CACHE_TTL_WINDOW = 3600 # Anthropic default cache TTL = 1 hour + +DEFAULT_INTERVALS = { + "email": 3600, # 1 hour + "calendar": 7200, # 2 hours + "weather": 14400, # 4 hours + "social": 7200, # 2 hours + "monitoring": 1800 # 30 minutes +} + +QUIET_HOURS = { + "start": 23, # 11 PM + "end": 8 # 8 AM +} + +def load_state(): + """Load heartbeat tracking state.""" + if STATE_FILE.exists(): + with open(STATE_FILE, 'r') as f: + return json.load(f) + return { + "lastChecks": {}, + "intervals": DEFAULT_INTERVALS.copy(), + "skipCount": 0 + } + +def save_state(state): + """Save heartbeat tracking state.""" + STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(STATE_FILE, 'w') as f: + json.dump(state, f, indent=2) + +def is_quiet_hours(hour=None): + """Check if current time is during quiet hours.""" + if hour is None: + hour = datetime.now().hour + + start = QUIET_HOURS["start"] + end = QUIET_HOURS["end"] + + if start > end: # Wraps midnight + return hour >= start or hour < end + else: + return start <= hour < end + +def should_check(check_type, force=False): + """Determine if a check should run based on interval. + + Args: + check_type: Type of check (email, calendar, etc.) 
+ force: Force check regardless of interval + + Returns: + dict with decision and reasoning + """ + if force: + return { + "should_check": True, + "reason": "Forced check", + "next_check": None + } + + # Skip all checks during quiet hours + if is_quiet_hours(): + return { + "should_check": False, + "reason": "Quiet hours (23:00-08:00)", + "next_check": "08:00" + } + + state = load_state() + now = datetime.now() + + # Get last check time + last_check_ts = state["lastChecks"].get(check_type) + if not last_check_ts: + # Never checked before + return { + "should_check": True, + "reason": "First check", + "next_check": None + } + + last_check = datetime.fromisoformat(last_check_ts) + interval = state["intervals"].get(check_type, DEFAULT_INTERVALS.get(check_type, 3600)) + next_check = last_check + timedelta(seconds=interval) + + if now >= next_check: + return { + "should_check": True, + "reason": f"Interval elapsed ({interval}s)", + "next_check": None + } + else: + remaining = (next_check - now).total_seconds() + return { + "should_check": False, + "reason": f"Too soon ({int(remaining / 60)}min remaining)", + "next_check": next_check.strftime("%H:%M") + } + +def record_check(check_type): + """Record that a check was performed.""" + state = load_state() + state["lastChecks"][check_type] = datetime.now().isoformat() + save_state(state) + +def plan_heartbeat(checks=None): + """Plan which checks should run in next heartbeat. 
+ + Args: + checks: List of check types to consider (default: all) + + Returns: + dict with planned checks and skip decision + """ + if checks is None: + checks = list(DEFAULT_INTERVALS.keys()) + + planned = [] + skipped = [] + + for check in checks: + decision = should_check(check) + if decision["should_check"]: + planned.append({ + "type": check, + "reason": decision["reason"] + }) + else: + skipped.append({ + "type": check, + "reason": decision["reason"], + "next_check": decision["next_check"] + }) + + result = { + "planned": planned, + "skipped": skipped, + "should_run": len(planned) > 0, + "can_skip": len(planned) == 0 + } + + # Add cache TTL alignment recommendation + result["cache_ttl_tip"] = ( + "Tip: Set your OpenClaw heartbeat interval to 55min (3300s) " + "to keep the Anthropic 1h prompt cache warm. " + "Run: heartbeat_optimizer.py cache-ttl for details." + ) + + return result + +def get_cache_ttl_recommendation(cache_ttl_seconds=None): + """Calculate optimal heartbeat interval for Anthropic cache TTL warmup. + + Anthropic prompt caching has a 1h TTL by default on API key profiles. + Setting heartbeat interval just under the TTL prevents the cache from + expiring between heartbeats — avoiding the cache re-write penalty. 
+ + Args: + cache_ttl_seconds: Your cache TTL in seconds (default: 3600 = 1h) + + Returns: + dict with recommended interval and explanation + """ + if cache_ttl_seconds is None: + cache_ttl_seconds = CACHE_TTL_WINDOW + + # Use 92% of TTL as the safe warmup interval (5min buffer) + buffer_seconds = 300 # 5 minute buffer + recommended = cache_ttl_seconds - buffer_seconds + + return { + "cache_ttl_seconds": cache_ttl_seconds, + "cache_ttl_human": f"{cache_ttl_seconds // 60}min", + "recommended_interval_seconds": recommended, + "recommended_interval_human": f"{recommended // 60}min", + "buffer_seconds": buffer_seconds, + "explanation": ( + f"With a {cache_ttl_seconds // 60}min Anthropic cache TTL, set your heartbeat " + f"to {recommended // 60}min ({recommended}s). This keeps the prompt cache warm " + f"between heartbeats — preventing the cache re-write penalty when the TTL expires." + ), + "how_to_configure": ( + "In openclaw.json: agents.defaults.heartbeat.every = \"55m\"\n" + "Or use the config patch from assets/config-patches.json (heartbeat_optimization)" + ), + "cost_impact": ( + "Cache writes cost ~3.75x more than cache reads (Anthropic pricing). " + "Without warmup: every heartbeat after an idle hour triggers a full cache re-write. " + "With warmup: cache reads only — significantly cheaper for long-running agents." + ), + "note": ( + "This applies to Anthropic API key users only. " + "OAuth profiles use a 1h heartbeat by default (OpenClaw smart default). " + "API key profiles default to 30min heartbeat — consider bumping to 55min." + ) + } + +def update_interval(check_type, new_interval_seconds): + """Update check interval for a specific check type. 
+ + Args: + check_type: Type of check + new_interval_seconds: New interval in seconds + """ + state = load_state() + state["intervals"][check_type] = new_interval_seconds + save_state(state) + return { + "check_type": check_type, + "old_interval": DEFAULT_INTERVALS.get(check_type), + "new_interval": new_interval_seconds + } + +def main(): + """CLI interface for heartbeat optimizer.""" + import sys + + if len(sys.argv) < 2: + print("Usage: heartbeat_optimizer.py [plan|check|record|interval|cache-ttl|reset]") + sys.exit(1) + + command = sys.argv[1] + + if command == "plan": + # Plan next heartbeat + checks = sys.argv[2:] if len(sys.argv) > 2 else None + result = plan_heartbeat(checks) + print(json.dumps(result, indent=2)) + + elif command == "check": + # Check if specific type should run + if len(sys.argv) < 3: + print("Usage: heartbeat_optimizer.py check ") + sys.exit(1) + check_type = sys.argv[2] + force = len(sys.argv) > 3 and sys.argv[3] == "--force" + result = should_check(check_type, force) + print(json.dumps(result, indent=2)) + + elif command == "record": + # Record that a check was performed + if len(sys.argv) < 3: + print("Usage: heartbeat_optimizer.py record ") + sys.exit(1) + check_type = sys.argv[2] + record_check(check_type) + print(f"Recorded check: {check_type}") + + elif command == "interval": + # Update interval + if len(sys.argv) < 4: + print("Usage: heartbeat_optimizer.py interval ") + sys.exit(1) + check_type = sys.argv[2] + interval = int(sys.argv[3]) + result = update_interval(check_type, interval) + print(json.dumps(result, indent=2)) + + elif command == "cache-ttl": + # Show cache TTL alignment recommendation + cache_ttl = int(sys.argv[2]) if len(sys.argv) > 2 else None + result = get_cache_ttl_recommendation(cache_ttl) + print(json.dumps(result, indent=2)) + + elif command == "reset": + # Reset state + if STATE_FILE.exists(): + STATE_FILE.unlink() + print("Heartbeat state reset.") + + else: + print(f"Unknown command: {command}") + 
print("Available: plan | check | record | interval | cache-ttl [ttl_seconds] | reset") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/scripts/model_router.py b/scripts/model_router.py new file mode 100644 index 0000000..a266b2b --- /dev/null +++ b/scripts/model_router.py @@ -0,0 +1,438 @@ +#!/usr/bin/env python3 +""" +Smart model router - routes tasks to appropriate models based on complexity. +Supports multiple providers: Anthropic, OpenAI, Google, OpenRouter. +Helps reduce token costs by using cheaper models for simpler tasks. + +Version: 1.1.0 +""" +import re +import os +import json + +# ============================================================================ +# PROVIDER CONFIGURATION +# ============================================================================ + +# Detect primary provider from environment (default: anthropic) +def detect_provider(): + """Detect which provider to use based on available API keys.""" + if os.environ.get("ANTHROPIC_API_KEY"): + return "anthropic" + elif os.environ.get("OPENAI_API_KEY"): + return "openai" + elif os.environ.get("GOOGLE_API_KEY"): + return "google" + elif os.environ.get("OPENROUTER_API_KEY"): + return "openrouter" + # Default to anthropic + return "anthropic" + +# Model tiers per provider +PROVIDER_MODELS = { + "anthropic": { + "cheap": "anthropic/claude-haiku-4", + "balanced": "anthropic/claude-sonnet-4-5", + "smart": "anthropic/claude-opus-4", + "costs": { # $/MTok (input) + "cheap": 0.25, + "balanced": 3.00, + "smart": 15.00 + } + }, + "openai": { + "cheap": "openai/gpt-4.1-nano", + "balanced": "openai/gpt-4.1-mini", + "smart": "openai/gpt-4.1", + "premium": "openai/gpt-5", + "costs": { + "cheap": 0.10, + "balanced": 0.40, + "smart": 2.00, + "premium": 10.00 + } + }, + "google": { + "cheap": "google/gemini-2.0-flash", + "balanced": "google/gemini-2.5-flash", + "smart": "google/gemini-2.5-pro", + "costs": { + "cheap": 0.075, + "balanced": 0.15, + "smart": 1.25 + } + }, + "openrouter": { + 
"cheap": "google/gemini-2.0-flash", + "balanced": "anthropic/claude-sonnet-4-5", + "smart": "anthropic/claude-opus-4", + "costs": { + "cheap": 0.075, + "balanced": 3.00, + "smart": 15.00 + } + } +} + +# Tier mapping for cross-provider compatibility +TIER_ALIASES = { + "haiku": "cheap", + "sonnet": "balanced", + "opus": "smart", + "nano": "cheap", + "mini": "balanced", + "flash": "cheap", + "pro": "smart" +} + +# ============================================================================ +# TASK CLASSIFICATION PATTERNS +# ============================================================================ + +# Communication patterns that should ALWAYS use cheap tier (never balanced/smart) +COMMUNICATION_PATTERNS = [ + r'^(hi|hey|hello|yo|sup)\b', + r'^(thanks|thank you|thx)\b', + r'^(ok|okay|sure|got it|understood)\b', + r'^(yes|yeah|yep|yup|no|nope)\b', + r'^(good|great|nice|cool|awesome)\b', + r"^(what|how)'s (up|it going)", + r'^\w{1,15}$', # Single short word + r'^(lol|haha|lmao)\b', +] + +# Background/routine tasks that should ALWAYS use cheap tier +BACKGROUND_TASK_PATTERNS = [ + # Heartbeat checks + r'heartbeat', + r'check\s+(email|calendar|weather|monitoring)', + r'monitor\s+', + r'poll\s+', + + # Cronjob/scheduled tasks + r'cron', + r'scheduled\s+task', + r'periodic\s+check', + r'reminder', + + # Document parsing/extraction + r'parse\s+(document|file|log|csv|json|xml)', + r'extract\s+(text|data|content)\s+from', + r'read\s+(log|logs)', + r'scan\s+(file|document)', + r'process\s+(csv|json|xml|yaml)', +] + +# Model routing rules with tier-based approach +ROUTING_RULES = { + "cheap": { + "patterns": [ + r"read\s+file", + r"list\s+files", + r"show\s+(me\s+)?the\s+contents?", + r"what's\s+in", + r"cat\s+", + r"get\s+status", + r"check\s+(if|whether)", + r"is\s+\w+\s+(running|active|enabled)" + ], + "keywords": ["read", "list", "show", "status", "check", "get"], + "cost_multiplier": 0.083 # vs balanced + }, + "balanced": { + "patterns": [ + r"write\s+\w+", + 
r"create\s+\w+", + r"edit\s+\w+", + r"fix\s+\w+", + r"debug\s+\w+", + r"explain\s+\w+", + r"how\s+(do|can)\s+i" + ], + "keywords": ["write", "create", "edit", "update", "fix", "debug", "explain"], + "cost_multiplier": 1.0 + }, + "smart": { + "patterns": [ + r"complex\s+\w+", + r"design\s+\w+", + r"architect\w+", + r"analyze\s+deeply", + r"comprehensive\s+\w+" + ], + "keywords": ["design", "architect", "complex", "comprehensive", "deep"], + "cost_multiplier": 5.0 + } +} + +# Legacy tier names for backwards compatibility +LEGACY_TIER_MAP = { + "haiku": "cheap", + "sonnet": "balanced", + "opus": "smart" +} + +# ============================================================================ +# CORE FUNCTIONS +# ============================================================================ + +def classify_task(prompt): + """Classify task complexity based on prompt text. + + Args: + prompt: User's message/request + + Returns: + tuple of (tier, confidence, reasoning) + tier is one of: cheap, balanced, smart + """ + prompt_lower = prompt.lower() + + # FIRST: Check if this is simple communication (ALWAYS cheap) + for pattern in COMMUNICATION_PATTERNS: + if re.search(pattern, prompt_lower): + return ("cheap", 1.0, "Simple communication - use cheapest model") + + # SECOND: Check if this is a background/routine task (ALWAYS cheap) + for pattern in BACKGROUND_TASK_PATTERNS: + if re.search(pattern, prompt_lower): + return ("cheap", 1.0, "Background task (heartbeat/cron/parsing) - use cheapest model") + + # Score each tier + scores = {} + for tier, rules in ROUTING_RULES.items(): + score = 0 + matches = [] + + # Pattern matching + for pattern in rules["patterns"]: + if re.search(pattern, prompt_lower): + score += 2 + matches.append(f"pattern:{pattern}") + + # Keyword matching + for keyword in rules["keywords"]: + if keyword in prompt_lower: + score += 1 + matches.append(f"keyword:{keyword}") + + scores[tier] = { + "score": score, + "matches": matches + } + + # Determine best tier + 
best_tier = max(scores.items(), key=lambda x: x[1]["score"]) + + if best_tier[1]["score"] == 0: + # Default to balanced if unclear + return ("balanced", 0.5, "No clear indicators, defaulting to balanced model") + + confidence = min(best_tier[1]["score"] / 5.0, 1.0) # Cap at 1.0 + reasoning = f"Matched: {', '.join(best_tier[1]['matches'][:3])}" + + return (best_tier[0], confidence, reasoning) + +def normalize_tier(tier): + """Normalize tier name to standard format (cheap/balanced/smart).""" + tier_lower = tier.lower() + + # Check legacy mappings + if tier_lower in LEGACY_TIER_MAP: + return LEGACY_TIER_MAP[tier_lower] + + # Check aliases + if tier_lower in TIER_ALIASES: + return TIER_ALIASES[tier_lower] + + # Already standard or unknown + if tier_lower in ["cheap", "balanced", "smart", "premium"]: + return tier_lower + + return "balanced" # Default + +def get_model_for_tier(tier, provider=None): + """Get the specific model name for a tier and provider. + + Args: + tier: cheap, balanced, smart, or premium + provider: anthropic, openai, google, openrouter (or None to auto-detect) + + Returns: + Model identifier string + """ + if provider is None: + provider = detect_provider() + + provider_config = PROVIDER_MODELS.get(provider, PROVIDER_MODELS["anthropic"]) + + # Normalize tier + tier = normalize_tier(tier) + + # Get model (fallback to balanced if tier not available) + return provider_config.get(tier, provider_config.get("balanced")) + +def route_task(prompt, current_model=None, force_tier=None, provider=None): + """Route a task to appropriate model. 
+ + Args: + prompt: User's message/request + current_model: Current model being used (optional) + force_tier: Override classification (cheap/balanced/smart or haiku/sonnet/opus) + provider: Force specific provider (anthropic/openai/google/openrouter) + + Returns: + dict with routing decision + """ + # Auto-detect provider if not specified + if provider is None: + provider = detect_provider() + + # Set default current model + if current_model is None: + current_model = get_model_for_tier("balanced", provider) + + if force_tier: + tier = normalize_tier(force_tier) + confidence = 1.0 + reasoning = "User-specified tier" + else: + tier, confidence, reasoning = classify_task(prompt) + + recommended_model = get_model_for_tier(tier, provider) + + # Calculate cost savings + provider_config = PROVIDER_MODELS.get(provider, PROVIDER_MODELS["anthropic"]) + base_cost = provider_config["costs"].get("balanced", 1.0) + tier_cost = provider_config["costs"].get(tier, base_cost) + cost_savings = (1.0 - (tier_cost / base_cost)) * 100 + + return { + "provider": provider, + "current_model": current_model, + "recommended_model": recommended_model, + "tier": tier, + "tier_display": { + "cheap": "Cheap (Haiku/Nano/Flash)", + "balanced": "Balanced (Sonnet/Mini/Flash)", + "smart": "Smart (Opus/GPT-4.1/Pro)", + "premium": "Premium (GPT-5)" + }.get(tier, tier), + "confidence": confidence, + "reasoning": reasoning, + "cost_savings_percent": max(0, cost_savings), + "should_switch": recommended_model != current_model, + "all_providers": { + p: get_model_for_tier(tier, p) for p in PROVIDER_MODELS.keys() + } + } + +def get_model_comparison(): + """Get a comparison of all models across providers. 
+ + Returns: + dict with provider -> tier -> model mapping + """ + result = {} + for provider, config in PROVIDER_MODELS.items(): + result[provider] = { + tier: { + "model": model, + "cost_per_mtok": config["costs"].get(tier, "N/A") + } + for tier, model in config.items() + if tier != "costs" + } + return result + +# ============================================================================ +# CLI INTERFACE +# ============================================================================ + +def main(): + """CLI interface for model router.""" + import sys + + if len(sys.argv) < 2: + print("Usage: model_router.py [args]") + print("") + print("Commands:") + print(" route '' [current_model] [force_tier] [provider]") + print(" compare — Show all models across providers") + print(" providers — List available providers") + print(" detect — Show auto-detected provider") + print("") + print("Examples:") + print(" model_router.py route 'thanks!'") + print(" model_router.py route 'design an architecture' --provider openai") + print(" model_router.py compare") + sys.exit(1) + + command = sys.argv[1] + + # Known commands + known_commands = ["route", "compare", "providers", "detect"] + + if command == "route" or command not in known_commands: + # Route a prompt (either explicit "route" command or shorthand) + if command == "route": + if len(sys.argv) < 3: + print("Usage: model_router.py route ''") + sys.exit(1) + prompt = sys.argv[2] + start_idx = 3 + else: + # Shorthand: first arg is the prompt + prompt = command + start_idx = 2 + + # Parse remaining args + current_model = None + force_tier = None + provider = None + + i = start_idx + while i < len(sys.argv): + arg = sys.argv[i] + if arg.startswith("--provider="): + provider = arg.split("=")[1] + elif arg.startswith("--tier="): + force_tier = arg.split("=")[1] + elif arg == "--provider" and i+1 < len(sys.argv): + provider = sys.argv[i+1] + i += 1 + elif arg == "--tier" and i+1 < len(sys.argv): + force_tier = sys.argv[i+1] + i 
+= 1 + elif arg.startswith("--"): + pass # Skip unknown flags + elif current_model is None and "/" in arg: + current_model = arg + elif force_tier is None: + force_tier = arg + i += 1 + + result = route_task(prompt, current_model, force_tier, provider) + print(json.dumps(result, indent=2)) + + elif command == "compare": + result = get_model_comparison() + print(json.dumps(result, indent=2)) + + elif command == "providers": + print("Available providers:") + for provider in PROVIDER_MODELS.keys(): + detected = " (detected)" if provider == detect_provider() else "" + print(f" - {provider}{detected}") + + elif command == "detect": + provider = detect_provider() + print(f"Auto-detected provider: {provider}") + print(f"Models: {json.dumps(PROVIDER_MODELS[provider], indent=2)}") + + else: + print(f"Unknown command: {command}") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/scripts/optimize.sh b/scripts/optimize.sh new file mode 100644 index 0000000..b442134 --- /dev/null +++ b/scripts/optimize.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# optimize.sh - Quick CLI wrapper for token optimization tools +# +# Usage: +# ./optimize.sh route "your prompt here" # Route to appropriate model +# ./optimize.sh context # Generate optimized AGENTS.md +# ./optimize.sh recommend "prompt" # Recommend context files +# ./optimize.sh budget # Check token budget +# ./optimize.sh heartbeat # Install optimized heartbeat +# +# Examples: +# ./optimize.sh route "thanks!" 
# → cheap tier (Haiku) +# ./optimize.sh route "design an API" # → smart tier (Opus) + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +case "$1" in + route|model) + shift + python3 "$SCRIPT_DIR/model_router.py" "$@" + ;; + context|agents) + python3 "$SCRIPT_DIR/context_optimizer.py" generate-agents + ;; + recommend|ctx) + shift + python3 "$SCRIPT_DIR/context_optimizer.py" recommend "$@" + ;; + budget|tokens|check) + python3 "$SCRIPT_DIR/token_tracker.py" check + ;; + heartbeat|hb) + DEST="${HOME}/.openclaw/workspace/HEARTBEAT.md" + cp "$SCRIPT_DIR/../assets/HEARTBEAT.template.md" "$DEST" + echo "✅ Installed optimized heartbeat to: $DEST" + ;; + providers) + python3 "$SCRIPT_DIR/model_router.py" providers + ;; + detect) + python3 "$SCRIPT_DIR/model_router.py" detect + ;; + help|--help|-h|"") + echo "Token Optimizer CLI" + echo "" + echo "Usage: ./optimize.sh [args]" + echo "" + echo "Commands:" + echo " route Route prompt to appropriate model tier" + echo " context Generate optimized AGENTS.md" + echo " recommend Recommend context files for prompt" + echo " budget Check current token budget" + echo " heartbeat Install optimized heartbeat" + echo " providers List available providers" + echo " detect Show auto-detected provider" + echo "" + echo "Examples:" + echo " ./optimize.sh route 'thanks!' # → cheap tier" + echo " ./optimize.sh route 'design an API' # → smart tier" + echo " ./optimize.sh budget # → current usage" + ;; + *) + echo "Unknown command: $1" + echo "Run './optimize.sh help' for usage" + exit 1 + ;; +esac diff --git a/scripts/token_tracker.py b/scripts/token_tracker.py new file mode 100644 index 0000000..acfb062 --- /dev/null +++ b/scripts/token_tracker.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +""" +Token usage tracker with budget alerts. +Monitors API usage and warns when approaching limits. 
+""" +import json +import os +from datetime import datetime, timedelta +from pathlib import Path + +STATE_FILE = Path.home() / ".openclaw/workspace/memory/token-tracker-state.json" + +def load_state(): + """Load tracking state from file.""" + if STATE_FILE.exists(): + with open(STATE_FILE, 'r') as f: + return json.load(f) + return { + "daily_usage": {}, + "alerts_sent": [], + "last_reset": datetime.now().isoformat() + } + +def save_state(state): + """Save tracking state to file.""" + STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(STATE_FILE, 'w') as f: + json.dump(state, f, indent=2) + +def get_usage_from_session_status(): + """Parse session status to extract token usage. + Returns dict with input_tokens, output_tokens, and cost. + """ + # This would integrate with OpenClaw's session_status tool + # For now, returns placeholder structure + return { + "input_tokens": 0, + "output_tokens": 0, + "total_cost": 0.0, + "model": "anthropic/claude-sonnet-4-5" + } + +def check_budget(daily_limit_usd=5.0, warn_threshold=0.8): + """Check if usage is approaching daily budget. + + Args: + daily_limit_usd: Daily spending limit in USD + warn_threshold: Fraction of limit to trigger warning (default 80%) + + Returns: + dict with status, usage, limit, and alert message if applicable + """ + state = load_state() + today = datetime.now().date().isoformat() + + # Reset if new day + if today not in state["daily_usage"]: + state["daily_usage"] = {today: {"cost": 0.0, "tokens": 0}} + state["alerts_sent"] = [] + + usage = state["daily_usage"][today] + percent_used = (usage["cost"] / daily_limit_usd) * 100 + + result = { + "date": today, + "cost": usage["cost"], + "tokens": usage["tokens"], + "limit": daily_limit_usd, + "percent_used": percent_used, + "status": "ok" + } + + # Check thresholds + if percent_used >= 100: + result["status"] = "exceeded" + result["alert"] = f"⚠️ Daily budget exceeded! 
${usage['cost']:.2f} / ${daily_limit_usd:.2f}" + elif percent_used >= (warn_threshold * 100): + result["status"] = "warning" + result["alert"] = f"⚠️ Approaching daily limit: ${usage['cost']:.2f} / ${daily_limit_usd:.2f} ({percent_used:.0f}%)" + + return result + +def suggest_cheaper_model(current_model, task_type="general"): + """Suggest cheaper alternative models based on task type. + + Args: + current_model: Currently configured model + task_type: Type of task (general, simple, complex) + + Returns: + dict with suggestion and cost savings + """ + # Cost per 1M tokens (input/output average) + model_costs = { + "anthropic/claude-opus-4": 15.0, + "anthropic/claude-sonnet-4-5": 3.0, + "anthropic/claude-haiku-4": 0.25, + "google/gemini-2.0-flash-exp": 0.075, + "openai/gpt-4o": 2.5, + "openai/gpt-4o-mini": 0.15 + } + + suggestions = { + "simple": [ + ("anthropic/claude-haiku-4", "12x cheaper, great for file reads, routine checks"), + ("google/gemini-2.0-flash-exp", "40x cheaper via OpenRouter, good for simple tasks") + ], + "general": [ + ("anthropic/claude-sonnet-4-5", "Balanced performance and cost"), + ("google/gemini-2.0-flash-exp", "Much cheaper, decent quality") + ], + "complex": [ + ("anthropic/claude-opus-4", "Best reasoning, use sparingly"), + ("anthropic/claude-sonnet-4-5", "Good balance for most complex tasks") + ] + } + + return { + "current": current_model, + "current_cost": model_costs.get(current_model, "unknown"), + "suggestions": suggestions.get(task_type, suggestions["general"]) + } + +def main(): + """CLI interface for token tracker.""" + import sys + + if len(sys.argv) < 2: + print("Usage: token_tracker.py [check|suggest|reset]") + sys.exit(1) + + command = sys.argv[1] + + if command == "check": + result = check_budget() + print(json.dumps(result, indent=2)) + + elif command == "suggest": + task = sys.argv[2] if len(sys.argv) > 2 else "general" + current = sys.argv[3] if len(sys.argv) > 3 else "anthropic/claude-sonnet-4-5" + result = 
suggest_cheaper_model(current, task) + print(json.dumps(result, indent=2)) + + elif command == "reset": + state = load_state() + state["daily_usage"] = {} + state["alerts_sent"] = [] + save_state(state) + print("Token tracker state reset.") + + else: + print(f"Unknown command: {command}") + sys.exit(1) + +if __name__ == "__main__": + main()