Initial commit with translated description

This commit is contained in:
2026-03-29 08:32:46 +08:00
commit 6637dd34f9
11 changed files with 2613 additions and 0 deletions

View File

@@ -0,0 +1,395 @@
#!/usr/bin/env python3
"""
Context optimizer - Analyze and minimize context loading.
Tracks which files are actually needed and creates minimal bundles.
"""
import json
import re
from pathlib import Path
from datetime import datetime, timedelta
# Persisted usage-tracking state lives alongside other workspace memory.
STATE_FILE = Path.home() / ".openclaw/workspace/memory/context-usage.json"
# Files that should ALWAYS be loaded (identity/personality)
ALWAYS_LOAD = [
    "SOUL.md",
    "IDENTITY.md"
]
# Files to load on-demand based on triggers (substring match on the prompt)
CONDITIONAL_FILES = {
    "AGENTS.md": ["workflow", "process", "how do i", "remember", "what should"],
    "USER.md": ["user", "human", "owner", "about you", "who are you helping"],
    "TOOLS.md": ["tool", "camera", "ssh", "voice", "tts", "device"],
    "MEMORY.md": ["remember", "recall", "history", "past", "before", "last time"],
    "HEARTBEAT.md": ["heartbeat", "check", "monitor", "alert"],
}
# Files to NEVER load for simple conversations (glob patterns)
SKIP_FOR_SIMPLE = [
    "docs/**/*.md",     # Documentation
    "memory/20*.md",    # Old daily logs
    "knowledge/**/*",   # Knowledge base
    "tasks/**/*",       # Task tracking
]
def load_usage_state():
    """Return the persisted context-usage tracking state.

    Falls back to a fresh, empty tracking structure when no state file
    has been written yet.
    """
    if not STATE_FILE.exists():
        return {
            "file_access_count": {},
            "last_accessed": {},
            "session_summaries": []
        }
    with open(STATE_FILE, 'r') as f:
        return json.load(f)
def save_usage_state(state):
    """Persist context-usage tracking state as pretty-printed JSON."""
    # Make sure the memory directory exists before the first write.
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    STATE_FILE.write_text(json.dumps(state, indent=2))
def classify_prompt(prompt):
    """Classify a prompt to determine how much context it needs.

    Returns:
        tuple of (complexity, context_level, reasoning)
        complexity: simple | medium | complex
        context_level: minimal | standard | full
    """
    text = prompt.lower()

    # Conversational/greeting patterns need only minimal context.
    for pattern in (
        r'^(hi|hey|hello|thanks|thank you|ok|okay|yes|no|sure)\b',
        r'^(what|how)\'s (up|it going)',
        r'^\w{1,20}$',  # single word
        r'^(good|great|nice|cool)',
    ):
        if re.search(pattern, text):
            return ("simple", "minimal", "Conversational/greeting pattern")

    def mentions(words):
        """True when any of *words* appears as a substring of the prompt."""
        return any(w in text for w in words)

    # Order matters: file access beats memory beats complexity keywords.
    if mentions(("read", "show", "file", "doc", "content")):
        return ("simple", "standard", "File access request")
    if mentions(("remember", "recall", "history", "before", "last time")):
        return ("medium", "full", "Memory access needed")
    if mentions(("design", "architect", "plan", "strategy", "analyze deeply", "comprehensive")):
        return ("complex", "full", "Complex task requiring full context")

    # Everything else is treated as a normal work request.
    return ("medium", "standard", "Standard work request")
def recommend_context_bundle(prompt, current_files=None):
    """Recommend which workspace files to load for a given prompt.

    Args:
        prompt: User's message
        current_files: List of files currently loaded (optional)

    Returns:
        dict with recommendations
    """
    complexity, context_level, reasoning = classify_prompt(prompt)
    text = prompt.lower()

    # Identity files are loaded no matter what.
    bundle = set(ALWAYS_LOAD)

    if context_level == "standard":
        # Only pull in conditional files whose trigger words appear.
        bundle.update(
            name for name, triggers in CONDITIONAL_FILES.items()
            if any(t in text for t in triggers)
        )
        # Plus today's daily log only.
        bundle.add(f"memory/{datetime.now().strftime('%Y-%m-%d')}.md")
    elif context_level == "full":
        # Everything conditional, two days of logs, and long-term memory.
        bundle.update(CONDITIONAL_FILES.keys())
        now = datetime.now()
        bundle.add(f"memory/{now.strftime('%Y-%m-%d')}.md")
        bundle.add(f"memory/{(now - timedelta(days=1)).strftime('%Y-%m-%d')}.md")
        bundle.add("MEMORY.md")
    # "minimal" level: identity files only — nothing extra.

    # Relative shrink vs. what is currently loaded (None when unknown).
    savings = None
    if current_files:
        savings = ((len(current_files) - len(bundle)) / len(current_files)) * 100

    return {
        "complexity": complexity,
        "context_level": context_level,
        "reasoning": reasoning,
        "recommended_files": sorted(bundle),
        "file_count": len(bundle),
        "savings_percent": savings,
        "skip_patterns": SKIP_FOR_SIMPLE if context_level == "minimal" else []
    }
def record_file_access(file_path):
    """Bump the access counter and last-accessed timestamp for a file."""
    state = load_usage_state()
    counts = state["file_access_count"]
    counts[file_path] = counts.get(file_path, 0) + 1
    state["last_accessed"][file_path] = datetime.now().isoformat()
    save_usage_state(state)
def get_usage_stats():
    """Summarise file usage statistics.

    Returns:
        dict bucketing files into frequent / occasional / rare by their
        cumulative share of total accesses (top 20% / middle 60% / rest).
    """
    state = load_usage_state()
    by_count = sorted(
        state["file_access_count"].items(),
        key=lambda item: item[1],
        reverse=True,
    )
    total = sum(count for _, count in by_count)

    frequent, occasional, rare = [], [], []
    if by_count:
        cutoff_frequent = total * 0.2  # files within the top 20% of accesses
        cutoff_rare = total * 0.8      # beyond 80% cumulative => "rare"
        running = 0
        for name, count in by_count:
            running += count
            entry = {"file": name, "count": count}
            if running <= cutoff_frequent:
                frequent.append(entry)
            elif running <= cutoff_rare:
                occasional.append(entry)
            else:
                rare.append(entry)

    return {
        "total_accesses": total,
        "unique_files": len(by_count),
        "frequent": frequent,
        "occasional": occasional,
        "rare": rare,
        "recommendation": f"Consider loading frequently accessed files upfront, lazy-load rare files"
    }
def generate_optimized_agents_md():
    """Generate an optimized AGENTS.md with lazy loading instructions.

    Returns:
        str with new AGENTS.md content (a drop-in replacement document
        telling the agent to load minimal context and lazy-load the rest)
    """
    # NOTE: the markdown below is returned verbatim — edit with care.
    return """# AGENTS.md - Token-Optimized Workspace
## 🎯 Context Loading Strategy (OPTIMIZED)
**Default: Minimal context, load on-demand**
### Every Session (Always Load)
1. Read `SOUL.md` — Who you are (identity/personality)
2. Read `IDENTITY.md` — Your role/name
**Stop there.** Don't load anything else unless needed.
### Load On-Demand Only
**When user mentions memory/history:**
- Read `MEMORY.md`
- Read `memory/YYYY-MM-DD.md` (today only)
**When user asks about workflows/processes:**
- Read `AGENTS.md` (this file)
**When user asks about tools/devices:**
- Read `TOOLS.md`
**When user asks about themselves:**
- Read `USER.md`
**Never load automatically:**
- ❌ Documentation (`docs/**/*.md`) — load only when explicitly referenced
- ❌ Old memory logs (`memory/2026-01-*.md`) — load only if user mentions date
- ❌ Knowledge base (`knowledge/**/*`) — load only when user asks about specific topic
- ❌ Task files (`tasks/**/*`) — load only when user references task
### Context by Conversation Type
**Simple conversation** (hi, thanks, yes, quick question):
- Load: SOUL.md, IDENTITY.md
- Skip: Everything else
- **Token savings: ~80%**
**Standard work request** (write code, check file):
- Load: SOUL.md, IDENTITY.md, memory/TODAY.md
- Conditionally load: TOOLS.md (if mentions tools)
- Skip: docs, old memory logs
- **Token savings: ~50%**
**Complex task** (design system, analyze history):
- Load: SOUL.md, IDENTITY.md, MEMORY.md, memory/TODAY.md, memory/YESTERDAY.md
- Conditionally load: Relevant docs/knowledge
- Skip: Unrelated documentation
- **Token savings: ~30%**
## 🔥 Model Selection (ENFORCED)
**Simple conversations → HAIKU ONLY**
- Greetings, acknowledgments, simple questions
- Never use Sonnet/Opus for casual chat
- Override: `session_status model=haiku-4`
**Standard work → SONNET**
- Code writing, file edits, explanations
- Default model for most work
**Complex reasoning → OPUS**
- Architecture design, deep analysis
- Use sparingly, only when explicitly needed
## 💾 Memory (Lazy Loading)
**Daily notes:** `memory/YYYY-MM-DD.md`
- ✅ Load TODAY when user asks about recent work
- ❌ Don't load YESTERDAY unless explicitly needed
- ❌ Don't load older logs automatically
**Long-term:** `MEMORY.md`
- ✅ Load when user mentions "remember", "history", "before"
- ❌ Don't load for simple conversations
## 📊 Heartbeats (Optimized)
Use `heartbeat_optimizer.py` from token-optimizer skill:
- Check only what needs checking (not everything every time)
- Skip during quiet hours (23:00-08:00)
- Return `HEARTBEAT_OK` when nothing to report
## 🎨 Skills (Lazy Loading)
**Don't pre-read skill documentation.**
When skill triggers:
1. Read only the SKILL.md
2. Read only the specific reference files you need
3. Skip examples/assets unless explicitly needed
## 🚫 Anti-Patterns (What NOT to Do)
❌ Loading all docs at session start
❌ Re-reading unchanged files
❌ Using Opus for simple chat
❌ Checking everything in every heartbeat
❌ Loading full conversation history for simple questions
✅ Load minimal context by default
✅ Read files only when referenced
✅ Use cheapest model for the task
✅ Batch heartbeat checks intelligently
✅ Keep context focused on current task
## 📈 Monitoring
Track your savings:
```bash
python3 scripts/context_optimizer.py stats
python3 scripts/token_tracker.py check
```
## Integration
Run context optimizer before responding:
```bash
# Get recommendations
context_optimizer.py recommend "<user prompt>"
# Only load recommended files
# Skip everything else
```
---
**This optimized approach reduces token usage by 50-80% for typical workloads.**
"""
def main():
    """CLI interface for context optimizer.

    Commands:
        recommend '<prompt>' [files...]  Print a context-bundle recommendation
        record <file_path>               Log a file access
        stats                            Print usage statistics
        generate-agents                  Write an optimized AGENTS.md variant
    """
    import sys
    if len(sys.argv) < 2:
        print("Usage: context_optimizer.py [recommend|record|stats|generate-agents]")
        sys.exit(1)
    command = sys.argv[1]
    if command == "recommend":
        if len(sys.argv) < 3:
            print("Usage: context_optimizer.py recommend '<prompt>' [current_files]")
            sys.exit(1)
        prompt = sys.argv[2]
        # Any extra positional args are treated as the currently-loaded files.
        current_files = sys.argv[3:] if len(sys.argv) > 3 else None
        result = recommend_context_bundle(prompt, current_files)
        print(json.dumps(result, indent=2))
    elif command == "record":
        if len(sys.argv) < 3:
            print("Usage: context_optimizer.py record <file_path>")
            sys.exit(1)
        file_path = sys.argv[2]
        record_file_access(file_path)
        print(f"Recorded access: {file_path}")
    elif command == "stats":
        result = get_usage_stats()
        print(json.dumps(result, indent=2))
    elif command == "generate-agents":
        # Written next to AGENTS.md so the user can review before replacing.
        content = generate_optimized_agents_md()
        output_path = Path.home() / ".openclaw/workspace/AGENTS.md.optimized"
        output_path.write_text(content)
        print(f"Generated optimized AGENTS.md at: {output_path}")
        print("\nReview and replace your current AGENTS.md with this version.")
    else:
        print(f"Unknown command: {command}")
        sys.exit(1)
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,298 @@
#!/usr/bin/env python3
"""
Heartbeat optimizer - Manages efficient heartbeat intervals and batched checks.
Reduces API calls by tracking check timestamps and batching operations.
v1.4.0: Added cache-ttl alignment — recommends 55min intervals to keep
Anthropic's 1h prompt cache warm between heartbeats (avoids cache re-write penalty).
"""
import json
import os
from datetime import datetime, timedelta
from pathlib import Path
# Per-check timestamps and intervals are persisted here between runs.
STATE_FILE = Path.home() / ".openclaw/workspace/memory/heartbeat-state.json"
# Optimal interval to keep Anthropic's 1h prompt cache warm.
# Set just under 1h so the cache never expires between heartbeats.
# Anthropic API key users should use this as their default heartbeat interval.
CACHE_TTL_OPTIMAL_INTERVAL = 3300  # 55 minutes in seconds
CACHE_TTL_WINDOW = 3600  # Anthropic default cache TTL = 1 hour
# Default seconds between checks, per check type.
DEFAULT_INTERVALS = {
    "email": 3600,      # 1 hour
    "calendar": 7200,   # 2 hours
    "weather": 14400,   # 4 hours
    "social": 7200,     # 2 hours
    "monitoring": 1800  # 30 minutes
}
# No checks run between start and end hours (window wraps past midnight).
QUIET_HOURS = {
    "start": 23,  # 11 PM
    "end": 8      # 8 AM
}
def load_state():
    """Return persisted heartbeat state, or a fresh default structure."""
    if not STATE_FILE.exists():
        return {
            "lastChecks": {},
            "intervals": DEFAULT_INTERVALS.copy(),
            "skipCount": 0
        }
    with open(STATE_FILE, 'r') as f:
        return json.load(f)
def save_state(state):
    """Write heartbeat state to disk, creating parent dirs as needed."""
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    STATE_FILE.write_text(json.dumps(state, indent=2))
def is_quiet_hours(hour=None):
    """Return True when *hour* (default: current hour) is in quiet hours."""
    if hour is None:
        hour = datetime.now().hour
    start, end = QUIET_HOURS["start"], QUIET_HOURS["end"]
    if start <= end:
        # Simple same-day window.
        return start <= hour < end
    # Window wraps past midnight (e.g. 23:00 -> 08:00).
    return hour >= start or hour < end
def should_check(check_type, force=False):
    """Decide whether a check is due based on its configured interval.

    Args:
        check_type: Type of check (email, calendar, etc.)
        force: Force check regardless of interval

    Returns:
        dict with decision and reasoning
    """
    def verdict(go, reason, next_check=None):
        """Build the uniform decision payload."""
        return {"should_check": go, "reason": reason, "next_check": next_check}

    if force:
        return verdict(True, "Forced check")

    # Nothing runs during quiet hours.
    if is_quiet_hours():
        return verdict(False, "Quiet hours (23:00-08:00)", "08:00")

    state = load_state()
    last_ts = state["lastChecks"].get(check_type)
    if not last_ts:
        # No recorded timestamp yet: run immediately.
        return verdict(True, "First check")

    interval = state["intervals"].get(check_type, DEFAULT_INTERVALS.get(check_type, 3600))
    due_at = datetime.fromisoformat(last_ts) + timedelta(seconds=interval)
    now = datetime.now()
    if now >= due_at:
        return verdict(True, f"Interval elapsed ({interval}s)")

    remaining = (due_at - now).total_seconds()
    return verdict(
        False,
        f"Too soon ({int(remaining / 60)}min remaining)",
        due_at.strftime("%H:%M"),
    )
def record_check(check_type):
    """Record that a check of *check_type* was just performed."""
    state = load_state()
    timestamp = datetime.now().isoformat()
    state["lastChecks"][check_type] = timestamp
    save_state(state)
def plan_heartbeat(checks=None):
    """Plan which checks should run in the next heartbeat.

    Args:
        checks: List of check types to consider (default: all)

    Returns:
        dict with planned checks and skip decision
    """
    candidates = list(DEFAULT_INTERVALS.keys()) if checks is None else checks

    planned, skipped = [], []
    for check_type in candidates:
        decision = should_check(check_type)
        if decision["should_check"]:
            planned.append({"type": check_type, "reason": decision["reason"]})
        else:
            skipped.append({
                "type": check_type,
                "reason": decision["reason"],
                "next_check": decision["next_check"]
            })

    return {
        "planned": planned,
        "skipped": skipped,
        "should_run": bool(planned),
        "can_skip": not planned,
        # Nudge toward the cache-TTL-aligned heartbeat interval.
        "cache_ttl_tip": (
            "Tip: Set your OpenClaw heartbeat interval to 55min (3300s) "
            "to keep the Anthropic 1h prompt cache warm. "
            "Run: heartbeat_optimizer.py cache-ttl for details."
        ),
    }
def get_cache_ttl_recommendation(cache_ttl_seconds=None):
    """Calculate optimal heartbeat interval for Anthropic cache TTL warmup.

    Anthropic prompt caching has a 1h TTL by default on API key profiles.
    Setting heartbeat interval just under the TTL prevents the cache from
    expiring between heartbeats — avoiding the cache re-write penalty.

    Args:
        cache_ttl_seconds: Your cache TTL in seconds (default: 3600 = 1h)

    Returns:
        dict with recommended interval and explanation
    """
    if cache_ttl_seconds is None:
        cache_ttl_seconds = CACHE_TTL_WINDOW
    # Safety buffer: 5 minutes, but capped at ~8% of the TTL so that short
    # TTLs still yield a positive recommendation. (The previous fixed 300s
    # buffer produced a NEGATIVE interval for any TTL <= 5 minutes.)
    # For the default 1h TTL this is still 300s -> 3300s (55min), unchanged.
    buffer_seconds = min(300, cache_ttl_seconds // 12)
    recommended = cache_ttl_seconds - buffer_seconds
    return {
        "cache_ttl_seconds": cache_ttl_seconds,
        "cache_ttl_human": f"{cache_ttl_seconds // 60}min",
        "recommended_interval_seconds": recommended,
        "recommended_interval_human": f"{recommended // 60}min",
        "buffer_seconds": buffer_seconds,
        "explanation": (
            f"With a {cache_ttl_seconds // 60}min Anthropic cache TTL, set your heartbeat "
            f"to {recommended // 60}min ({recommended}s). This keeps the prompt cache warm "
            f"between heartbeats — preventing the cache re-write penalty when the TTL expires."
        ),
        "how_to_configure": (
            "In openclaw.json: agents.defaults.heartbeat.every = \"55m\"\n"
            "Or use the config patch from assets/config-patches.json (heartbeat_optimization)"
        ),
        "cost_impact": (
            "Cache writes cost ~3.75x more than cache reads (Anthropic pricing). "
            "Without warmup: every heartbeat after an idle hour triggers a full cache re-write. "
            "With warmup: cache reads only — significantly cheaper for long-running agents."
        ),
        "note": (
            "This applies to Anthropic API key users only. "
            "OAuth profiles use a 1h heartbeat by default (OpenClaw smart default). "
            "API key profiles default to 30min heartbeat — consider bumping to 55min."
        )
    }
def update_interval(check_type, new_interval_seconds):
    """Update the check interval for a specific check type.

    Args:
        check_type: Type of check
        new_interval_seconds: New interval in seconds

    Returns:
        dict describing the change. "old_interval" is the interval that
        was actually in effect before the update; previously this always
        reported the built-in default, even when the saved state already
        held a custom value.
    """
    state = load_state()
    # Capture the effective interval BEFORE overwriting it.
    old_interval = state["intervals"].get(
        check_type, DEFAULT_INTERVALS.get(check_type)
    )
    state["intervals"][check_type] = new_interval_seconds
    save_state(state)
    return {
        "check_type": check_type,
        "old_interval": old_interval,
        "new_interval": new_interval_seconds
    }
def main():
    """CLI interface for heartbeat optimizer.

    Commands:
        plan [types...]            Plan which checks should run next
        check <type> [--force]     Decide whether one check is due
        record <type>              Stamp a check as performed
        interval <type> <seconds>  Change a check's interval
        cache-ttl [ttl_seconds]    Show cache-TTL-aligned interval advice
        reset                      Delete the persisted state file
    """
    import sys
    if len(sys.argv) < 2:
        print("Usage: heartbeat_optimizer.py [plan|check|record|interval|cache-ttl|reset]")
        sys.exit(1)
    command = sys.argv[1]
    if command == "plan":
        # Plan next heartbeat (optionally restricted to listed check types)
        checks = sys.argv[2:] if len(sys.argv) > 2 else None
        result = plan_heartbeat(checks)
        print(json.dumps(result, indent=2))
    elif command == "check":
        # Check if specific type should run
        if len(sys.argv) < 3:
            print("Usage: heartbeat_optimizer.py check <type>")
            sys.exit(1)
        check_type = sys.argv[2]
        force = len(sys.argv) > 3 and sys.argv[3] == "--force"
        result = should_check(check_type, force)
        print(json.dumps(result, indent=2))
    elif command == "record":
        # Record that a check was performed
        if len(sys.argv) < 3:
            print("Usage: heartbeat_optimizer.py record <type>")
            sys.exit(1)
        check_type = sys.argv[2]
        record_check(check_type)
        print(f"Recorded check: {check_type}")
    elif command == "interval":
        # Update interval
        if len(sys.argv) < 4:
            print("Usage: heartbeat_optimizer.py interval <type> <seconds>")
            sys.exit(1)
        check_type = sys.argv[2]
        interval = int(sys.argv[3])
        result = update_interval(check_type, interval)
        print(json.dumps(result, indent=2))
    elif command == "cache-ttl":
        # Show cache TTL alignment recommendation
        cache_ttl = int(sys.argv[2]) if len(sys.argv) > 2 else None
        result = get_cache_ttl_recommendation(cache_ttl)
        print(json.dumps(result, indent=2))
    elif command == "reset":
        # Reset state (idempotent: no error if the state file is absent)
        if STATE_FILE.exists():
            STATE_FILE.unlink()
        print("Heartbeat state reset.")
    else:
        print(f"Unknown command: {command}")
        print("Available: plan | check <type> | record <type> | interval <type> <seconds> | cache-ttl [ttl_seconds] | reset")
        sys.exit(1)
if __name__ == "__main__":
    main()

438
scripts/model_router.py Normal file
View File

@@ -0,0 +1,438 @@
#!/usr/bin/env python3
"""
Smart model router - routes tasks to appropriate models based on complexity.
Supports multiple providers: Anthropic, OpenAI, Google, OpenRouter.
Helps reduce token costs by using cheaper models for simpler tasks.
Version: 1.1.0
"""
import re
import os
import json
# ============================================================================
# PROVIDER CONFIGURATION
# ============================================================================
# Detect primary provider from environment (default: anthropic)
def detect_provider():
    """Detect which provider to use based on available API keys."""
    # First matching key wins; anthropic is the fallback default.
    key_to_provider = (
        ("ANTHROPIC_API_KEY", "anthropic"),
        ("OPENAI_API_KEY", "openai"),
        ("GOOGLE_API_KEY", "google"),
        ("OPENROUTER_API_KEY", "openrouter"),
    )
    for env_var, provider in key_to_provider:
        if os.environ.get(env_var):
            return provider
    return "anthropic"
# Model tiers per provider. Each provider maps tier -> model id, plus a
# "costs" table of input $/MTok used for savings estimates.
PROVIDER_MODELS = {
    "anthropic": {
        "cheap": "anthropic/claude-haiku-4",
        "balanced": "anthropic/claude-sonnet-4-5",
        "smart": "anthropic/claude-opus-4",
        "costs": {  # $/MTok (input)
            "cheap": 0.25,
            "balanced": 3.00,
            "smart": 15.00
        }
    },
    "openai": {
        "cheap": "openai/gpt-4.1-nano",
        "balanced": "openai/gpt-4.1-mini",
        "smart": "openai/gpt-4.1",
        "premium": "openai/gpt-5",
        "costs": {
            "cheap": 0.10,
            "balanced": 0.40,
            "smart": 2.00,
            "premium": 10.00
        }
    },
    "google": {
        "cheap": "google/gemini-2.0-flash",
        "balanced": "google/gemini-2.5-flash",
        "smart": "google/gemini-2.5-pro",
        "costs": {
            "cheap": 0.075,
            "balanced": 0.15,
            "smart": 1.25
        }
    },
    "openrouter": {
        "cheap": "google/gemini-2.0-flash",
        "balanced": "anthropic/claude-sonnet-4-5",
        "smart": "anthropic/claude-opus-4",
        "costs": {
            "cheap": 0.075,
            "balanced": 3.00,
            "smart": 15.00
        }
    }
}
# Tier mapping for cross-provider compatibility (model nicknames -> tiers)
TIER_ALIASES = {
    "haiku": "cheap",
    "sonnet": "balanced",
    "opus": "smart",
    "nano": "cheap",
    "mini": "balanced",
    "flash": "cheap",
    "pro": "smart"
}
# ============================================================================
# TASK CLASSIFICATION PATTERNS
# ============================================================================
# Communication patterns that should ALWAYS use cheap tier (never balanced/smart)
COMMUNICATION_PATTERNS = [
    r'^(hi|hey|hello|yo|sup)\b',
    r'^(thanks|thank you|thx)\b',
    r'^(ok|okay|sure|got it|understood)\b',
    r'^(yes|yeah|yep|yup|no|nope)\b',
    r'^(good|great|nice|cool|awesome)\b',
    r"^(what|how)'s (up|it going)",
    r'^\w{1,15}$',  # Single short word
    r'^(lol|haha|lmao)\b',
]
# Background/routine tasks that should ALWAYS use cheap tier
BACKGROUND_TASK_PATTERNS = [
    # Heartbeat checks
    r'heartbeat',
    r'check\s+(email|calendar|weather|monitoring)',
    r'monitor\s+',
    r'poll\s+',
    # Cronjob/scheduled tasks
    r'cron',
    r'scheduled\s+task',
    r'periodic\s+check',
    r'reminder',
    # Document parsing/extraction
    r'parse\s+(document|file|log|csv|json|xml)',
    r'extract\s+(text|data|content)\s+from',
    r'read\s+(log|logs)',
    r'scan\s+(file|document)',
    r'process\s+(csv|json|xml|yaml)',
]
# Model routing rules with tier-based approach. Regex hits score 2 points,
# bare keyword hits score 1; the highest-scoring tier wins.
ROUTING_RULES = {
    "cheap": {
        "patterns": [
            r"read\s+file",
            r"list\s+files",
            r"show\s+(me\s+)?the\s+contents?",
            r"what's\s+in",
            r"cat\s+",
            r"get\s+status",
            r"check\s+(if|whether)",
            r"is\s+\w+\s+(running|active|enabled)"
        ],
        "keywords": ["read", "list", "show", "status", "check", "get"],
        "cost_multiplier": 0.083  # vs balanced
    },
    "balanced": {
        "patterns": [
            r"write\s+\w+",
            r"create\s+\w+",
            r"edit\s+\w+",
            r"fix\s+\w+",
            r"debug\s+\w+",
            r"explain\s+\w+",
            r"how\s+(do|can)\s+i"
        ],
        "keywords": ["write", "create", "edit", "update", "fix", "debug", "explain"],
        "cost_multiplier": 1.0
    },
    "smart": {
        "patterns": [
            r"complex\s+\w+",
            r"design\s+\w+",
            r"architect\w+",
            r"analyze\s+deeply",
            r"comprehensive\s+\w+"
        ],
        "keywords": ["design", "architect", "complex", "comprehensive", "deep"],
        "cost_multiplier": 5.0
    }
}
# Legacy tier names for backwards compatibility
LEGACY_TIER_MAP = {
    "haiku": "cheap",
    "sonnet": "balanced",
    "opus": "smart"
}
# ============================================================================
# CORE FUNCTIONS
# ============================================================================
def classify_task(prompt):
    """Classify task complexity based on prompt text.

    Args:
        prompt: User's message/request

    Returns:
        tuple of (tier, confidence, reasoning)
        tier is one of: cheap, balanced, smart
    """
    text = prompt.lower()

    # Hard overrides: chit-chat and routine/background work always go cheap.
    if any(re.search(p, text) for p in COMMUNICATION_PATTERNS):
        return ("cheap", 1.0, "Simple communication - use cheapest model")
    if any(re.search(p, text) for p in BACKGROUND_TASK_PATTERNS):
        return ("cheap", 1.0, "Background task (heartbeat/cron/parsing) - use cheapest model")

    # Score every tier: +2 per regex hit, +1 per keyword hit.
    scores = {}
    for tier, rules in ROUTING_RULES.items():
        hits = []
        total = 0
        for pattern in rules["patterns"]:
            if re.search(pattern, text):
                total += 2
                hits.append(f"pattern:{pattern}")
        for keyword in rules["keywords"]:
            if keyword in text:
                total += 1
                hits.append(f"keyword:{keyword}")
        scores[tier] = {"score": total, "matches": hits}

    winner, info = max(scores.items(), key=lambda kv: kv[1]["score"])
    if info["score"] == 0:
        return ("balanced", 0.5, "No clear indicators, defaulting to balanced model")

    # Five points of evidence = full confidence.
    confidence = min(info["score"] / 5.0, 1.0)
    return (winner, confidence, f"Matched: {', '.join(info['matches'][:3])}")
def normalize_tier(tier):
    """Normalize a tier name to the standard cheap/balanced/smart/premium form."""
    name = tier.lower()
    # Legacy Anthropic names take precedence, then cross-provider aliases.
    if name in LEGACY_TIER_MAP:
        return LEGACY_TIER_MAP[name]
    if name in TIER_ALIASES:
        return TIER_ALIASES[name]
    if name in ("cheap", "balanced", "smart", "premium"):
        return name
    # Unknown tier: fall back to the middle tier.
    return "balanced"
def get_model_for_tier(tier, provider=None):
    """Get the specific model name for a tier and provider.

    Args:
        tier: cheap, balanced, smart, or premium
        provider: anthropic, openai, google, openrouter (or None to auto-detect)

    Returns:
        Model identifier string
    """
    if provider is None:
        provider = detect_provider()
    # Unknown providers fall back to anthropic's model table.
    config = PROVIDER_MODELS.get(provider, PROVIDER_MODELS["anthropic"])
    # Unknown/unavailable tiers degrade gracefully to the balanced model.
    return config.get(normalize_tier(tier), config.get("balanced"))
def route_task(prompt, current_model=None, force_tier=None, provider=None):
    """Route a task to the appropriate model.

    Args:
        prompt: User's message/request
        current_model: Current model being used (optional)
        force_tier: Override classification (cheap/balanced/smart or haiku/sonnet/opus)
        provider: Force specific provider (anthropic/openai/google/openrouter)

    Returns:
        dict with routing decision
    """
    if provider is None:
        provider = detect_provider()
    if current_model is None:
        current_model = get_model_for_tier("balanced", provider)

    if force_tier:
        tier = normalize_tier(force_tier)
        confidence, reasoning = 1.0, "User-specified tier"
    else:
        tier, confidence, reasoning = classify_task(prompt)

    recommended_model = get_model_for_tier(tier, provider)

    # Savings relative to the provider's balanced tier (input $/MTok).
    config = PROVIDER_MODELS.get(provider, PROVIDER_MODELS["anthropic"])
    balanced_cost = config["costs"].get("balanced", 1.0)
    tier_cost = config["costs"].get(tier, balanced_cost)
    savings = (1.0 - (tier_cost / balanced_cost)) * 100

    tier_labels = {
        "cheap": "Cheap (Haiku/Nano/Flash)",
        "balanced": "Balanced (Sonnet/Mini/Flash)",
        "smart": "Smart (Opus/GPT-4.1/Pro)",
        "premium": "Premium (GPT-5)"
    }
    return {
        "provider": provider,
        "current_model": current_model,
        "recommended_model": recommended_model,
        "tier": tier,
        "tier_display": tier_labels.get(tier, tier),
        "confidence": confidence,
        "reasoning": reasoning,
        # Negative savings (pricier tier) are reported as 0.
        "cost_savings_percent": max(0, savings),
        "should_switch": recommended_model != current_model,
        "all_providers": {
            name: get_model_for_tier(tier, name) for name in PROVIDER_MODELS
        }
    }
def get_model_comparison():
    """Get a comparison of all models across providers.

    Returns:
        dict with provider -> tier -> model mapping
    """
    comparison = {}
    for provider, config in PROVIDER_MODELS.items():
        tiers = {}
        for tier, model in config.items():
            if tier == "costs":
                continue  # pricing table, not a tier entry
            tiers[tier] = {
                "model": model,
                "cost_per_mtok": config["costs"].get(tier, "N/A")
            }
        comparison[provider] = tiers
    return comparison
# ============================================================================
# CLI INTERFACE
# ============================================================================
def main():
    """CLI interface for model router.

    Supports an explicit "route" subcommand plus a shorthand where the
    first argument is treated directly as the prompt to route.
    """
    import sys
    if len(sys.argv) < 2:
        print("Usage: model_router.py <command> [args]")
        print("")
        print("Commands:")
        print(" route '<prompt>' [current_model] [force_tier] [provider]")
        print(" compare — Show all models across providers")
        print(" providers — List available providers")
        print(" detect — Show auto-detected provider")
        print("")
        print("Examples:")
        print(" model_router.py route 'thanks!'")
        print(" model_router.py route 'design an architecture' --provider openai")
        print(" model_router.py compare")
        sys.exit(1)
    command = sys.argv[1]
    # Known commands
    known_commands = ["route", "compare", "providers", "detect"]
    if command == "route" or command not in known_commands:
        # Route a prompt (either explicit "route" command or shorthand)
        if command == "route":
            if len(sys.argv) < 3:
                print("Usage: model_router.py route '<prompt>'")
                sys.exit(1)
            prompt = sys.argv[2]
            start_idx = 3
        else:
            # Shorthand: first arg is the prompt
            prompt = command
            start_idx = 2
        # Parse remaining args: --provider/--tier flags (either "--k=v" or
        # "--k v" form), a "vendor/model" positional, or a bare tier name.
        current_model = None
        force_tier = None
        provider = None
        i = start_idx
        while i < len(sys.argv):
            arg = sys.argv[i]
            if arg.startswith("--provider="):
                provider = arg.split("=")[1]
            elif arg.startswith("--tier="):
                force_tier = arg.split("=")[1]
            elif arg == "--provider" and i+1 < len(sys.argv):
                provider = sys.argv[i+1]
                i += 1  # consume the flag's value too
            elif arg == "--tier" and i+1 < len(sys.argv):
                force_tier = sys.argv[i+1]
                i += 1  # consume the flag's value too
            elif arg.startswith("--"):
                pass  # Skip unknown flags
            elif current_model is None and "/" in arg:
                # A "vendor/model" string is taken as the current model.
                current_model = arg
            elif force_tier is None:
                force_tier = arg
            i += 1
        result = route_task(prompt, current_model, force_tier, provider)
        print(json.dumps(result, indent=2))
    elif command == "compare":
        result = get_model_comparison()
        print(json.dumps(result, indent=2))
    elif command == "providers":
        print("Available providers:")
        for provider in PROVIDER_MODELS.keys():
            detected = " (detected)" if provider == detect_provider() else ""
            print(f" - {provider}{detected}")
    elif command == "detect":
        provider = detect_provider()
        print(f"Auto-detected provider: {provider}")
        print(f"Models: {json.dumps(PROVIDER_MODELS[provider], indent=2)}")
    else:
        print(f"Unknown command: {command}")
        sys.exit(1)
if __name__ == "__main__":
    main()

67
scripts/optimize.sh Normal file
View File

@@ -0,0 +1,67 @@
#!/bin/bash
# optimize.sh - Quick CLI wrapper for token optimization tools
#
# Usage:
#   ./optimize.sh route "your prompt here"   # Route to appropriate model
#   ./optimize.sh context                    # Generate optimized AGENTS.md
#   ./optimize.sh recommend "prompt"         # Recommend context files
#   ./optimize.sh budget                     # Check token budget
#   ./optimize.sh heartbeat                  # Install optimized heartbeat
#
# Examples:
#   ./optimize.sh route "thanks!"            # → cheap tier (Haiku)
#   ./optimize.sh route "design an API"      # → smart tier (Opus)

# Resolve the directory this script lives in so the Python helpers can be
# invoked regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

case "$1" in
    route|model)
        # Forward everything after the subcommand to the model router.
        shift
        python3 "$SCRIPT_DIR/model_router.py" "$@"
        ;;
    context|agents)
        python3 "$SCRIPT_DIR/context_optimizer.py" generate-agents
        ;;
    recommend|ctx)
        shift
        python3 "$SCRIPT_DIR/context_optimizer.py" recommend "$@"
        ;;
    budget|tokens|check)
        python3 "$SCRIPT_DIR/token_tracker.py" check
        ;;
    heartbeat|hb)
        # Copy the bundled heartbeat template into the workspace.
        DEST="${HOME}/.openclaw/workspace/HEARTBEAT.md"
        cp "$SCRIPT_DIR/../assets/HEARTBEAT.template.md" "$DEST"
        echo "✅ Installed optimized heartbeat to: $DEST"
        ;;
    providers)
        python3 "$SCRIPT_DIR/model_router.py" providers
        ;;
    detect)
        python3 "$SCRIPT_DIR/model_router.py" detect
        ;;
    help|--help|-h|"")
        echo "Token Optimizer CLI"
        echo ""
        echo "Usage: ./optimize.sh <command> [args]"
        echo ""
        echo "Commands:"
        echo " route <prompt> Route prompt to appropriate model tier"
        echo " context Generate optimized AGENTS.md"
        echo " recommend <prompt> Recommend context files for prompt"
        echo " budget Check current token budget"
        echo " heartbeat Install optimized heartbeat"
        echo " providers List available providers"
        echo " detect Show auto-detected provider"
        echo ""
        echo "Examples:"
        echo " ./optimize.sh route 'thanks!' # → cheap tier"
        echo " ./optimize.sh route 'design an API' # → smart tier"
        echo " ./optimize.sh budget # → current usage"
        ;;
    *)
        echo "Unknown command: $1"
        echo "Run './optimize.sh help' for usage"
        exit 1
        ;;
esac

156
scripts/token_tracker.py Normal file
View File

@@ -0,0 +1,156 @@
#!/usr/bin/env python3
"""
Token usage tracker with budget alerts.
Monitors API usage and warns when approaching limits.
"""
import json
import os
from datetime import datetime, timedelta
from pathlib import Path
# Persisted tracker state, kept alongside other OpenClaw workspace memory.
STATE_FILE = Path.home() / ".openclaw/workspace/memory/token-tracker-state.json"
def load_state():
    """Load tracking state from :data:`STATE_FILE`.

    Returns the persisted JSON document when the state file exists,
    otherwise a fresh default structure with empty usage/alert records.
    """
    if not STATE_FILE.exists():
        return {
            "daily_usage": {},
            "alerts_sent": [],
            "last_reset": datetime.now().isoformat(),
        }
    return json.loads(STATE_FILE.read_text())
def save_state(state):
    """Persist *state* to :data:`STATE_FILE` as pretty-printed JSON."""
    # Create parent directories on first run so the write never fails.
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    STATE_FILE.write_text(json.dumps(state, indent=2))
def get_usage_from_session_status():
    """Parse session status to extract token usage.

    Returns:
        dict with input_tokens, output_tokens, total_cost, and model.
    """
    # Placeholder until this is wired up to OpenClaw's session_status tool;
    # all counters report zero for now.
    usage = dict.fromkeys(("input_tokens", "output_tokens"), 0)
    usage["total_cost"] = 0.0
    usage["model"] = "anthropic/claude-sonnet-4-5"
    return usage
def check_budget(daily_limit_usd=5.0, warn_threshold=0.8):
    """Check if usage is approaching daily budget.

    Args:
        daily_limit_usd: Daily spending limit in USD.
        warn_threshold: Fraction of limit to trigger warning (default 80%).

    Returns:
        dict with status, usage, limit, and alert message if applicable.
    """
    state = load_state()
    today = datetime.now().date().isoformat()
    # Start a fresh record on a new day. Only today's entry is (re)created:
    # the previous implementation replaced the whole daily_usage dict,
    # wiping all historical per-day records on every rollover.
    if today not in state["daily_usage"]:
        state["daily_usage"][today] = {"cost": 0.0, "tokens": 0}
        state["alerts_sent"] = []
        # Persist the rollover; previously the in-memory reset was never
        # saved, so alerts_sent was cleared only for this process.
        save_state(state)
    usage = state["daily_usage"][today]
    percent_used = (usage["cost"] / daily_limit_usd) * 100
    result = {
        "date": today,
        "cost": usage["cost"],
        "tokens": usage["tokens"],
        "limit": daily_limit_usd,
        "percent_used": percent_used,
        "status": "ok"
    }
    # Check thresholds: hard limit first, then the soft warning band.
    if percent_used >= 100:
        result["status"] = "exceeded"
        result["alert"] = f"⚠️ Daily budget exceeded! ${usage['cost']:.2f} / ${daily_limit_usd:.2f}"
    elif percent_used >= (warn_threshold * 100):
        result["status"] = "warning"
        result["alert"] = f"⚠️ Approaching daily limit: ${usage['cost']:.2f} / ${daily_limit_usd:.2f} ({percent_used:.0f}%)"
    return result
def suggest_cheaper_model(current_model, task_type="general"):
    """Suggest cheaper alternative models based on task type.

    Args:
        current_model: Currently configured model.
        task_type: Type of task (general, simple, complex).

    Returns:
        dict with suggestion and cost savings.
    """
    # Approximate cost per 1M tokens (blended input/output).
    cost_per_million = {
        "anthropic/claude-opus-4": 15.0,
        "anthropic/claude-sonnet-4-5": 3.0,
        "anthropic/claude-haiku-4": 0.25,
        "google/gemini-2.0-flash-exp": 0.075,
        "openai/gpt-4o": 2.5,
        "openai/gpt-4o-mini": 0.15,
    }
    # Ranked (model, rationale) pairs per task tier.
    tiered_picks = {
        "simple": [
            ("anthropic/claude-haiku-4", "12x cheaper, great for file reads, routine checks"),
            ("google/gemini-2.0-flash-exp", "40x cheaper via OpenRouter, good for simple tasks"),
        ],
        "general": [
            ("anthropic/claude-sonnet-4-5", "Balanced performance and cost"),
            ("google/gemini-2.0-flash-exp", "Much cheaper, decent quality"),
        ],
        "complex": [
            ("anthropic/claude-opus-4", "Best reasoning, use sparingly"),
            ("anthropic/claude-sonnet-4-5", "Good balance for most complex tasks"),
        ],
    }
    # Unknown task types fall back to the "general" tier.
    picks = tiered_picks.get(task_type, tiered_picks["general"])
    return {
        "current": current_model,
        "current_cost": cost_per_million.get(current_model, "unknown"),
        "suggestions": picks,
    }
def main():
    """CLI interface for token tracker.

    Commands: check (budget status), suggest (cheaper model),
    reset (clear tracked usage). Exits 1 on missing/unknown command.
    """
    import sys

    args = sys.argv[1:]
    if not args:
        print("Usage: token_tracker.py [check|suggest|reset]")
        sys.exit(1)
    command = args[0]
    if command == "check":
        print(json.dumps(check_budget(), indent=2))
    elif command == "suggest":
        # Optional positional args: task type, then current model.
        task = args[1] if len(args) > 1 else "general"
        current = args[2] if len(args) > 2 else "anthropic/claude-sonnet-4-5"
        print(json.dumps(suggest_cheaper_model(current, task), indent=2))
    elif command == "reset":
        state = load_state()
        state["daily_usage"] = {}
        state["alerts_sent"] = []
        save_state(state)
        print("Token tracker state reset.")
    else:
        print(f"Unknown command: {command}")
        sys.exit(1)
# Allow use both as an importable module and as a standalone CLI script.
if __name__ == "__main__":
    main()