#!/usr/bin/env python3
"""Context optimizer - Analyze and minimize context loading.

Tracks which files are actually needed and creates minimal bundles.
"""
import json
import re
from pathlib import Path
from datetime import datetime, timedelta

# Persisted usage-tracking state (JSON) lives in the workspace memory dir.
STATE_FILE = Path.home() / ".openclaw/workspace/memory/context-usage.json"

# Files that should ALWAYS be loaded (identity/personality)
ALWAYS_LOAD = [
    "SOUL.md",
    "IDENTITY.md",
]

# Files to load on-demand: filename -> trigger substrings searched for in the
# lowercased prompt.
CONDITIONAL_FILES = {
    "AGENTS.md": ["workflow", "process", "how do i", "remember", "what should"],
    "USER.md": ["user", "human", "owner", "about you", "who are you helping"],
    "TOOLS.md": ["tool", "camera", "ssh", "voice", "tts", "device"],
    "MEMORY.md": ["remember", "recall", "history", "past", "before", "last time"],
    "HEARTBEAT.md": ["heartbeat", "check", "monitor", "alert"],
}

# Glob patterns to NEVER load for simple conversations
SKIP_FOR_SIMPLE = [
    "docs/**/*.md",    # Documentation
    "memory/20*.md",   # Old daily logs
    "knowledge/**/*",  # Knowledge base
    "tasks/**/*",      # Task tracking
]


def load_usage_state():
    """Load context usage tracking from STATE_FILE.

    Returns:
        dict with keys "file_access_count", "last_accessed",
        "session_summaries"; fresh defaults when no state file exists.
    """
    if STATE_FILE.exists():
        with open(STATE_FILE, 'r') as f:
            return json.load(f)
    return {
        "file_access_count": {},
        "last_accessed": {},
        "session_summaries": [],
    }


def save_usage_state(state):
    """Save context usage tracking to STATE_FILE, creating parent dirs."""
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(STATE_FILE, 'w') as f:
        json.dump(state, f, indent=2)


def classify_prompt(prompt):
    """Classify prompt to determine context needs.

    Args:
        prompt: User's message.

    Returns:
        tuple of (complexity, context_level, reasoning)
        complexity: simple | medium | complex
        context_level: minimal | standard | full
    """
    prompt_lower = prompt.lower()

    # Simple conversational patterns (minimal context)
    simple_patterns = [
        r'^(hi|hey|hello|thanks|thank you|ok|okay|yes|no|sure)\b',
        r'^(what|how)\'s (up|it going)',
        r'^\w{1,20}$',  # Single word
        r'^(good|great|nice|cool)',
    ]
    for pattern in simple_patterns:
        if re.search(pattern, prompt_lower):
            return ("simple", "minimal", "Conversational/greeting pattern")

    # Check for file/documentation references (substring match, so e.g.
    # "profile" also matches "file" — intentional loose heuristic)
    if any(word in prompt_lower for word in ["read", "show", "file", "doc", "content"]):
        return ("simple", "standard", "File access request")

    # Check for memory/history references
    if any(word in prompt_lower for word in ["remember", "recall", "history", "before", "last time"]):
        return ("medium", "full", "Memory access needed")

    # Check for complex task indicators
    complex_indicators = ["design", "architect", "plan", "strategy", "analyze deeply", "comprehensive"]
    if any(word in prompt_lower for word in complex_indicators):
        return ("complex", "full", "Complex task requiring full context")

    # Default to standard for normal work requests
    return ("medium", "standard", "Standard work request")


def recommend_context_bundle(prompt, current_files=None):
    """Recommend which files to load for a given prompt.

    Args:
        prompt: User's message.
        current_files: List of files currently loaded (optional).

    Returns:
        dict with recommendations (complexity, context_level, reasoning,
        recommended_files, file_count, savings_percent, skip_patterns).
    """
    complexity, context_level, reasoning = classify_prompt(prompt)
    prompt_lower = prompt.lower()

    # Start with always-load files
    recommended = set(ALWAYS_LOAD)

    if context_level == "minimal":
        # For simple conversations, ONLY identity files
        pass
    elif context_level == "standard":
        # Add conditionally-loaded files based on triggers
        for file, triggers in CONDITIONAL_FILES.items():
            if any(trigger in prompt_lower for trigger in triggers):
                recommended.add(file)
        # Add today's memory log only
        today = datetime.now().strftime("%Y-%m-%d")
        recommended.add(f"memory/{today}.md")
    elif context_level == "full":
        # Add all conditional files
        recommended.update(CONDITIONAL_FILES.keys())
        # Add today + yesterday memory logs
        today = datetime.now()
        yesterday = today - timedelta(days=1)
        recommended.add(f"memory/{today.strftime('%Y-%m-%d')}.md")
        recommended.add(f"memory/{yesterday.strftime('%Y-%m-%d')}.md")
        # Add MEMORY.md for long-term context
        recommended.add("MEMORY.md")

    # Calculate savings vs. what is currently loaded (None when unknown;
    # an empty current_files list is treated as unknown, avoiding /0)
    if current_files:
        current_count = len(current_files)
        recommended_count = len(recommended)
        savings_percent = ((current_count - recommended_count) / current_count) * 100
    else:
        savings_percent = None

    return {
        "complexity": complexity,
        "context_level": context_level,
        "reasoning": reasoning,
        "recommended_files": sorted(recommended),
        "file_count": len(recommended),
        "savings_percent": savings_percent,
        "skip_patterns": SKIP_FOR_SIMPLE if context_level == "minimal" else [],
    }


def record_file_access(file_path):
    """Record that a file was accessed (count + last-accessed timestamp)."""
    state = load_usage_state()

    # Increment access count
    state["file_access_count"][file_path] = state["file_access_count"].get(file_path, 0) + 1

    # Update last accessed timestamp
    state["last_accessed"][file_path] = datetime.now().isoformat()

    save_usage_state(state)


def get_usage_stats():
    """Get file usage statistics.

    Returns:
        dict with frequently/rarely accessed files, bucketed by cumulative
        share of total accesses (top 20% / middle 60% / bottom 20%).
    """
    state = load_usage_state()

    # Sort by access count, most-accessed first
    sorted_files = sorted(
        state["file_access_count"].items(),
        key=lambda x: x[1],
        reverse=True,
    )

    total_accesses = sum(state["file_access_count"].values())

    # Classify files
    frequent = []    # Top 20% of accesses
    occasional = []  # Middle 60%
    rare = []        # Bottom 20%

    if sorted_files:
        threshold_frequent = total_accesses * 0.2
        threshold_rare = total_accesses * 0.8
        cumulative = 0
        for file, count in sorted_files:
            cumulative += count
            if cumulative <= threshold_frequent:
                frequent.append({"file": file, "count": count})
            elif cumulative <= threshold_rare:
                occasional.append({"file": file, "count": count})
            else:
                rare.append({"file": file, "count": count})

    return {
        "total_accesses": total_accesses,
        "unique_files": len(sorted_files),
        "frequent": frequent,
        "occasional": occasional,
        "rare": rare,
        "recommendation": "Consider loading frequently accessed files upfront, lazy-load rare files",
    }


def generate_optimized_agents_md():
    """Generate an optimized AGENTS.md with lazy loading instructions.

    Returns:
        str with new AGENTS.md content.
    """
    return """# AGENTS.md - Token-Optimized Workspace

## 🎯 Context Loading Strategy (OPTIMIZED)

**Default: Minimal context, load on-demand**

### Every Session (Always Load)
1. Read `SOUL.md` — Who you are (identity/personality)
2. Read `IDENTITY.md` — Your role/name

**Stop there.** Don't load anything else unless needed.

### Load On-Demand Only

**When user mentions memory/history:**
- Read `MEMORY.md`
- Read `memory/YYYY-MM-DD.md` (today only)

**When user asks about workflows/processes:**
- Read `AGENTS.md` (this file)

**When user asks about tools/devices:**
- Read `TOOLS.md`

**When user asks about themselves:**
- Read `USER.md`

**Never load automatically:**
- ❌ Documentation (`docs/**/*.md`) — load only when explicitly referenced
- ❌ Old memory logs (`memory/2026-01-*.md`) — load only if user mentions date
- ❌ Knowledge base (`knowledge/**/*`) — load only when user asks about specific topic
- ❌ Task files (`tasks/**/*`) — load only when user references task

### Context by Conversation Type

**Simple conversation** (hi, thanks, yes, quick question):
- Load: SOUL.md, IDENTITY.md
- Skip: Everything else
- **Token savings: ~80%**

**Standard work request** (write code, check file):
- Load: SOUL.md, IDENTITY.md, memory/TODAY.md
- Conditionally load: TOOLS.md (if mentions tools)
- Skip: docs, old memory logs
- **Token savings: ~50%**

**Complex task** (design system, analyze history):
- Load: SOUL.md, IDENTITY.md, MEMORY.md, memory/TODAY.md, memory/YESTERDAY.md
- Conditionally load: Relevant docs/knowledge
- Skip: Unrelated documentation
- **Token savings: ~30%**

## 🔥 Model Selection (ENFORCED)

**Simple conversations → HAIKU ONLY**
- Greetings, acknowledgments, simple questions
- Never use Sonnet/Opus for casual chat
- Override: `session_status model=haiku-4`

**Standard work → SONNET**
- Code writing, file edits, explanations
- Default model for most work

**Complex reasoning → OPUS**
- Architecture design, deep analysis
- Use sparingly, only when explicitly needed

## 💾 Memory (Lazy Loading)

**Daily notes:** `memory/YYYY-MM-DD.md`
- ✅ Load TODAY when user asks about recent work
- ❌ Don't load YESTERDAY unless explicitly needed
- ❌ Don't load older logs automatically

**Long-term:** `MEMORY.md`
- ✅ Load when user mentions "remember", "history", "before"
- ❌ Don't load for simple conversations

## 📊 Heartbeats (Optimized)

Use `heartbeat_optimizer.py` from token-optimizer skill:
- Check only what needs checking (not everything every time)
- Skip during quiet hours (23:00-08:00)
- Return `HEARTBEAT_OK` when nothing to report

## 🎨 Skills (Lazy Loading)

**Don't pre-read skill documentation.** When skill triggers:
1. Read only the SKILL.md
2. Read only the specific reference files you need
3. Skip examples/assets unless explicitly needed

## 🚫 Anti-Patterns (What NOT to Do)

❌ Loading all docs at session start
❌ Re-reading unchanged files
❌ Using Opus for simple chat
❌ Checking everything in every heartbeat
❌ Loading full conversation history for simple questions

✅ Load minimal context by default
✅ Read files only when referenced
✅ Use cheapest model for the task
✅ Batch heartbeat checks intelligently
✅ Keep context focused on current task

## 📈 Monitoring

Track your savings:
```bash
python3 scripts/context_optimizer.py stats
python3 scripts/token_tracker.py check
```

## Integration

Run context optimizer before responding:
```bash
# Get recommendations
context_optimizer.py recommend "<prompt>"

# Only load recommended files
# Skip everything else
```

---

**This optimized approach reduces token usage by 50-80% for typical workloads.**
"""


def main():
    """CLI interface for context optimizer.

    Commands: recommend <prompt> [files...], record <file_path>, stats,
    generate-agents. Exits with status 1 on usage errors.
    """
    import sys

    if len(sys.argv) < 2:
        print("Usage: context_optimizer.py [recommend|record|stats|generate-agents]")
        sys.exit(1)

    command = sys.argv[1]

    if command == "recommend":
        if len(sys.argv) < 3:
            # NOTE(review): placeholder looked mangled in the original
            # ("recommend ''"); restored as '<prompt>' — confirm intent.
            print("Usage: context_optimizer.py recommend '<prompt>' [current_files]")
            sys.exit(1)
        prompt = sys.argv[2]
        current_files = sys.argv[3:] if len(sys.argv) > 3 else None
        result = recommend_context_bundle(prompt, current_files)
        print(json.dumps(result, indent=2))

    elif command == "record":
        if len(sys.argv) < 3:
            # NOTE(review): placeholder restored as '<file_path>' — confirm.
            print("Usage: context_optimizer.py record <file_path>")
            sys.exit(1)
        file_path = sys.argv[2]
        record_file_access(file_path)
        print(f"Recorded access: {file_path}")

    elif command == "stats":
        result = get_usage_stats()
        print(json.dumps(result, indent=2))

    elif command == "generate-agents":
        content = generate_optimized_agents_md()
        output_path = Path.home() / ".openclaw/workspace/AGENTS.md.optimized"
        output_path.write_text(content)
        print(f"Generated optimized AGENTS.md at: {output_path}")
        print("\nReview and replace your current AGENTS.md with this version.")

    else:
        print(f"Unknown command: {command}")
        sys.exit(1)


if __name__ == "__main__":
    main()