Files

396 lines
12 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
"""
Context optimizer - Analyze and minimize context loading.
Tracks which files are actually needed and creates minimal bundles.
"""
import json
import re
from pathlib import Path
from datetime import datetime, timedelta
STATE_FILE = Path.home() / ".openclaw/workspace/memory/context-usage.json"
# Files that should ALWAYS be loaded (identity/personality)
ALWAYS_LOAD = [
"SOUL.md",
"IDENTITY.md"
]
# Files to load on-demand based on triggers
CONDITIONAL_FILES = {
"AGENTS.md": ["workflow", "process", "how do i", "remember", "what should"],
"USER.md": ["user", "human", "owner", "about you", "who are you helping"],
"TOOLS.md": ["tool", "camera", "ssh", "voice", "tts", "device"],
"MEMORY.md": ["remember", "recall", "history", "past", "before", "last time"],
"HEARTBEAT.md": ["heartbeat", "check", "monitor", "alert"],
}
# Files to NEVER load for simple conversations
SKIP_FOR_SIMPLE = [
"docs/**/*.md", # Documentation
"memory/20*.md", # Old daily logs
"knowledge/**/*", # Knowledge base
"tasks/**/*", # Task tracking
]
def load_usage_state():
"""Load context usage tracking."""
if STATE_FILE.exists():
with open(STATE_FILE, 'r') as f:
return json.load(f)
return {
"file_access_count": {},
"last_accessed": {},
"session_summaries": []
}
def save_usage_state(state):
"""Save context usage tracking."""
STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
with open(STATE_FILE, 'w') as f:
json.dump(state, f, indent=2)
def classify_prompt(prompt):
"""Classify prompt to determine context needs.
Returns:
tuple of (complexity, context_level, reasoning)
complexity: simple | medium | complex
context_level: minimal | standard | full
"""
prompt_lower = prompt.lower()
# Simple conversational patterns (minimal context)
simple_patterns = [
r'^(hi|hey|hello|thanks|thank you|ok|okay|yes|no|sure)\b',
r'^(what|how)\'s (up|it going)',
r'^\w{1,20}$', # Single word
r'^(good|great|nice|cool)',
]
for pattern in simple_patterns:
if re.search(pattern, prompt_lower):
return ("simple", "minimal", "Conversational/greeting pattern")
# Check for file/documentation references
if any(word in prompt_lower for word in ["read", "show", "file", "doc", "content"]):
return ("simple", "standard", "File access request")
# Check for memory/history references
if any(word in prompt_lower for word in ["remember", "recall", "history", "before", "last time"]):
return ("medium", "full", "Memory access needed")
# Check for complex task indicators
complex_indicators = ["design", "architect", "plan", "strategy", "analyze deeply", "comprehensive"]
if any(word in prompt_lower for word in complex_indicators):
return ("complex", "full", "Complex task requiring full context")
# Default to standard for normal work requests
return ("medium", "standard", "Standard work request")
def recommend_context_bundle(prompt, current_files=None):
"""Recommend which files to load for a given prompt.
Args:
prompt: User's message
current_files: List of files currently loaded (optional)
Returns:
dict with recommendations
"""
complexity, context_level, reasoning = classify_prompt(prompt)
prompt_lower = prompt.lower()
# Start with always-load files
recommended = set(ALWAYS_LOAD)
if context_level == "minimal":
# For simple conversations, ONLY identity files
pass
elif context_level == "standard":
# Add conditionally-loaded files based on triggers
for file, triggers in CONDITIONAL_FILES.items():
if any(trigger in prompt_lower for trigger in triggers):
recommended.add(file)
# Add today's memory log only
today = datetime.now().strftime("%Y-%m-%d")
recommended.add(f"memory/{today}.md")
elif context_level == "full":
# Add all conditional files
recommended.update(CONDITIONAL_FILES.keys())
# Add today + yesterday memory logs
today = datetime.now()
yesterday = today - timedelta(days=1)
recommended.add(f"memory/{today.strftime('%Y-%m-%d')}.md")
recommended.add(f"memory/{yesterday.strftime('%Y-%m-%d')}.md")
# Add MEMORY.md for long-term context
recommended.add("MEMORY.md")
# Calculate savings
if current_files:
current_count = len(current_files)
recommended_count = len(recommended)
savings_percent = ((current_count - recommended_count) / current_count) * 100
else:
savings_percent = None
return {
"complexity": complexity,
"context_level": context_level,
"reasoning": reasoning,
"recommended_files": sorted(list(recommended)),
"file_count": len(recommended),
"savings_percent": savings_percent,
"skip_patterns": SKIP_FOR_SIMPLE if context_level == "minimal" else []
}
def record_file_access(file_path):
"""Record that a file was accessed."""
state = load_usage_state()
# Increment access count
state["file_access_count"][file_path] = state["file_access_count"].get(file_path, 0) + 1
# Update last accessed timestamp
state["last_accessed"][file_path] = datetime.now().isoformat()
save_usage_state(state)
def get_usage_stats():
"""Get file usage statistics.
Returns:
dict with frequently/rarely accessed files
"""
state = load_usage_state()
# Sort by access count
sorted_files = sorted(
state["file_access_count"].items(),
key=lambda x: x[1],
reverse=True
)
total_accesses = sum(state["file_access_count"].values())
# Classify files
frequent = [] # Top 20% of accesses
occasional = [] # Middle 60%
rare = [] # Bottom 20%
if sorted_files:
threshold_frequent = total_accesses * 0.2
threshold_rare = total_accesses * 0.8
cumulative = 0
for file, count in sorted_files:
cumulative += count
if cumulative <= threshold_frequent:
frequent.append({"file": file, "count": count})
elif cumulative <= threshold_rare:
occasional.append({"file": file, "count": count})
else:
rare.append({"file": file, "count": count})
return {
"total_accesses": total_accesses,
"unique_files": len(sorted_files),
"frequent": frequent,
"occasional": occasional,
"rare": rare,
"recommendation": f"Consider loading frequently accessed files upfront, lazy-load rare files"
}
def generate_optimized_agents_md():
"""Generate an optimized AGENTS.md with lazy loading instructions.
Returns:
str with new AGENTS.md content
"""
return """# AGENTS.md - Token-Optimized Workspace
## 🎯 Context Loading Strategy (OPTIMIZED)
**Default: Minimal context, load on-demand**
### Every Session (Always Load)
1. Read `SOUL.md` Who you are (identity/personality)
2. Read `IDENTITY.md` Your role/name
**Stop there.** Don't load anything else unless needed.
### Load On-Demand Only
**When user mentions memory/history:**
- Read `MEMORY.md`
- Read `memory/YYYY-MM-DD.md` (today only)
**When user asks about workflows/processes:**
- Read `AGENTS.md` (this file)
**When user asks about tools/devices:**
- Read `TOOLS.md`
**When user asks about themselves:**
- Read `USER.md`
**Never load automatically:**
- Documentation (`docs/**/*.md`) load only when explicitly referenced
- Old memory logs (`memory/2026-01-*.md`) load only if user mentions date
- Knowledge base (`knowledge/**/*`) load only when user asks about specific topic
- Task files (`tasks/**/*`) load only when user references task
### Context by Conversation Type
**Simple conversation** (hi, thanks, yes, quick question):
- Load: SOUL.md, IDENTITY.md
- Skip: Everything else
- **Token savings: ~80%**
**Standard work request** (write code, check file):
- Load: SOUL.md, IDENTITY.md, memory/TODAY.md
- Conditionally load: TOOLS.md (if mentions tools)
- Skip: docs, old memory logs
- **Token savings: ~50%**
**Complex task** (design system, analyze history):
- Load: SOUL.md, IDENTITY.md, MEMORY.md, memory/TODAY.md, memory/YESTERDAY.md
- Conditionally load: Relevant docs/knowledge
- Skip: Unrelated documentation
- **Token savings: ~30%**
## 🔥 Model Selection (ENFORCED)
**Simple conversations HAIKU ONLY**
- Greetings, acknowledgments, simple questions
- Never use Sonnet/Opus for casual chat
- Override: `session_status model=haiku-4`
**Standard work SONNET**
- Code writing, file edits, explanations
- Default model for most work
**Complex reasoning OPUS**
- Architecture design, deep analysis
- Use sparingly, only when explicitly needed
## 💾 Memory (Lazy Loading)
**Daily notes:** `memory/YYYY-MM-DD.md`
- Load TODAY when user asks about recent work
- Don't load YESTERDAY unless explicitly needed
- Don't load older logs automatically
**Long-term:** `MEMORY.md`
- Load when user mentions "remember", "history", "before"
- Don't load for simple conversations
## 📊 Heartbeats (Optimized)
Use `heartbeat_optimizer.py` from token-optimizer skill:
- Check only what needs checking (not everything every time)
- Skip during quiet hours (23:00-08:00)
- Return `HEARTBEAT_OK` when nothing to report
## 🎨 Skills (Lazy Loading)
**Don't pre-read skill documentation.**
When skill triggers:
1. Read only the SKILL.md
2. Read only the specific reference files you need
3. Skip examples/assets unless explicitly needed
## 🚫 Anti-Patterns (What NOT to Do)
Loading all docs at session start
Re-reading unchanged files
Using Opus for simple chat
Checking everything in every heartbeat
Loading full conversation history for simple questions
Load minimal context by default
Read files only when referenced
Use cheapest model for the task
Batch heartbeat checks intelligently
Keep context focused on current task
## 📈 Monitoring
Track your savings:
```bash
python3 scripts/context_optimizer.py stats
python3 scripts/token_tracker.py check
```
## Integration
Run context optimizer before responding:
```bash
# Get recommendations
context_optimizer.py recommend "<user prompt>"
# Only load recommended files
# Skip everything else
```
---
**This optimized approach reduces token usage by 50-80% for typical workloads.**
"""
def main():
"""CLI interface for context optimizer."""
import sys
if len(sys.argv) < 2:
print("Usage: context_optimizer.py [recommend|record|stats|generate-agents]")
sys.exit(1)
command = sys.argv[1]
if command == "recommend":
if len(sys.argv) < 3:
print("Usage: context_optimizer.py recommend '<prompt>' [current_files]")
sys.exit(1)
prompt = sys.argv[2]
current_files = sys.argv[3:] if len(sys.argv) > 3 else None
result = recommend_context_bundle(prompt, current_files)
print(json.dumps(result, indent=2))
elif command == "record":
if len(sys.argv) < 3:
print("Usage: context_optimizer.py record <file_path>")
sys.exit(1)
file_path = sys.argv[2]
record_file_access(file_path)
print(f"Recorded access: {file_path}")
elif command == "stats":
result = get_usage_stats()
print(json.dumps(result, indent=2))
elif command == "generate-agents":
content = generate_optimized_agents_md()
output_path = Path.home() / ".openclaw/workspace/AGENTS.md.optimized"
output_path.write_text(content)
print(f"Generated optimized AGENTS.md at: {output_path}")
print("\nReview and replace your current AGENTS.md with this version.")
else:
print(f"Unknown command: {command}")
sys.exit(1)
if __name__ == "__main__":
main()