Initial commit with translated description
scripts/rumor_scanner.py · 342 lines · Normal file

@@ -0,0 +1,342 @@
#!/usr/bin/env python3
"""
🔮 RUMOR & BUZZ SCANNER

Scans for early signals, rumors, and whispers before they become mainstream news.

Sources:
- Twitter/X: "hearing", "rumor", "sources say", unusual buzz
- Google News: M&A, insider, upgrade/downgrade
- Unusual-keyword detection

Usage: python3 rumor_scanner.py
"""

import json
import os
import subprocess
import re
import gzip
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from pathlib import Path
from urllib.request import urlopen, Request
from urllib.parse import quote_plus

CACHE_DIR = Path(__file__).parent.parent / "cache"
CACHE_DIR.mkdir(exist_ok=True)

# Bird CLI path (environment-specific; point this at your own install)
BIRD_CLI = "/home/clawdbot/.nvm/versions/node/v24.12.0/bin/bird"
BIRD_ENV = Path(__file__).parent.parent / ".env"

def load_env():
    """Load environment variables from .env file."""
    # Minimal KEY=VALUE parser: skips '#' comment lines, strips surrounding quotes.
    if BIRD_ENV.exists():
        for line in BIRD_ENV.read_text().splitlines():
            if '=' in line and not line.startswith('#'):
                key, value = line.split('=', 1)
                os.environ[key.strip()] = value.strip().strip('"').strip("'")

def fetch_url(url, timeout=15):
    """Fetch URL with browser-like headers; return decoded text or None."""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.9',
    }
    req = Request(url, headers=headers)
    try:
        with urlopen(req, timeout=timeout) as resp:
            data = resp.read()
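            # urllib does not decompress responses automatically, so handle gzip here.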
            if resp.info().get('Content-Encoding') == 'gzip':
                data = gzip.decompress(data)
            return data.decode('utf-8', errors='ignore')
    except Exception:
        return None

def search_twitter_rumors():
    """Search Twitter for rumors and early signals."""
    results = []

    # Rumor-focused search queries
    queries = [
        '"hearing that" stock OR $',
        '"sources say" stock OR company',
        '"rumor" merger OR acquisition',
        'insider buying stock',
        '"upgrade" OR "downgrade" stock tomorrow',
        '$AAPL OR $TSLA OR $NVDA rumor',
        '"breaking" stock market',
        'M&A rumor',
    ]

    load_env()

    for query in queries[:4]:  # Limit to avoid rate limits
        try:
            cmd = [BIRD_CLI, 'search', query, '-n', '10', '--json']
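            # Assumes the bird CLI emits a JSON array of tweet objects on stdout.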
            env = os.environ.copy()

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30, env=env)

            if result.returncode == 0 and result.stdout:
                try:
                    tweets = json.loads(result.stdout)
                    for tweet in tweets:
                        text = tweet.get('text', '')
                        # Filter for actual rumors/signals ('m&a' lowercase so it can match the lowercased text)
                        if any(kw in text.lower() for kw in ['hearing', 'rumor', 'source', 'insider', 'upgrade', 'downgrade', 'breaking', 'm&a', 'merger', 'acquisition']):
                            results.append({
                                'source': 'twitter',
                                'type': 'rumor',
                                'text': text[:300],
                                'author': tweet.get('author', {}).get('username', 'unknown'),
                                'likes': tweet.get('likes', 0),
                                'retweets': tweet.get('retweets', 0),
                                'query': query
                            })
                except json.JSONDecodeError:
                    pass
        except Exception:
            pass

    # Dedupe by text similarity
    seen = set()
    unique = []
    for r in results:
        key = r['text'][:100]
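        # First 100 characters serve as a cheap near-duplicate key.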
        if key not in seen:
            seen.add(key)
            unique.append(r)

    return unique

def search_twitter_buzz():
    """Search Twitter for general stock buzz - what are people talking about?"""
    results = []

    queries = [
        '$SPY OR $QQQ',
        'stock to buy',
        'calls OR puts expiring',
        'earnings play',
        'short squeeze',
    ]

    load_env()

    for query in queries[:3]:
        try:
            cmd = [BIRD_CLI, 'search', query, '-n', '15', '--json']
            env = os.environ.copy()

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30, env=env)

            if result.returncode == 0 and result.stdout:
                try:
                    tweets = json.loads(result.stdout)
                    for tweet in tweets:
                        text = tweet.get('text', '')
                        # Extract stock symbols
                        symbols = re.findall(r'\$([A-Z]{1,5})\b', text)
                        if symbols:
                            results.append({
                                'source': 'twitter',
                                'type': 'buzz',
                                'text': text[:300],
                                'symbols': symbols,
                                'author': tweet.get('author', {}).get('username', 'unknown'),
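                                # Retweets weighted 2x likes as a rough reach proxy.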
                                'engagement': tweet.get('likes', 0) + tweet.get('retweets', 0) * 2
                            })
                except json.JSONDecodeError:
                    pass
        except Exception:
            pass

    # Sort by engagement
    results.sort(key=lambda x: x.get('engagement', 0), reverse=True)
    return results[:20]

def search_news_rumors():
    """Search Google News for M&A, insider, and upgrade news."""
    results = []

    queries = [
        'merger acquisition rumor',
        'insider buying stock',
        'analyst upgrade stock',
        'takeover bid company',
        'SEC investigation company',
    ]

    for query in queries:
        url = f"https://news.google.com/rss/search?q={quote_plus(query)}&hl=en-US&gl=US&ceid=US:en"
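        # Google News RSS search endpoint; yields an RSS feed of matching headlines.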
        content = fetch_url(url)

        if content:
            try:
                root = ET.fromstring(content)
                for item in root.findall('.//item')[:5]:
                    title = item.find('title')
                    link = item.find('link')
                    pub_date = item.find('pubDate')

                    if title is not None:
                        title_text = title.text or ''
                        # Symbols are extracted later via extract_symbols_from_text()
                        results.append({
                            'source': 'google_news',
                            'type': 'news_rumor',
                            'title': title_text,
                            'link': link.text if link is not None else '',
                            'date': pub_date.text if pub_date is not None else '',
                            'query': query
                        })
            except ET.ParseError:
                pass

    return results

def extract_symbols_from_text(text):
    """Extract stock symbols from text."""
    # $SYMBOL pattern
    dollar_symbols = re.findall(r'\$([A-Z]{1,5})\b', text)

    # Common company name to symbol mapping
    company_map = {
        'apple': 'AAPL', 'tesla': 'TSLA', 'nvidia': 'NVDA', 'microsoft': 'MSFT',
        'google': 'GOOGL', 'amazon': 'AMZN', 'meta': 'META', 'netflix': 'NFLX',
        'coinbase': 'COIN', 'robinhood': 'HOOD', 'disney': 'DIS', 'intel': 'INTC',
        'amd': 'AMD', 'palantir': 'PLTR', 'gamestop': 'GME', 'amc': 'AMC',
    }

    text_lower = text.lower()
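    # Plain substring match, so e.g. "metaverse" also flags META; acceptable noise for a scanner.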
    company_symbols = [sym for name, sym in company_map.items() if name in text_lower]

    return list(set(dollar_symbols + company_symbols))

def calculate_rumor_score(item):
    """Score a rumor by potential impact."""
    score = 0
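    # Additive keyword weights: hard catalysts (M&A) outrank soft chatter ("hearing").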
    text = (item.get('text', '') + ' ' + item.get('title', '')).lower()

    # High impact keywords
    if any(kw in text for kw in ['merger', 'acquisition', 'takeover', 'buyout']):
        score += 5
    if any(kw in text for kw in ['insider', 'ceo buying', 'director buying']):
        score += 4
    if any(kw in text for kw in ['upgrade', 'price target raised']):
        score += 3
    if any(kw in text for kw in ['downgrade', 'sec investigation', 'fraud']):
        score += 3
    if any(kw in text for kw in ['hearing', 'sources say', 'rumor']):
        score += 2
    if any(kw in text for kw in ['breaking', 'just in', 'alert']):
        score += 2

    # Engagement boost
    if item.get('engagement', 0) > 100:
        score += 2
    if item.get('likes', 0) > 50:
        score += 1

    return score

def main():
    print("=" * 60)
    print("🔮 RUMOR & BUZZ SCANNER")
    print(f"📅 {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
    print("=" * 60)
    print()
    print("🔍 Scanning for early signals...")
    print()

    all_rumors = []
    all_buzz = []

    # Twitter Rumors
    print(" 🐦 Twitter rumors...")
    rumors = search_twitter_rumors()
    print(f" ✅ {len(rumors)} potential rumors")
    all_rumors.extend(rumors)

    # Twitter Buzz
    print(" 🐦 Twitter buzz...")
    buzz = search_twitter_buzz()
    print(f" ✅ {len(buzz)} buzz items")
    all_buzz.extend(buzz)

    # News Rumors
    print(" 📰 News rumors...")
    news = search_news_rumors()
    print(f" ✅ {len(news)} news items")
    all_rumors.extend(news)

    # Score and sort rumors (join text and title with a space so words don't fuse at the seam)
    for item in all_rumors:
        item['score'] = calculate_rumor_score(item)
        item['symbols'] = extract_symbols_from_text(item.get('text', '') + ' ' + item.get('title', ''))

    all_rumors.sort(key=lambda x: x['score'], reverse=True)

    # Count symbol mentions in buzz
    symbol_counts = {}
    for item in all_buzz:
        for sym in item.get('symbols', []):
            symbol_counts[sym] = symbol_counts.get(sym, 0) + 1

    # Output
    print()
    print("=" * 60)
    print("🔮 RESULTS")
    print("=" * 60)
    print()

    # Top Rumors
    print("🚨 TOP RUMORS (by potential impact):")
    print()
    for item in all_rumors[:10]:
        if item['score'] > 0:
            source = item['source']
            symbols = ', '.join(item.get('symbols', [])) or 'N/A'
            text = item.get('text', item.get('title', ''))[:80]
            print(f" [{item['score']}] [{source}] {symbols}")
            print(f" {text}...")
            print()

    # Buzz Leaderboard
    print("📊 BUZZ LEADERBOARD (most discussed):")
    print()
    sorted_symbols = sorted(symbol_counts.items(), key=lambda x: x[1], reverse=True)
    for symbol, count in sorted_symbols[:15]:
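        # Clamp the bar at 20 blocks so one hot ticker doesn't blow out the layout.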
        bar = "█" * min(count, 20)
        print(f" ${symbol:5} {bar} ({count})")

    print()

    # Recent Buzz Snippets
    print("💬 WHAT PEOPLE ARE SAYING:")
    print()
    for item in all_buzz[:8]:
        author = item.get('author', 'anon')
        text = item.get('text', '')[:120]
        engagement = item.get('engagement', 0)
        print(f" @{author} ({engagement}♥): {text}...")
        print()

    # Save results
    output = {
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'rumors': all_rumors[:20],
        'buzz': all_buzz[:30],
        'symbol_counts': symbol_counts,
    }

    output_file = CACHE_DIR / 'rumor_scan_latest.json'
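    # default=str lets non-JSON types (datetimes, etc.) serialize without raising.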
    output_file.write_text(json.dumps(output, indent=2, default=str))
    print(f"💾 Saved: {output_file}")

if __name__ == "__main__":
    main()