1127 lines
38 KiB
Python
1127 lines
38 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
News Fetcher - Aggregate news from multiple sources.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
from datetime import datetime, timedelta
|
|
from email.utils import parsedate_to_datetime
|
|
from pathlib import Path
|
|
import ssl
|
|
import urllib.error
|
|
import urllib.request
|
|
import yfinance as yf
|
|
import pandas as pd
|
|
|
|
from utils import clamp_timeout, compute_deadline, ensure_venv, time_left
|
|
|
|
# Retry configuration
DEFAULT_MAX_RETRIES = 3
DEFAULT_RETRY_DELAY = 1  # Base delay in seconds (exponential backoff)

# 4xx statuses that can succeed on a later attempt; every other 4xx is final.
_RETRYABLE_CLIENT_STATUSES = frozenset({408, 425, 429})


def fetch_with_retry(
    url: str,
    max_retries: int = DEFAULT_MAX_RETRIES,
    base_delay: float = DEFAULT_RETRY_DELAY,
    timeout: int = 15,
    deadline: float | None = None,
) -> bytes | None:
    """
    Fetch URL content with exponential backoff retry.

    Args:
        url: URL to fetch
        max_retries: Maximum number of retry attempts (the first try is extra)
        base_delay: Base delay in seconds (exponential backoff: delay * 2^attempt)
        timeout: Per-request timeout in seconds
        deadline: Overall deadline, in whatever form ``time_left`` accepts

    Returns:
        Response content as bytes (feedparser handles encoding), or None if
        the deadline passed, the error is not worth retrying, or all
        attempts failed.
    """
    last_error = None
    total_attempts = max_retries + 1  # attempt 0 is the first try

    for attempt in range(total_attempts):
        # Check deadline before each attempt.
        # NOTE(review): time_left() is assumed to return None when there is no
        # deadline and the remaining seconds otherwise - confirm in utils.
        remaining = time_left(deadline)
        if remaining is not None and remaining <= 0:
            print(f"⚠️ Deadline exceeded, skipping fetch: {url}", file=sys.stderr)
            return None

        try:
            req = urllib.request.Request(url, headers={'User-Agent': 'OpenClaw/1.0'})
            with urllib.request.urlopen(req, timeout=timeout, context=SSL_CONTEXT) as response:
                return response.read()
        except urllib.error.URLError as e:
            last_error = e
            status = getattr(e, 'code', None)  # only HTTPError carries a status code
            if (
                isinstance(status, int)
                and 400 <= status < 500
                and status not in _RETRYABLE_CLIENT_STATUSES
            ):
                # A client error (404, 403, ...) will not change within seconds.
                print(f"⚠️ Fetch failed with HTTP {status}, not retrying: {url}", file=sys.stderr)
                return None
            failure = f"Fetch failed (attempt {attempt + 1}/{total_attempts}): {e}."
        except TimeoutError:
            last_error = TimeoutError("Request timed out")
            failure = f"Timeout (attempt {attempt + 1}/{total_attempts})."
        except Exception as e:
            # Anything else (bad URL, protocol error, ...) is not retried.
            print(f"⚠️ Unexpected error fetching {url}: {e}", file=sys.stderr)
            return None

        if attempt < max_retries:
            delay = base_delay * (2 ** attempt)  # Exponential backoff
            remaining = time_left(deadline)
            if remaining is not None:
                # Never sleep past the overall deadline.
                delay = min(delay, max(remaining, 0))
            print(f"⚠️ {failure} Retrying in {delay}s...", file=sys.stderr)
            time.sleep(delay)

    print(f"⚠️ All {total_attempts} attempts failed for {url}: {last_error}", file=sys.stderr)
    return None
|
|
|
|
# Directory layout: config/ and cache/ sit next to this script's directory.
SCRIPT_DIR = Path(__file__).parent
CONFIG_DIR = SCRIPT_DIR.parent / "config"
CACHE_DIR = SCRIPT_DIR.parent / "cache"

# Ensure cache directory exists
CACHE_DIR.mkdir(exist_ok=True)

# CA bundle: an explicit SSL_CERT_FILE wins, then the first well-known bundle
# path that exists on disk; None means "use the ssl module's defaults".
_CA_BUNDLE_CANDIDATES = (
    "/etc/ssl/certs/ca-bundle.crt",
    "/etc/ssl/certs/ca-certificates.crt",
)
CA_FILE = os.environ.get("SSL_CERT_FILE") or next(
    (candidate for candidate in _CA_BUNDLE_CANDIDATES if os.path.exists(candidate)),
    None,
)
if CA_FILE:
    SSL_CONTEXT = ssl.create_default_context(cafile=CA_FILE)
else:
    SSL_CONTEXT = ssl.create_default_context()

# Fallbacks used when the loaded config does not define these keys.
DEFAULT_HEADLINE_SOURCES = ["barrons", "ft", "wsj", "cnbc"]
DEFAULT_SOURCE_WEIGHTS = {"barrons": 4, "ft": 4, "wsj": 3, "cnbc": 2}
|
|
|
|
|
|
# Run the project's virtualenv bootstrap before feedparser is imported.
# NOTE(review): presumably ensure_venv() makes venv-installed packages
# importable; yfinance/pandas are imported earlier in the file - confirm.
ensure_venv()

import feedparser
|
|
|
|
|
|
class PortfolioError(Exception):
    """Raised when the portfolio config is missing or its data cannot be fetched."""
|
|
|
|
|
|
def ensure_portfolio_config():
    """Copy portfolio.csv.example to portfolio.csv if the real file doesn't exist.

    Best-effort: every failure is reported on stderr and never raised,
    because this runs at import time.
    """
    example_file = CONFIG_DIR / "portfolio.csv.example"
    real_file = CONFIG_DIR / "portfolio.csv"

    if real_file.exists():
        return

    if not example_file.exists():
        print("⚠️ No portfolio.csv or portfolio.csv.example found", file=sys.stderr)
        return

    try:
        shutil.copy(example_file, real_file)
    except PermissionError:
        print("⚠️ Cannot create portfolio.csv (read-only environment)", file=sys.stderr)
    except OSError as exc:
        # A read-only filesystem raises plain OSError (EROFS), not
        # PermissionError; it must not abort the module import either.
        print(f"⚠️ Cannot create portfolio.csv: {exc}", file=sys.stderr)
    else:
        print("📋 Created portfolio.csv from example", file=sys.stderr)


# Initialize user config (copy example if needed)
ensure_portfolio_config()
|
|
|
|
|
|
def get_openbb_binary() -> str:
    """
    Locate the openbb-quote executable.

    Lookup order:
        1. OPENBB_QUOTE_BIN environment variable (must be an executable file;
           otherwise a warning is printed and lookup continues)
        2. PATH via shutil.which()

    Returns:
        Path to openbb-quote binary

    Raises:
        RuntimeError: If openbb-quote is not found
    """
    override = os.environ.get('OPENBB_QUOTE_BIN')
    if override:
        is_executable_file = os.path.isfile(override) and os.access(override, os.X_OK)
        if is_executable_file:
            return override
        print(f"⚠️ OPENBB_QUOTE_BIN={override} is not a valid executable", file=sys.stderr)

    found_on_path = shutil.which('openbb-quote')
    if not found_on_path:
        # Not found - show helpful error
        raise RuntimeError(
            "openbb-quote not found!\n\n"
            "Installation options:\n"
            "1. Install via pip: pip install openbb\n"
            "2. Use existing install: export OPENBB_QUOTE_BIN=/path/to/openbb-quote\n"
            "3. Add to PATH: export PATH=$PATH:$HOME/.local/bin\n\n"
            "See: https://github.com/kesslerio/finance-news-openclaw-skill#dependencies"
        )
    return found_on_path


# Resolve the binary once at import; None means "not installed".
OPENBB_BINARY = None
try:
    OPENBB_BINARY = get_openbb_binary()
except RuntimeError as lookup_error:
    print(f"❌ {lookup_error}", file=sys.stderr)
|
|
|
|
|
|
def load_sources():
    """Load source configuration.

    Reads config/config.json, falling back (with a warning on stderr) to the
    legacy config/sources.json. Files are decoded as UTF-8 rather than with
    the platform's locale encoding.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If neither configuration file exists.
    """
    config_path = CONFIG_DIR / "config.json"
    legacy_path = CONFIG_DIR / "sources.json"

    for candidate in (config_path, legacy_path):
        if not candidate.exists():
            continue
        if candidate is legacy_path:
            print("⚠️ config/config.json missing; falling back to config/sources.json", file=sys.stderr)
        with open(candidate, 'r', encoding='utf-8') as f:
            return json.load(f)

    raise FileNotFoundError("Missing config/config.json")
|
|
|
|
|
|
def _get_best_feed_url(feeds: dict) -> str | None:
|
|
"""Get the best feed URL from a feeds configuration dict.
|
|
|
|
Uses explicit priority list and validates URLs to avoid selecting
|
|
non-URL values like 'name' or other config keys.
|
|
|
|
Args:
|
|
feeds: Dict with feed keys like 'top', 'markets', 'tech'
|
|
|
|
Returns:
|
|
Best URL string or None if no valid URL found
|
|
"""
|
|
# Priority order for feed types (most relevant first)
|
|
PRIORITY_KEYS = ['top', 'markets', 'headlines', 'breaking']
|
|
|
|
for key in PRIORITY_KEYS:
|
|
if key in feeds:
|
|
value = feeds[key]
|
|
# Validate it's a string and starts with http
|
|
if isinstance(value, str) and value.startswith('http'):
|
|
return value
|
|
|
|
# Fallback: search all values for valid URLs (skip non-string/non-URL)
|
|
for key, value in feeds.items():
|
|
if key == 'name':
|
|
continue # Skip 'name' field
|
|
if isinstance(value, str) and value.startswith('http'):
|
|
return value
|
|
|
|
return None
|
|
|
|
|
|
def fetch_rss(url: str, limit: int = 10, timeout: int = 15, deadline: float | None = None) -> list[dict]:
    """Fetch and parse RSS/Atom feed using feedparser with retry logic.

    Args:
        url: Feed URL.
        limit: Number of leading feed entries to consider.
        timeout: Per-request timeout in seconds.
        deadline: Overall deadline passed through to ``fetch_with_retry``.

    Returns:
        A list of dicts with keys 'title', 'link', 'date', 'published_at'
        (epoch seconds or None) and 'description' (at most 200 characters).
        Entries without a title or link are skipped. Empty on fetch or parse
        failure.
    """
    import calendar  # only needed for the struct_time fallback below

    # Fetch content with retry (returns bytes for feedparser to handle encoding)
    content = fetch_with_retry(url, timeout=timeout, deadline=deadline)
    if content is None:
        return []

    # Parse with feedparser (handles RSS and Atom formats, auto-detects encoding from bytes)
    try:
        parsed = feedparser.parse(content)
    except Exception as e:
        print(f"⚠️ Error parsing feed {url}: {e}", file=sys.stderr)
        return []

    items = []
    for entry in parsed.entries[:limit]:
        # Skip entries without title or link
        title = (entry.get('title') or '').strip()
        if not title:
            continue

        # Link handling: Atom uses 'link' dict, RSS uses string
        link = entry.get('link', '')
        if isinstance(link, dict):
            link = link.get('href', '')
        link = link.strip() if isinstance(link, str) else ''
        if not link:
            continue

        # Date handling: different formats across feeds
        published = entry.get('published', '') or entry.get('updated', '')
        published_at = None
        if published:
            try:
                published_at = parsedate_to_datetime(published).timestamp()
            except Exception:
                published_at = None
        if published_at is None:
            # parsedate_to_datetime only understands RFC 2822 dates. For
            # other formats (e.g. ISO 8601) use feedparser's normalized UTC
            # struct_time.
            parsed_time = entry.get('published_parsed') or entry.get('updated_parsed')
            if parsed_time:
                try:
                    published_at = float(calendar.timegm(parsed_time))
                except (TypeError, ValueError, OverflowError):
                    published_at = None

        # Description handling: summary vs description
        description = entry.get('summary', '') or entry.get('description', '')

        items.append({
            'title': title,
            'link': link,
            'date': published.strip() if published else '',
            'published_at': published_at,
            'description': (description or '')[:200].strip()
        })

    return items
|
|
|
|
|
|
def _fetch_via_openbb(
    openbb_bin: str,
    symbol: str,
    timeout: int,
    deadline: float | None,
    allow_price_fallback: bool,
) -> dict | None:
    """Fetch single symbol via openbb-quote subprocess.

    Args:
        openbb_bin: Path to the openbb-quote executable.
        symbol: Ticker symbol passed as the only argument.
        timeout: Subprocess timeout in seconds, clamped by ``deadline``.
        deadline: Overall deadline (see ``clamp_timeout``).
        allow_price_fallback: When the quote has no 'price', substitute
            'open' and then 'prev_close'.

    Returns:
        The quote dict (with 'symbol' set), or None on any failure. None is
        also returned when the tool reports no result, so the caller can try
        another source.
    """
    try:
        effective_timeout = clamp_timeout(timeout, deadline)
    except TimeoutError:
        return None

    try:
        result = subprocess.run(
            [openbb_bin, symbol],
            capture_output=True,
            text=True,
            stdin=subprocess.DEVNULL,
            timeout=effective_timeout,
            check=False
        )
        if result.returncode != 0:
            return None

        data = json.loads(result.stdout)

        # Normalize response structure
        if isinstance(data, dict) and "results" in data and isinstance(data["results"], list):
            data = data["results"][0] if data["results"] else {}
        elif isinstance(data, list):
            data = data[0] if data else {}

        if not isinstance(data, dict):
            return None
        if not data:
            # An empty payload carries no quote. Returning {"symbol": ...}
            # would look like success and suppress the caller's fallback.
            return None

        # Price fallback: use open or prev_close if price is None
        if allow_price_fallback and data.get("price") is None:
            if data.get("open") is not None:
                data["price"] = data["open"]
            elif data.get("prev_close") is not None:
                data["price"] = data["prev_close"]

        # Calculate change_percent if missing
        if data.get("change_percent") is None and data.get("price") and data.get("prev_close"):
            price = data["price"]
            prev_close = data["prev_close"]
            if prev_close != 0:
                data["change_percent"] = ((price - prev_close) / prev_close) * 100

        data["symbol"] = symbol
        return data

    except Exception:
        # Deliberately best-effort: timeouts, a missing binary, malformed JSON
        # or odd field types all mean "no quote from this source".
        return None
|
|
|
|
|
|
def _fetch_via_yfinance(
    symbols: list[str],
    timeout: int,
    deadline: float | None,
) -> dict:
    """Fetch symbols via one yfinance batch download (fallback source).

    Args:
        symbols: Ticker symbols to download together.
        timeout: Accepted for signature parity; not forwarded to yfinance.
        deadline: If already passed, nothing is downloaded.

    Returns:
        Mapping of symbol -> {'price', 'change_percent', 'prev_close',
        'symbol'} for every symbol with at least one non-NaN close in the
        last 5 days. Symbols that cannot be resolved are omitted.
    """
    def extract_quote(frame, ticker, single_request):
        """Build the quote dict for one ticker, or return None to skip it."""
        if frame.empty:
            return None

        # Handle yfinance MultiIndex columns (yfinance >= 0.2.0)
        if isinstance(frame.columns, pd.MultiIndex):
            try:
                history = frame.xs(ticker, level=1, axis=1, drop_level=True)
            except (KeyError, AttributeError):
                return None
        elif single_request:
            # Flat columns only valid for single-symbol requests
            history = frame
        else:
            # Multi-symbol request but flat columns (only one ticker returned data)
            # Skip to avoid misattributing prices to wrong symbols
            return None

        if history.empty:
            return None
        history = history.dropna(subset=['Close'])
        if history.empty:
            return None

        price = float(history.iloc[-1]['Close'])
        prev_close = 0.0
        change_percent = 0.0
        if len(history) > 1:
            prev_close = float(history.iloc[-2]['Close'])
            if prev_close > 0:
                change_percent = ((price - prev_close) / prev_close) * 100

        return {
            "price": price,
            "change_percent": change_percent,
            "prev_close": prev_close,
            "symbol": ticker
        }

    results = {}
    if not symbols:
        return results

    try:
        remaining = time_left(deadline)
        if remaining is not None and remaining <= 0:
            return results

        df = yf.download(" ".join(symbols), period="5d", progress=False, threads=True, ignore_tz=True)

        for symbol in symbols:
            try:
                quote = extract_quote(df, symbol, len(symbols) == 1)
            except Exception:
                continue
            if quote is not None:
                results[symbol] = quote

    except Exception as e:
        print(f"⚠️ yfinance batch failed: {e}", file=sys.stderr)

    return results
|
|
|
|
|
|
def fetch_market_data(
|
|
symbols: list[str],
|
|
timeout: int = 30,
|
|
deadline: float | None = None,
|
|
allow_price_fallback: bool = False,
|
|
) -> dict:
|
|
"""Fetch market data using openbb-quote (primary) with yfinance fallback."""
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
results = {}
|
|
if not symbols:
|
|
return results
|
|
|
|
failed_symbols = []
|
|
|
|
# 1. Try openbb-quote first (primary source)
|
|
if OPENBB_BINARY:
|
|
def fetch_one(sym):
|
|
return sym, _fetch_via_openbb(
|
|
OPENBB_BINARY, sym, timeout, deadline, allow_price_fallback
|
|
)
|
|
|
|
# Parallel fetch with ThreadPoolExecutor
|
|
with ThreadPoolExecutor(max_workers=min(8, len(symbols))) as executor:
|
|
futures = {executor.submit(fetch_one, s): s for s in symbols}
|
|
for future in as_completed(futures):
|
|
try:
|
|
sym, data = future.result()
|
|
if data:
|
|
results[sym] = data
|
|
else:
|
|
failed_symbols.append(sym)
|
|
except Exception:
|
|
failed_symbols.append(futures[future])
|
|
else:
|
|
# No openbb available, all symbols go to yfinance fallback
|
|
print("⚠️ openbb-quote not found, using yfinance fallback", file=sys.stderr)
|
|
failed_symbols = list(symbols)
|
|
|
|
# 2. Fallback to yfinance for any symbols that failed
|
|
if failed_symbols:
|
|
yf_results = _fetch_via_yfinance(failed_symbols, timeout, deadline)
|
|
results.update(yf_results)
|
|
|
|
return results
|
|
|
|
|
|
def fetch_ticker_news(symbol: str, limit: int = 5) -> list[dict]:
    """Fetch news for a specific ticker via Yahoo Finance RSS.

    Args:
        symbol: Ticker symbol; percent-encoded so symbols such as '^GSPC'
            or 'EURUSD=X' form a valid query string.
        limit: Maximum number of feed entries to consider.

    Returns:
        Article dicts as produced by ``fetch_rss``.
    """
    from urllib.parse import quote

    url = (
        "https://feeds.finance.yahoo.com/rss/2.0/headline"
        f"?s={quote(symbol, safe='')}&region=US&lang=en-US"
    )
    return fetch_rss(url, limit)
|
|
|
|
|
|
def get_cached_news(cache_key: str) -> dict | None:
    """Get cached news if fresh (< 15 minutes).

    Args:
        cache_key: Cache entry name; the file is ``<cache_key>.json`` in CACHE_DIR.

    Returns:
        The cached payload, or None when the entry is missing, stale,
        unreadable or not valid JSON. A broken cache is treated as a miss.
    """
    cache_file = CACHE_DIR / f"{cache_key}.json"

    try:
        mtime = datetime.fromtimestamp(cache_file.stat().st_mtime)
        if datetime.now() - mtime >= timedelta(minutes=15):
            return None
        with open(cache_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return None
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError: a truncated or corrupt file
        # must not abort the run, the caller simply refetches.
        print(f"⚠️ Ignoring unreadable cache file {cache_file}: {exc}", file=sys.stderr)
        return None
|
|
|
|
|
|
def save_cache(cache_key: str, data: dict):
    """Write ``data`` as indented JSON to ``<cache_key>.json`` in CACHE_DIR.

    Values JSON cannot encode natively are stringified (``default=str``).
    """
    target = CACHE_DIR / f"{cache_key}.json"
    serialized = json.dumps(data, indent=2, default=str)
    with target.open('w') as handle:
        handle.write(serialized)
|
|
|
|
|
|
def fetch_all_news(args):
    """Fetch news from all configured sources and print it.

    Uses the hourly cache entry unless ``args.force`` is set. Cached and
    freshly fetched data go through the same output path, so ``args.json``
    decides the format in both cases.

    Args:
        args: Parsed CLI namespace with ``force``, ``limit``, ``json`` and
            ``verbose`` attributes.
    """
    sources = load_sources()
    cache_key = f"all_news_{datetime.now().strftime('%Y%m%d_%H')}"

    # Check cache first
    news = None if args.force else get_cached_news(cache_key)

    if not news:
        news = {
            'fetched_at': datetime.now().isoformat(),
            'sources': {}
        }

        # Fetch RSS feeds
        for source_id, feeds in sources['rss_feeds'].items():
            # Skip disabled sources
            if not feeds.get('enabled', True):
                continue

            collected = []
            news['sources'][source_id] = {
                'name': feeds.get('name', source_id),
                'articles': collected
            }

            for feed_name, feed_url in feeds.items():
                if feed_name in ('name', 'enabled', 'note'):
                    continue
                # Same URL test as _get_best_feed_url: other metadata keys
                # (numbers, flags, labels) must not be fetched as feeds.
                if not (isinstance(feed_url, str) and feed_url.startswith('http')):
                    continue

                articles = fetch_rss(feed_url, args.limit)
                for article in articles:
                    article['feed'] = feed_name
                collected.extend(articles)

        # Save to cache
        save_cache(cache_key, news)

    if args.json:
        print(json.dumps(news, indent=2))
        return

    for source_data in news['sources'].values():
        print(f"\n### {source_data['name']}\n")
        for article in source_data['articles'][:args.limit]:
            print(f"• {article['title']}")
            if args.verbose and article.get('description'):
                print(f" {article['description'][:100]}...")
|
|
|
|
|
|
def get_market_news(
    limit: int = 5,
    regions: list[str] | None = None,
    max_indices_per_region: int | None = None,
    language: str | None = None,
    deadline: float | None = None,
    rss_timeout: int = 15,
    subprocess_timeout: int = 30,
) -> dict:
    """Get market overview (indices + top headlines) as data.

    Args:
        limit: Maximum headlines fetched per source.
        regions: If given, only these region keys of the 'markets' config.
        max_indices_per_region: If given, only the first N indices per region.
        language: Selects a per-language headline source list when configured.
        deadline: Overall deadline; work stops (partial result) once it passes.
        rss_timeout: Per-feed timeout in seconds, clamped by the deadline.
        subprocess_timeout: Quote fetch timeout in seconds.

    Returns:
        Dict with 'fetched_at', 'markets' (region -> name + indices) and
        'headlines' (articles tagged with source_id, source and weight).
    """
    def deadline_passed() -> bool:
        remaining = time_left(deadline)
        return remaining is not None and remaining <= 0

    sources = load_sources()
    source_weights = sources.get("source_weights", DEFAULT_SOURCE_WEIGHTS)
    headline_sources = sources.get("headline_sources", DEFAULT_HEADLINE_SOURCES)
    sources_by_lang = sources.get("headline_sources_by_lang", {})
    if language and isinstance(sources_by_lang, dict):
        lang_sources = sources_by_lang.get(language)
        if isinstance(lang_sources, list) and lang_sources:
            headline_sources = lang_sources
    headline_exclude = set(sources.get("headline_exclude", []))

    result = {
        'fetched_at': datetime.now().isoformat(),
        'markets': {},
        'headlines': []
    }

    # Fetch market indices FIRST (fast, important for briefing)
    for region, config in sources['markets'].items():
        if deadline_passed():
            break
        if regions is not None and region not in regions:
            continue

        indices = {}
        result['markets'][region] = {
            'name': config['name'],
            'indices': indices
        }

        symbols = config['indices']
        if max_indices_per_region is not None:
            symbols = symbols[:max_indices_per_region]
        if not symbols:
            continue

        # One batched call per region: fetch_market_data fetches the symbols
        # in parallel, instead of one pool per symbol in sequence.
        quotes = fetch_market_data(
            list(symbols),
            timeout=subprocess_timeout,
            deadline=deadline,
            allow_price_fallback=True,
        )
        index_names = config.get('index_names') or {}
        for symbol in symbols:  # keep the configured order
            if symbol in quotes:
                indices[symbol] = {
                    'name': index_names.get(symbol, symbol),
                    'data': quotes[symbol]
                }

    # Fetch top headlines from preferred sources
    for source in headline_sources:
        if deadline_passed():
            break
        if source in headline_exclude:
            continue
        if source not in sources['rss_feeds']:
            continue

        feeds = sources['rss_feeds'][source]
        if not feeds.get("enabled", True):
            continue
        feed_url = _get_best_feed_url(feeds)
        if not feed_url:
            continue

        try:
            effective_timeout = clamp_timeout(rss_timeout, deadline)
        except TimeoutError:
            break
        articles = fetch_rss(feed_url, limit, timeout=effective_timeout, deadline=deadline)
        for article in articles:
            article['source_id'] = source
            article['source'] = feeds.get('name', source)
            article['weight'] = source_weights.get(source, 1)
        result['headlines'].extend(articles)

    return result
|
|
|
|
|
|
def fetch_market_news(args):
    """Fetch market overview (indices + top headlines) and print it.

    Args:
        args: Parsed CLI namespace with ``deadline``, ``limit`` and ``json``.
    """
    deadline = compute_deadline(args.deadline)
    result = get_market_news(args.limit, deadline=deadline)

    if args.json:
        print(json.dumps(result, indent=2))
        return

    print("\n📊 Market Overview\n")
    for data in result['markets'].values():
        print(f"**{data['name']}**")
        for idx in data['indices'].values():
            quote = idx.get('data')
            if not quote:
                continue
            price = quote.get('price')
            if price is None:
                price = 'N/A'
            # A quote can carry change_percent=None (no previous close);
            # comparing or formatting None would raise TypeError.
            change_pct = quote.get('change_percent')
            if not isinstance(change_pct, (int, float)):
                change_pct = 0
            emoji = '📈' if change_pct >= 0 else '📉'
            print(f" {emoji} {idx['name']}: {price} ({change_pct:+.2f}%)")
        print()

    print("\n🔥 Top Headlines\n")
    for article in result['headlines'][:args.limit]:
        print(f"• [{article['source']}] {article['title']}")
|
|
|
|
|
|
def get_portfolio_metadata() -> dict:
    """Get metadata for portfolio symbols.

    Returns:
        Mapping of upper-cased symbol -> the full CSV row (as a dict) read
        from config/portfolio.csv. Empty when the file does not exist. Rows
        without a symbol are skipped.
    """
    import csv

    path = CONFIG_DIR / "portfolio.csv"
    meta = {}
    if not path.exists():
        return meta

    # newline='' lets the csv module handle quoted embedded newlines itself.
    with open(path, 'r', newline='') as f:
        for row in csv.DictReader(f):
            # Short rows give None for missing columns, so guard before strip().
            sym = (row.get('symbol') or '').strip().upper()
            if sym:
                meta[sym] = row
    return meta
|
|
|
|
|
|
def get_portfolio_news(
    limit: int = 5,
    max_stocks: int = 5,
    deadline: float | None = None,
    subprocess_timeout: int = 30,
) -> dict:
    """Collect a quote and recent articles for each portfolio stock.

    Portfolios with more than 15 symbols are delegated to
    ``get_large_portfolio_news``; smaller ones are fetched symbol by symbol,
    limited to the first ``max_stocks`` symbols.

    Args:
        limit: Maximum articles per stock.
        max_stocks: Cap on the number of stocks fetched (falsy disables it).
        deadline: Overall deadline; a partial result is returned once it passes.
        subprocess_timeout: Quote fetch timeout in seconds.

    Returns:
        Dict with 'fetched_at' and 'stocks' (symbol -> quote, articles, info).

    Raises:
        PortfolioError: If config/portfolio.csv is missing or yields no symbols.
    """
    portfolio_file = CONFIG_DIR / "portfolio.csv"
    if not portfolio_file.exists():
        raise PortfolioError("Portfolio config missing: config/portfolio.csv")

    symbols = get_portfolio_symbols()
    if not symbols:
        raise PortfolioError("No portfolio symbols found")

    portfolio_meta = get_portfolio_metadata()

    # Large portfolios (> 15 stocks) use the tiered fetch instead.
    if len(symbols) > 15:
        print(f"⚡ Large portfolio detected ({len(symbols)} stocks); using tiered fetch.", file=sys.stderr)
        return get_large_portfolio_news(
            limit=limit,
            top_movers_count=10,
            deadline=deadline,
            subprocess_timeout=subprocess_timeout,
            portfolio_meta=portfolio_meta
        )

    # Standard fetching for small portfolios
    stocks = {}
    news = {
        'fetched_at': datetime.now().isoformat(),
        'stocks': stocks
    }

    # Legacy manual cap on the number of stocks.
    if max_stocks and len(symbols) > max_stocks:
        symbols = symbols[:max_stocks]

    for symbol in symbols:
        remaining = time_left(deadline)
        if remaining is not None and remaining <= 0:
            print("⚠️ Deadline exceeded; returning partial portfolio news", file=sys.stderr)
            break
        if not symbol:
            continue

        articles = fetch_ticker_news(symbol, limit)
        quotes = fetch_market_data(
            [symbol],
            timeout=subprocess_timeout,
            deadline=deadline,
        )

        stocks[symbol] = {
            'quote': quotes.get(symbol, {}),
            'articles': articles,
            'info': portfolio_meta.get(symbol, {})
        }

    return news
|
|
|
|
|
|
def fetch_portfolio_news(args):
    """Fetch news for portfolio stocks and print it.

    Exits with status 1 on a PortfolioError (the message goes to stderr
    unless JSON output was requested).

    Args:
        args: Parsed CLI namespace with ``deadline``, ``limit``,
            ``max_stocks`` and ``json``.
    """
    try:
        deadline = compute_deadline(args.deadline)
        news = get_portfolio_news(
            args.limit,
            args.max_stocks,
            deadline=deadline
        )
    except PortfolioError as exc:
        if not args.json:
            print(f"\n❌ Error: {exc}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        print(json.dumps(news, indent=2))
        return

    print(f"\n📊 Portfolio News ({len(news['stocks'])} stocks)\n")
    for symbol, data in news['stocks'].items():
        quote = data.get('quote', {})
        price = quote.get('price')
        prev_close = quote.get('prev_close', 0)
        open_price = quote.get('open', 0)

        # Calculate daily change.
        # If markets are closed (price is null) there is no day-before close,
        # so open -> prev_close is used as a proxy for the last session move.
        change_pct = 0
        display_price = price or prev_close

        if price and prev_close and prev_close != 0:
            # Markets open: current price vs prev close
            change_pct = ((price - prev_close) / prev_close) * 100
        elif not price and open_price and prev_close and prev_close != 0:
            # Markets closed: last session change (prev_close vs open)
            change_pct = ((prev_close - open_price) / open_price) * 100

        emoji = '📈' if change_pct >= 0 else '📉'
        if not display_price:
            # No usable quote: show N/A rather than "None" or "$0.00".
            price_str = 'N/A'
        elif isinstance(display_price, (int, float)):
            price_str = f"${display_price:.2f}"
        else:
            price_str = str(display_price)

        print(f"\n**{symbol}** {emoji} {price_str} ({change_pct:+.2f}%)")
        for article in data['articles'][:3]:
            print(f" • {article['title'][:80]}...")
|
|
def get_portfolio_symbols() -> list[str]:
    """Get list of portfolio symbols.

    Runs ``portfolio.py symbols`` (next to this script) and parses its
    comma-separated stdout. The current interpreter is used so the helper
    runs in the same environment as this process.

    Returns:
        The symbols, or an empty list when the helper fails, times out
        (10 s) or exits non-zero.
    """
    interpreter = sys.executable or 'python3'
    try:
        result = subprocess.run(
            [interpreter, str(SCRIPT_DIR / 'portfolio.py'), 'symbols'],
            capture_output=True,
            text=True,
            stdin=subprocess.DEVNULL,
            timeout=10,
            check=False
        )
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # Still best-effort, but say why the portfolio looks empty.
        print(f"⚠️ Could not list portfolio symbols: {exc}", file=sys.stderr)
        return []

    if result.returncode != 0:
        return []
    return [s.strip() for s in result.stdout.strip().split(',') if s.strip()]
|
|
|
|
|
|
def deduplicate_news(articles: list[dict]) -> list[dict]:
    """Drop repeated articles, keeping the first occurrence of each.

    Articles are identified by their 'link'; when that is missing or empty,
    by the combination of 'title' and 'date'. Input order is preserved.
    """
    first_by_identity = {}
    for item in articles:
        identity = item.get('link', '') or f"{item.get('title', '')}|{item.get('date', '')}"
        # setdefault keeps the earliest article for each identity.
        first_by_identity.setdefault(identity, item)
    return list(first_by_identity.values())
|
|
|
|
|
|
def get_portfolio_only_news(limit_per_ticker: int = 5) -> dict:
    """
    Get portfolio news with top 5 gainers and 5 losers, plus news per ticker.

    Only tickers that rose count as gainers and only tickers that fell count
    as losers, so a ticker never appears in both lists. Tickers without a
    usable quote have a change of 0 and appear in neither.

    Args:
        limit_per_ticker: Max news items per ticker (default: 5)

    Returns:
        dict with 'fetched_at', 'gainers' and 'losers' (each a list of
        tickers with symbol, price, change_pct, quote and news), or a dict
        with 'error' and empty lists when the portfolio has no symbols.
    """
    symbols = get_portfolio_symbols()
    if not symbols:
        return {'error': 'No portfolio symbols found', 'gainers': [], 'losers': []}

    # Fetch prices for all symbols
    quotes = fetch_market_data(symbols)

    # Build list of tickers with their daily change
    tickers_with_prices = []
    for symbol in symbols:
        quote = quotes.get(symbol, {})
        price = quote.get('price')
        prev_close = quote.get('prev_close', 0)
        open_price = quote.get('open', 0)

        if price and prev_close and prev_close != 0:
            change_pct = ((price - prev_close) / prev_close) * 100
        elif price and open_price and open_price != 0:
            change_pct = ((price - open_price) / open_price) * 100
        else:
            change_pct = 0

        tickers_with_prices.append({
            'symbol': symbol,
            'price': price,
            'change_pct': change_pct,
            'quote': quote
        })

    # Rank by change, best first
    ranked = sorted(tickers_with_prices, key=lambda x: x['change_pct'], reverse=True)

    # Top 5 risers and the 5 biggest fallers (biggest loser first).
    gainers = [t for t in ranked if t['change_pct'] > 0][:5]
    losers = [t for t in reversed(ranked) if t['change_pct'] < 0][:5]

    # Fetch news for each ticker
    for ticker in gainers + losers:
        symbol = ticker['symbol']
        # Try RSS first
        articles = fetch_ticker_news(symbol, limit_per_ticker)
        if articles:
            # RSS items carry no 'source'; label them so consumers do not
            # fall back to "Unknown".
            for article in articles:
                article.setdefault('source', 'Yahoo Finance')
        else:
            # Fallback to web search if no RSS
            articles = web_search_news(symbol, limit_per_ticker)
        ticker['news'] = deduplicate_news(articles)

    return {
        'fetched_at': datetime.now().isoformat(),
        'gainers': gainers,
        'losers': losers
    }
|
|
|
|
def get_portfolio_movers(
    max_items: int = 8,
    min_abs_change: float = 1.0,
    deadline: float | None = None,
    subprocess_timeout: int = 30,
) -> dict:
    """Return the portfolio's biggest movers; no news is fetched.

    Args:
        max_items: Maximum movers returned; half the slots go to gainers and
            half to losers, with leftover slots filled by the largest
            remaining absolute moves.
        min_abs_change: Minimum absolute percentage change to qualify.
        deadline: Overall deadline for the quote fetch.
        subprocess_timeout: Quote fetch timeout in seconds.

    Returns:
        Dict with 'fetched_at' and 'movers', or 'error' plus an empty
        'movers' list when there are no symbols or the deadline has passed.
    """
    symbols = get_portfolio_symbols()
    if not symbols:
        return {'error': 'No portfolio symbols found', 'movers': []}

    try:
        effective_timeout = clamp_timeout(subprocess_timeout, deadline)
    except TimeoutError:
        return {'error': 'Deadline exceeded while fetching portfolio quotes', 'movers': []}

    quotes = fetch_market_data(symbols, timeout=effective_timeout, deadline=deadline)

    rising = []
    falling = []
    for symbol in symbols:
        quote = quotes.get(symbol, {})
        price = quote.get('price')
        if not price:
            continue
        # Prefer the previous close as baseline, else the session open.
        baseline = quote.get('prev_close', 0) or quote.get('open', 0)
        if not baseline:
            continue

        change_pct = ((price - baseline) / baseline) * 100
        mover = {'symbol': symbol, 'change_pct': change_pct, 'price': price}
        if change_pct >= min_abs_change:
            rising.append(mover)
        elif change_pct <= -min_abs_change:
            falling.append(mover)

    rising.sort(key=lambda m: m['change_pct'], reverse=True)
    falling.sort(key=lambda m: m['change_pct'])

    per_side = max_items // 2
    picked = rising[:per_side] + falling[:per_side]
    shortfall = max_items - len(picked)
    if shortfall > 0:
        # Top up with whichever remaining movers changed the most.
        leftovers = sorted(
            rising[per_side:] + falling[per_side:],
            key=lambda m: abs(m['change_pct']),
            reverse=True,
        )
        picked.extend(leftovers[:shortfall])

    return {
        'fetched_at': datetime.now().isoformat(),
        'movers': picked[:max_items],
    }
|
|
|
|
|
|
def web_search_news(symbol: str, limit: int = 5) -> list[dict]:
    """Fallback: search for news via the external ``web-search`` command.

    Args:
        symbol: Ticker symbol to search for.
        limit: Maximum number of results.

    Returns:
        Article dicts with 'title', 'link', 'source', 'date' and
        'description' (the last two always empty). Empty when the command
        is missing, fails, times out (30 s) or returns unexpected output.
    """
    articles = []
    try:
        result = subprocess.run(
            ['web-search', f'{symbol} stock news today', '--count', str(limit)],
            capture_output=True,
            text=True,
            stdin=subprocess.DEVNULL,  # match the other subprocess calls: never wait on our stdin
            timeout=30,
            check=False
        )
        if result.returncode == 0:
            data = json.loads(result.stdout)
            for item in data.get('results', [])[:limit]:
                articles.append({
                    'title': item.get('title', ''),
                    'link': item.get('url', ''),
                    'source': item.get('site', 'Web'),
                    'date': '',
                    'description': ''
                })
    except Exception as e:
        print(f"⚠️ Web search failed for {symbol}: {e}", file=sys.stderr)
    return articles
|
|
|
|
|
|
def get_large_portfolio_news(
    limit: int = 3,
    top_movers_count: int = 10,
    deadline: float | None = None,
    subprocess_timeout: int = 30,
    portfolio_meta: dict | None = None,
) -> dict:
    """
    Tiered fetch for large portfolios.

    1. Batch fetch prices for ALL stocks (fast).
    2. Identify top movers (gainers/losers).
    3. Fetch news ONLY for top movers.

    Args:
        limit: Maximum articles per mover.
        top_movers_count: Total movers to fetch news for, split evenly
            between gainers and losers (default 10 -> 5 + 5).
        deadline: Overall deadline; news fetching stops once it passes.
        subprocess_timeout: Quote fetch timeout in seconds.
        portfolio_meta: Optional symbol -> metadata mapping copied into 'info'.

    Returns:
        Dict with 'fetched_at', 'stocks' (symbol -> quote, articles, info)
        and 'meta' (total_stocks, top_movers_count).

    Raises:
        PortfolioError: If there are no symbols or the deadline has already
            passed before the price fetch.
    """
    symbols = get_portfolio_symbols()
    if not symbols:
        raise PortfolioError("No portfolio symbols found")

    # 1. Batch fetch prices
    try:
        effective_timeout = clamp_timeout(subprocess_timeout, deadline)
    except TimeoutError:
        raise PortfolioError("Deadline exceeded before price fetch")

    quotes = fetch_market_data(symbols, timeout=effective_timeout, deadline=deadline)

    # 2. Identify top movers. A quote may carry change_percent=None; treat it
    # as 0 so the sort never compares None with a number.
    movers = sorted(
        ((symbol, data.get('change_percent') or 0) for symbol, data in quotes.items()),
        key=lambda mover: mover[1],
    )

    per_side = max(1, top_movers_count // 2) if top_movers_count > 0 else 0
    if per_side:
        losers = movers[:per_side]           # biggest fallers first
        gainers = movers[-per_side:][::-1]   # biggest risers first
    else:
        losers, gainers = [], []             # note: movers[-0:] would be the whole list

    # Combined list for news fetching; dedupe because gainers and losers
    # overlap when fewer than 2 * per_side quotes came back.
    top_symbols = []
    seen = set()
    for sym, _change in gainers + losers:
        if sym not in seen:
            seen.add(sym)
            top_symbols.append(sym)
    top_symbols = top_symbols[:max(top_movers_count, 0)]

    # 3. Fetch news for top movers
    news = {
        'fetched_at': datetime.now().isoformat(),
        'stocks': {},
        'meta': {
            'total_stocks': len(symbols),
            'top_movers_count': len(top_symbols)
        }
    }

    for symbol in top_symbols:
        remaining = time_left(deadline)
        if remaining is not None and remaining <= 0:
            break

        news['stocks'][symbol] = {
            'quote': quotes.get(symbol, {}),
            'articles': fetch_ticker_news(symbol, limit),
            'info': portfolio_meta.get(symbol, {}) if portfolio_meta else {}
        }

    return news
|
|
|
|
|
|
def fetch_portfolio_only(args):
    """Print the portfolio's top gainers and losers together with their news.

    Exits with status 1 when the data carries an 'error' key. With
    ``args.json`` the raw result is dumped as JSON; otherwise a text report
    is printed.

    Args:
        args: Parsed CLI namespace with ``limit`` and ``json``.
    """
    result = get_portfolio_only_news(limit_per_ticker=args.limit)

    if "error" in result:
        print(f"\n❌ Error: {result.get('error', 'Unknown')}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        print(json.dumps(result, indent=2, ensure_ascii=False))
        return

    def render(entry: dict) -> str:
        """Format one ticker: a headline row followed by its news bullets."""
        change = entry['change_pct']
        price = entry.get('price')
        trend = '📈' if change >= 0 else '📉'
        shown_price = f"${price:.2f}" if price else 'N/A'
        rows = [f"**{entry['symbol']}** {trend} {shown_price} ({change:+.2f}%)"]

        headlines = entry.get('news')
        if not headlines:
            rows.append(" • No recent news")
        else:
            for article in headlines[:args.limit]:
                source = article.get('source', 'Unknown')
                title = article.get('title', '')[:70]
                rows.append(f" • [{source}] {title}...")
        return '\n'.join(rows)

    # Text output: one section per list, gainers first.
    sections = (
        ("\n🚀 **Top Gainers**\n", 'gainers'),
        ("\n📉 **Top Losers**\n", 'losers'),
    )
    for heading, key in sections:
        print(heading)
        for entry in result[key]:
            print(render(entry))
            print()
|
|
|
|
|
|
def main():
    """Parse the command line and dispatch to the chosen sub-command handler."""
    parser = argparse.ArgumentParser(description='News Fetcher')
    commands = parser.add_subparsers(dest='command', required=True)

    def add_command(name, help_text, handler, limit_help='Max articles per source'):
        """Register a sub-command with the --json / --limit options they all share."""
        sub = commands.add_parser(name, help=help_text)
        sub.add_argument('--json', action='store_true', help='Output as JSON')
        sub.add_argument('--limit', type=int, default=5, help=limit_help)
        sub.set_defaults(func=handler)
        return sub

    # All news
    all_cmd = add_command('all', 'Fetch all news sources', fetch_all_news)
    all_cmd.add_argument('--force', action='store_true', help='Bypass cache')
    all_cmd.add_argument('--verbose', '-v', action='store_true', help='Show descriptions')

    # Market news
    market_cmd = add_command('market', 'Market overview + headlines', fetch_market_news)
    market_cmd.add_argument('--deadline', type=int, default=None, help='Overall deadline in seconds')

    # Portfolio news
    portfolio_cmd = add_command('portfolio', 'News for portfolio stocks', fetch_portfolio_news)
    portfolio_cmd.add_argument('--max-stocks', type=int, default=5, help='Max stocks to fetch (default: 5)')
    portfolio_cmd.add_argument('--deadline', type=int, default=None, help='Overall deadline in seconds')

    # Portfolio-only news (top 5 gainers + top 5 losers)
    add_command(
        'portfolio-only',
        'Top 5 gainers + top 5 losers with news',
        fetch_portfolio_only,
        limit_help='Max news items per ticker',
    )

    args = parser.parse_args()
    args.func(args)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|