#!/usr/bin/env python3
"""
News Fetcher - Aggregate news from multiple sources.
"""
import argparse
import json
import os
import shutil
import subprocess
import sys
import time
from datetime import datetime, timedelta
from email.utils import parsedate_to_datetime
from pathlib import Path
import ssl
import urllib.error
import urllib.request
import yfinance as yf
import pandas as pd
from utils import clamp_timeout, compute_deadline, ensure_venv, time_left
# Retry configuration for fetch_with_retry (total tries = DEFAULT_MAX_RETRIES + 1)
DEFAULT_MAX_RETRIES = 3
DEFAULT_RETRY_DELAY = 1 # Base delay in seconds (exponential backoff)
def fetch_with_retry(
    url: str,
    max_retries: int = DEFAULT_MAX_RETRIES,
    base_delay: float = DEFAULT_RETRY_DELAY,
    timeout: int = 15,
    deadline: float | None = None,
) -> bytes | None:
    """
    Fetch URL content with exponential backoff retry.

    Args:
        url: URL to fetch
        max_retries: Maximum number of retry attempts (total tries = max_retries + 1)
        base_delay: Base delay in seconds (exponential backoff: delay * 2^attempt)
        timeout: Per-request timeout in seconds
        deadline: Overall deadline timestamp (checked before each attempt)

    Returns:
        Response content as bytes (feedparser handles encoding), or None if all
        retries failed, the deadline was exceeded, or the server returned a
        permanent client error.
    """
    last_error = None
    for attempt in range(max_retries + 1):  # +1 because attempt 0 is the first try
        # Check deadline before each attempt. Evaluate time_left() once so the
        # check and any later use agree on the same remaining-time value.
        remaining = time_left(deadline)
        if remaining is not None and remaining <= 0:
            print(f"⚠️ Deadline exceeded, skipping fetch: {url}", file=sys.stderr)
            return None
        try:
            req = urllib.request.Request(url, headers={'User-Agent': 'OpenClaw/1.0'})
            with urllib.request.urlopen(req, timeout=timeout, context=SSL_CONTEXT) as response:
                return response.read()
        except urllib.error.HTTPError as e:
            last_error = e
            # 4xx client errors (other than 429 Too Many Requests) are
            # permanent; retrying them only burns the deadline budget.
            if 400 <= e.code < 500 and e.code != 429:
                break
            if attempt < max_retries:
                delay = base_delay * (2 ** attempt)  # Exponential backoff
                print(f"⚠️ Fetch failed (attempt {attempt + 1}/{max_retries + 1}): {e}. Retrying in {delay}s...", file=sys.stderr)
                time.sleep(delay)
        except urllib.error.URLError as e:
            last_error = e
            if attempt < max_retries:
                delay = base_delay * (2 ** attempt)  # Exponential backoff
                print(f"⚠️ Fetch failed (attempt {attempt + 1}/{max_retries + 1}): {e}. Retrying in {delay}s...", file=sys.stderr)
                time.sleep(delay)
        except TimeoutError:
            last_error = TimeoutError("Request timed out")
            if attempt < max_retries:
                delay = base_delay * (2 ** attempt)
                print(f"⚠️ Timeout (attempt {attempt + 1}/{max_retries + 1}). Retrying in {delay}s...", file=sys.stderr)
                time.sleep(delay)
        except Exception as e:
            # Unexpected failures (bad scheme, SSL setup, ...) are not retried.
            last_error = e
            print(f"⚠️ Unexpected error fetching {url}: {e}", file=sys.stderr)
            return None
    print(f"⚠️ All {max_retries + 1} attempts failed for {url}: {last_error}", file=sys.stderr)
    return None
# Directory layout: scripts/ (this file), sibling ../config and ../cache dirs.
SCRIPT_DIR = Path(__file__).parent
CONFIG_DIR = SCRIPT_DIR.parent / "config"
CACHE_DIR = SCRIPT_DIR.parent / "cache"
# Ensure cache directory exists
CACHE_DIR.mkdir(exist_ok=True)
# CA bundle resolution: explicit SSL_CERT_FILE wins, then common distro
# locations; None falls back to the system default trust store.
CA_FILE = (
    os.environ.get("SSL_CERT_FILE")
    or ("/etc/ssl/certs/ca-bundle.crt" if os.path.exists("/etc/ssl/certs/ca-bundle.crt") else None)
    or ("/etc/ssl/certs/ca-certificates.crt" if os.path.exists("/etc/ssl/certs/ca-certificates.crt") else None)
)
# Shared TLS context for all urllib requests made by this module.
SSL_CONTEXT = ssl.create_default_context(cafile=CA_FILE) if CA_FILE else ssl.create_default_context()
# Fallback headline configuration used when config.json omits these keys.
DEFAULT_HEADLINE_SOURCES = ["barrons", "ft", "wsj", "cnbc"]
DEFAULT_SOURCE_WEIGHTS = {
    "barrons": 4,
    "ft": 4,
    "wsj": 3,
    "cnbc": 2
}
# feedparser lives in the project venv, so activate it before importing.
ensure_venv()
import feedparser
class PortfolioError(Exception):
    """Portfolio configuration or fetch error (raised by the portfolio helpers)."""
def ensure_portfolio_config():
    """Copy portfolio.csv.example to portfolio.csv if real file doesn't exist.

    Best-effort: on read-only filesystems the copy failure is reported to
    stderr but not raised, so the rest of the script can still run.
    """
    example_file = CONFIG_DIR / "portfolio.csv.example"
    real_file = CONFIG_DIR / "portfolio.csv"
    if real_file.exists():
        return
    if example_file.exists():
        try:
            shutil.copy(example_file, real_file)
            # Fix: these messages had f-string prefixes with no placeholders.
            print("📋 Created portfolio.csv from example", file=sys.stderr)
        except PermissionError:
            print("⚠️ Cannot create portfolio.csv (read-only environment)", file=sys.stderr)
    else:
        print("⚠️ No portfolio.csv or portfolio.csv.example found", file=sys.stderr)
# Initialize user config at import time (copies the example if needed).
ensure_portfolio_config()
def get_openbb_binary() -> str:
    """Locate the openbb-quote executable.

    Resolution order:
    1. The OPENBB_QUOTE_BIN environment variable (must point at an
       executable regular file).
    2. The first 'openbb-quote' found on PATH.

    Returns:
        Path to the openbb-quote binary.

    Raises:
        RuntimeError: If no usable binary can be found.
    """
    # Environment-variable override takes precedence over PATH lookup.
    override = os.environ.get('OPENBB_QUOTE_BIN')
    if override:
        if os.path.isfile(override) and os.access(override, os.X_OK):
            return override
        print(f"⚠️ OPENBB_QUOTE_BIN={override} is not a valid executable", file=sys.stderr)
    found = shutil.which('openbb-quote')
    if found:
        return found
    # Nothing usable anywhere — fail with installation guidance.
    raise RuntimeError(
        "openbb-quote not found!\n\n"
        "Installation options:\n"
        "1. Install via pip: pip install openbb\n"
        "2. Use existing install: export OPENBB_QUOTE_BIN=/path/to/openbb-quote\n"
        "3. Add to PATH: export PATH=$PATH:$HOME/.local/bin\n\n"
        "See: https://github.com/kesslerio/finance-news-openclaw-skill#dependencies"
    )
# Resolve the openbb-quote binary once at import. None means "no openbb";
# fetch_market_data checks this and routes everything to yfinance instead.
try:
    OPENBB_BINARY = get_openbb_binary()
except RuntimeError as e:
    print(f"{e}", file=sys.stderr)
    OPENBB_BINARY = None
def load_sources():
    """Load source configuration, preferring config.json over legacy sources.json."""
    candidates = (
        (CONFIG_DIR / "config.json", False),
        (CONFIG_DIR / "sources.json", True),  # legacy location
    )
    for config_path, is_legacy in candidates:
        if not config_path.exists():
            continue
        if is_legacy:
            print("⚠️ config/config.json missing; falling back to config/sources.json", file=sys.stderr)
        with open(config_path, 'r') as f:
            return json.load(f)
    raise FileNotFoundError("Missing config/config.json")
def _get_best_feed_url(feeds: dict) -> str | None:
"""Get the best feed URL from a feeds configuration dict.
Uses explicit priority list and validates URLs to avoid selecting
non-URL values like 'name' or other config keys.
Args:
feeds: Dict with feed keys like 'top', 'markets', 'tech'
Returns:
Best URL string or None if no valid URL found
"""
# Priority order for feed types (most relevant first)
PRIORITY_KEYS = ['top', 'markets', 'headlines', 'breaking']
for key in PRIORITY_KEYS:
if key in feeds:
value = feeds[key]
# Validate it's a string and starts with http
if isinstance(value, str) and value.startswith('http'):
return value
# Fallback: search all values for valid URLs (skip non-string/non-URL)
for key, value in feeds.items():
if key == 'name':
continue # Skip 'name' field
if isinstance(value, str) and value.startswith('http'):
return value
return None
def fetch_rss(url: str, limit: int = 10, timeout: int = 15, deadline: float | None = None) -> list[dict]:
    """Fetch and parse RSS/Atom feed using feedparser with retry logic."""
    # Download raw bytes so feedparser can detect the declared encoding itself.
    raw = fetch_with_retry(url, timeout=timeout, deadline=deadline)
    if raw is None:
        return []
    try:
        parsed = feedparser.parse(raw)
    except Exception as e:
        print(f"⚠️ Error parsing feed {url}: {e}", file=sys.stderr)
        return []
    items = []
    for entry in parsed.entries[:limit]:
        title = entry.get('title', '').strip()
        if not title:
            continue  # a headline without a title is useless
        # Atom may expose 'link' as a dict carrying 'href'; RSS uses a string.
        link = entry.get('link', '')
        if isinstance(link, dict):
            link = link.get('href', '').strip()
        if not link:
            continue
        # Publication date varies across feeds: fall back to 'updated' and
        # tolerate unparsable values by leaving the timestamp as None.
        published = entry.get('published', '') or entry.get('updated', '')
        published_at = None
        if published:
            try:
                published_at = parsedate_to_datetime(published).timestamp()
            except Exception:
                published_at = None
        # Some feeds use 'summary', others 'description'.
        summary = entry.get('summary', '') or entry.get('description', '')
        items.append({
            'title': title,
            'link': link,
            'date': published.strip() if published else '',
            'published_at': published_at,
            'description': (summary or '')[:200].strip()
        })
    return items
def _fetch_via_openbb(
    openbb_bin: str,
    symbol: str,
    timeout: int,
    deadline: float | None,
    allow_price_fallback: bool,
) -> dict | None:
    """Fetch single symbol via openbb-quote subprocess.

    Returns a normalized quote dict (always containing 'symbol'), or None on
    any failure: deadline already passed, non-zero exit, unparsable JSON, or
    an unexpected payload shape.
    """
    try:
        # Shrink the subprocess timeout to fit the remaining deadline budget.
        effective_timeout = clamp_timeout(timeout, deadline)
    except TimeoutError:
        # Deadline already exceeded; skip the fetch entirely.
        return None
    try:
        result = subprocess.run(
            [openbb_bin, symbol],
            capture_output=True,
            text=True,
            stdin=subprocess.DEVNULL,
            timeout=effective_timeout,
            check=False
        )
        if result.returncode != 0:
            return None
        data = json.loads(result.stdout)
        # Normalize response structure: the tool may return {"results": [...]}
        # or a bare list; reduce either to the first result dict.
        if isinstance(data, dict) and "results" in data and isinstance(data["results"], list):
            data = data["results"][0] if data["results"] else {}
        elif isinstance(data, list):
            data = data[0] if data else {}
        if not isinstance(data, dict):
            return None
        # Price fallback: use open or prev_close if price is None
        # (presumably when markets are closed — confirm against the quote tool).
        if allow_price_fallback and data.get("price") is None:
            if data.get("open") is not None:
                data["price"] = data["open"]
            elif data.get("prev_close") is not None:
                data["price"] = data["prev_close"]
        # Calculate change_percent if the payload did not supply one.
        if data.get("change_percent") is None and data.get("price") and data.get("prev_close"):
            price = data["price"]
            prev_close = data["prev_close"]
            if prev_close != 0:
                data["change_percent"] = ((price - prev_close) / prev_close) * 100
        data["symbol"] = symbol
        return data
    except Exception:
        # Deliberate best-effort: any failure means "no quote"; the caller
        # falls back to yfinance for this symbol.
        return None
def _fetch_via_yfinance(
    symbols: list[str],
    timeout: int,
    deadline: float | None,
) -> dict:
    """Fetch symbols via yfinance batch download (fallback).

    Note: `timeout` is currently unused here; yfinance manages its own
    network timeouts internally.

    Returns:
        Mapping of symbol -> quote dict (price / change_percent / prev_close);
        symbols with no usable data are simply absent from the result.
    """
    results = {}
    if not symbols:
        return results
    try:
        # Respect the overall deadline before starting the batch download.
        if time_left(deadline) is not None and time_left(deadline) <= 0:
            return results
        tickers = " ".join(symbols)
        # 5 days of history gives us latest close plus a previous close.
        df = yf.download(tickers, period="5d", progress=False, threads=True, ignore_tz=True)
        for symbol in symbols:
            try:
                if df.empty:
                    continue
                # Handle yfinance MultiIndex columns (yfinance >= 0.2.0):
                # level 1 of the column index is the ticker symbol.
                if isinstance(df.columns, pd.MultiIndex):
                    try:
                        s_df = df.xs(symbol, level=1, axis=1, drop_level=True)
                    except (KeyError, AttributeError):
                        continue
                elif len(symbols) == 1:
                    # Flat columns only valid for single-symbol requests
                    s_df = df
                else:
                    # Multi-symbol request but flat columns (only one ticker returned data)
                    # Skip to avoid misattributing prices to wrong symbols
                    continue
                if s_df.empty:
                    continue
                # Drop rows without a close price before picking the latest.
                s_df = s_df.dropna(subset=['Close'])
                if s_df.empty:
                    continue
                latest = s_df.iloc[-1]
                price = float(latest['Close'])
                prev_close = 0.0
                change_percent = 0.0
                if len(s_df) > 1:
                    prev_row = s_df.iloc[-2]
                    prev_close = float(prev_row['Close'])
                    if prev_close > 0:
                        change_percent = ((price - prev_close) / prev_close) * 100
                results[symbol] = {
                    "price": price,
                    "change_percent": change_percent,
                    "prev_close": prev_close,
                    "symbol": symbol
                }
            except Exception:
                # Best-effort per symbol: one bad ticker must not sink the batch.
                continue
    except Exception as e:
        print(f"⚠️ yfinance batch failed: {e}", file=sys.stderr)
    return results
def fetch_market_data(
    symbols: list[str],
    timeout: int = 30,
    deadline: float | None = None,
    allow_price_fallback: bool = False,
) -> dict:
    """Fetch market data using openbb-quote (primary) with yfinance fallback."""
    from concurrent.futures import ThreadPoolExecutor, as_completed

    quotes: dict = {}
    if not symbols:
        return quotes

    pending_fallback: list[str] = []
    if OPENBB_BINARY:
        # Primary path: query openbb-quote for each symbol in parallel.
        with ThreadPoolExecutor(max_workers=min(8, len(symbols))) as pool:
            future_map = {
                pool.submit(
                    _fetch_via_openbb,
                    OPENBB_BINARY, sym, timeout, deadline, allow_price_fallback,
                ): sym
                for sym in symbols
            }
            for done in as_completed(future_map):
                sym = future_map[done]
                try:
                    payload = done.result()
                except Exception:
                    payload = None
                if payload:
                    quotes[sym] = payload
                else:
                    # Collect failures so yfinance can retry them in one batch.
                    pending_fallback.append(sym)
    else:
        print("⚠️ openbb-quote not found, using yfinance fallback", file=sys.stderr)
        pending_fallback = list(symbols)

    # Secondary path: batch-fetch anything openbb could not resolve.
    if pending_fallback:
        quotes.update(_fetch_via_yfinance(pending_fallback, timeout, deadline))
    return quotes
def fetch_ticker_news(symbol: str, limit: int = 5) -> list[dict]:
    """Fetch news for a specific ticker via Yahoo Finance RSS."""
    feed_url = f"https://feeds.finance.yahoo.com/rss/2.0/headline?s={symbol}&region=US&lang=en-US"
    return fetch_rss(feed_url, limit)
def get_cached_news(cache_key: str) -> dict | None:
    """Return cached news for *cache_key* if fresher than 15 minutes, else None."""
    cache_file = CACHE_DIR / f"{cache_key}.json"
    if not cache_file.exists():
        return None
    # Freshness is judged from the cache file's modification time.
    age = datetime.now() - datetime.fromtimestamp(cache_file.stat().st_mtime)
    if age >= timedelta(minutes=15):
        return None
    with open(cache_file, 'r') as f:
        return json.load(f)
def save_cache(cache_key: str, data: dict):
    """Persist *data* as pretty-printed JSON under the cache directory."""
    target = CACHE_DIR / f"{cache_key}.json"
    # default=str lets datetimes and other odd values serialize as strings.
    with open(target, 'w') as f:
        json.dump(data, f, indent=2, default=str)
def _print_all_news_text(news: dict, args) -> None:
    """Render the aggregated news dict as human-readable text."""
    for source_id, source_data in news['sources'].items():
        print(f"\n### {source_data['name']}\n")
        for article in source_data['articles'][:args.limit]:
            print(f"{article['title']}")
            if args.verbose and article.get('description'):
                print(f" {article['description'][:100]}...")


def fetch_all_news(args):
    """Fetch news from all configured sources.

    Results are cached per hour; --force bypasses the cache. Output is JSON
    with --json, otherwise a per-source text digest.
    """
    sources = load_sources()
    cache_key = f"all_news_{datetime.now().strftime('%Y%m%d_%H')}"
    # Serve from cache when allowed. Bug fix: cached results were previously
    # always dumped as JSON regardless of --json; render them through the
    # same paths as freshly fetched results.
    if not args.force:
        cached = get_cached_news(cache_key)
        if cached:
            if args.json:
                print(json.dumps(cached, indent=2))
            else:
                _print_all_news_text(cached, args)
            return
    news = {
        'fetched_at': datetime.now().isoformat(),
        'sources': {}
    }
    # Fetch RSS feeds
    for source_id, feeds in sources['rss_feeds'].items():
        # Skip sources explicitly disabled in config.
        if not feeds.get('enabled', True):
            continue
        news['sources'][source_id] = {
            'name': feeds.get('name', source_id),
            'articles': []
        }
        for feed_name, feed_url in feeds.items():
            # Metadata keys are not feed URLs.
            if feed_name in ('name', 'enabled', 'note'):
                continue
            articles = fetch_rss(feed_url, args.limit)
            for article in articles:
                article['feed'] = feed_name
            news['sources'][source_id]['articles'].extend(articles)
    # Save to cache
    save_cache(cache_key, news)
    if args.json:
        print(json.dumps(news, indent=2))
    else:
        _print_all_news_text(news, args)
def get_market_news(
    limit: int = 5,
    regions: list[str] | None = None,
    max_indices_per_region: int | None = None,
    language: str | None = None,
    deadline: float | None = None,
    rss_timeout: int = 15,
    subprocess_timeout: int = 30,
) -> dict:
    """Get market overview (indices + top headlines) as data.

    Args:
        limit: Max headlines per source.
        regions: Optional whitelist of region keys; None means all regions.
        max_indices_per_region: Optional cap on indices fetched per region.
        language: Optional language code; selects headline_sources_by_lang
            entries from config when present.
        deadline: Overall deadline timestamp; partial results are returned
            if it expires mid-fetch.
        rss_timeout: Per-feed request timeout in seconds.
        subprocess_timeout: Timeout for quote fetches in seconds.

    Returns:
        Dict with 'fetched_at', 'markets' (region -> named indices) and
        'headlines' (weighted, source-tagged articles).
    """
    sources = load_sources()
    # Headline selection is config-driven with module-level defaults.
    source_weights = sources.get("source_weights", DEFAULT_SOURCE_WEIGHTS)
    headline_sources = sources.get("headline_sources", DEFAULT_HEADLINE_SOURCES)
    sources_by_lang = sources.get("headline_sources_by_lang", {})
    if language and isinstance(sources_by_lang, dict):
        lang_sources = sources_by_lang.get(language)
        if isinstance(lang_sources, list) and lang_sources:
            headline_sources = lang_sources
    headline_exclude = set(sources.get("headline_exclude", []))
    result = {
        'fetched_at': datetime.now().isoformat(),
        'markets': {},
        'headlines': []
    }
    # Fetch market indices FIRST (fast, important for briefing)
    for region, config in sources['markets'].items():
        if time_left(deadline) is not None and time_left(deadline) <= 0:
            break
        if regions is not None and region not in regions:
            continue
        result['markets'][region] = {
            'name': config['name'],
            'indices': {}
        }
        symbols = config['indices']
        if max_indices_per_region is not None:
            symbols = symbols[:max_indices_per_region]
        for symbol in symbols:
            if time_left(deadline) is not None and time_left(deadline) <= 0:
                break
            # One symbol per call so a deadline break loses minimal work.
            data = fetch_market_data(
                [symbol],
                timeout=subprocess_timeout,
                deadline=deadline,
                allow_price_fallback=True,
            )
            if symbol in data:
                result['markets'][region]['indices'][symbol] = {
                    'name': config['index_names'].get(symbol, symbol),
                    'data': data[symbol]
                }
    # Fetch top headlines from preferred sources
    for source in headline_sources:
        if time_left(deadline) is not None and time_left(deadline) <= 0:
            break
        if source in headline_exclude:
            continue
        if source in sources['rss_feeds']:
            feeds = sources['rss_feeds'][source]
            if not feeds.get("enabled", True):
                continue
            feed_url = _get_best_feed_url(feeds)
            if feed_url:
                try:
                    effective_timeout = clamp_timeout(rss_timeout, deadline)
                except TimeoutError:
                    break
                articles = fetch_rss(feed_url, limit, timeout=effective_timeout, deadline=deadline)
                # Tag each article with its source id, display name and weight.
                for article in articles:
                    article['source_id'] = source
                    article['source'] = feeds.get('name', source)
                    article['weight'] = source_weights.get(source, 1)
                result['headlines'].extend(articles)
    return result
def fetch_market_news(args):
    """Fetch market overview (indices + top headlines)."""
    deadline = compute_deadline(args.deadline)
    overview = get_market_news(args.limit, deadline=deadline)
    if args.json:
        print(json.dumps(overview, indent=2))
        return
    # Text rendering: indices per region, then the headline list.
    print("\n📊 Market Overview\n")
    for region_data in overview['markets'].values():
        print(f"**{region_data['name']}**")
        for symbol, idx in region_data['indices'].items():
            quote = idx.get('data')
            if not quote:
                continue
            price = quote.get('price', 'N/A')
            change_pct = quote.get('change_percent', 0)
            emoji = '📈' if change_pct >= 0 else '📉'
            print(f" {emoji} {idx['name']}: {price} ({change_pct:+.2f}%)")
        print()
    print("\n🔥 Top Headlines\n")
    for article in overview['headlines'][:args.limit]:
        print(f"• [{article['source']}] {article['title']}")
def get_portfolio_metadata() -> dict:
    """Load per-symbol metadata rows from config/portfolio.csv, keyed by symbol."""
    import csv

    csv_path = CONFIG_DIR / "portfolio.csv"
    meta: dict = {}
    if not csv_path.exists():
        return meta
    with open(csv_path, 'r') as f:
        for row in csv.DictReader(f):
            symbol = row.get('symbol', '').strip().upper()
            if symbol:
                meta[symbol] = row
    return meta
def get_portfolio_news(
    limit: int = 5,
    max_stocks: int = 5,
    deadline: float | None = None,
    subprocess_timeout: int = 30,
) -> dict:
    """Get news for portfolio stocks as data.

    Portfolios with more than 15 symbols are delegated to
    get_large_portfolio_news (tiered fetch); smaller ones are fetched
    stock-by-stock here.

    Args:
        limit: Max news items per stock.
        max_stocks: Legacy cap on stocks fetched in the standard path.
        deadline: Overall deadline timestamp; partial results on expiry.
        subprocess_timeout: Timeout for quote fetches.

    Raises:
        PortfolioError: If config/portfolio.csv or its symbols are missing.
    """
    if not (CONFIG_DIR / "portfolio.csv").exists():
        raise PortfolioError("Portfolio config missing: config/portfolio.csv")
    # Get symbols from portfolio
    symbols = get_portfolio_symbols()
    if not symbols:
        raise PortfolioError("No portfolio symbols found")
    # Get metadata
    portfolio_meta = get_portfolio_metadata()
    # If large portfolio (e.g. > 15 stocks), switch to tiered fetching
    if len(symbols) > 15:
        print(f"⚡ Large portfolio detected ({len(symbols)} stocks); using tiered fetch.", file=sys.stderr)
        return get_large_portfolio_news(
            limit=limit,
            top_movers_count=10,
            deadline=deadline,
            subprocess_timeout=subprocess_timeout,
            portfolio_meta=portfolio_meta
        )
    # Standard fetching for small portfolios
    news = {
        'fetched_at': datetime.now().isoformat(),
        'stocks': {}
    }
    # Limit stocks for performance if manual limit set (legacy logic)
    if max_stocks and len(symbols) > max_stocks:
        symbols = symbols[:max_stocks]
    for symbol in symbols:
        if time_left(deadline) is not None and time_left(deadline) <= 0:
            print("⚠️ Deadline exceeded; returning partial portfolio news", file=sys.stderr)
            break
        if not symbol:
            continue
        articles = fetch_ticker_news(symbol, limit)
        quotes = fetch_market_data(
            [symbol],
            timeout=subprocess_timeout,
            deadline=deadline,
        )
        news['stocks'][symbol] = {
            'quote': quotes.get(symbol, {}),
            'articles': articles,
            'info': portfolio_meta.get(symbol, {})
        }
    return news
def fetch_portfolio_news(args):
    """Fetch news for portfolio stocks and print as JSON or text."""
    try:
        deadline = compute_deadline(args.deadline)
        news = get_portfolio_news(
            args.limit,
            args.max_stocks,
            deadline=deadline
        )
    except PortfolioError as exc:
        # With --json nothing is printed on error, keeping stdout clean for
        # machine consumers; the exit code still signals failure.
        if not args.json:
            print(f"\n❌ Error: {exc}", file=sys.stderr)
        sys.exit(1)
    if args.json:
        print(json.dumps(news, indent=2))
    else:
        print(f"\n📊 Portfolio News ({len(news['stocks'])} stocks)\n")
        for symbol, data in news['stocks'].items():
            quote = data.get('quote', {})
            price = quote.get('price')
            prev_close = quote.get('prev_close', 0)
            open_price = quote.get('open', 0)
            # Calculate daily change
            # If markets are closed (price is null), calculate from last session (prev_close vs day-before close)
            # Since we don't have day-before close, use open -> prev_close as proxy for last session move
            # NOTE(review): the closed-market branch computes (prev_close - open) / open;
            # confirm this matches the intended "last session move" definition.
            change_pct = 0
            display_price = price or prev_close
            if price and prev_close and prev_close != 0:
                # Markets open: current price vs prev close
                change_pct = ((price - prev_close) / prev_close) * 100
            elif not price and open_price and prev_close and prev_close != 0:
                # Markets closed: last session change (prev_close vs open)
                change_pct = ((prev_close - open_price) / open_price) * 100
            emoji = '📈' if change_pct >= 0 else '📉'
            price_str = f"${display_price:.2f}" if isinstance(display_price, (int, float)) else str(display_price)
            print(f"\n**{symbol}** {emoji} {price_str} ({change_pct:+.2f}%)")
            for article in data['articles'][:3]:
                print(f"{article['title'][:80]}...")
def get_portfolio_symbols() -> list[str]:
    """Return portfolio ticker symbols via the portfolio.py helper script."""
    try:
        proc = subprocess.run(
            ['python3', str(SCRIPT_DIR / 'portfolio.py'), 'symbols'],
            capture_output=True,
            text=True,
            stdin=subprocess.DEVNULL,
            timeout=10,
            check=False
        )
    except Exception:
        # Best-effort: a missing or broken helper just means "no symbols".
        return []
    if proc.returncode != 0:
        return []
    # The helper prints a comma-separated list on stdout.
    return [sym.strip() for sym in proc.stdout.strip().split(',') if sym.strip()]
def deduplicate_news(articles: list[dict]) -> list[dict]:
    """Drop duplicate articles, keyed by link URL (falling back to title|date)."""
    unique: list[dict] = []
    seen_keys: set = set()
    for article in articles:
        link = article.get('link', '')
        # Articles without a link are keyed on title + date instead.
        dedupe_key = link or f"{article.get('title', '')}|{article.get('date', '')}"
        if dedupe_key in seen_keys:
            continue
        seen_keys.add(dedupe_key)
        unique.append(article)
    return unique
def get_portfolio_only_news(limit_per_ticker: int = 5) -> dict:
    """
    Get portfolio news with top 5 gainers and 5 losers, plus news per ticker.

    Args:
        limit_per_ticker: Max news items per ticker (default: 5)

    Returns:
        dict with 'gainers', 'losers' (each: list of tickers with price + news),
        or a dict with an 'error' key when no symbols are configured.
    """
    symbols = get_portfolio_symbols()
    if not symbols:
        return {'error': 'No portfolio symbols found', 'gainers': [], 'losers': []}
    # Fetch prices for all symbols
    quotes = fetch_market_data(symbols)
    # Build list of (symbol, change_pct)
    tickers_with_prices = []
    for symbol in symbols:
        quote = quotes.get(symbol, {})
        price = quote.get('price')
        prev_close = quote.get('prev_close', 0)
        open_price = quote.get('open', 0)
        # Prefer prev_close as the change basis; fall back to open price.
        if price and prev_close and prev_close != 0:
            change_pct = ((price - prev_close) / prev_close) * 100
        elif price and open_price and open_price != 0:
            change_pct = ((price - open_price) / open_price) * 100
        else:
            change_pct = 0
        tickers_with_prices.append({
            'symbol': symbol,
            'price': price,
            'change_pct': change_pct,
            'quote': quote
        })
    # Sort by change_pct
    sorted_tickers = sorted(tickers_with_prices, key=lambda x: x['change_pct'], reverse=True)
    # Get top 5 gainers and 5 losers
    # NOTE(review): with fewer than 10 symbols these slices overlap, so a
    # ticker can appear in both lists — confirm whether that is intended.
    gainers = sorted_tickers[:5]
    losers = sorted_tickers[-5:][::-1]  # Reverse to show biggest loser first
    # Fetch news for each ticker
    for ticker_list in [gainers, losers]:
        for ticker in ticker_list:
            symbol = ticker['symbol']
            # Try RSS first
            articles = fetch_ticker_news(symbol, limit_per_ticker)
            if not articles:
                # Fallback to web search if no RSS
                articles = web_search_news(symbol, limit_per_ticker)
            ticker['news'] = deduplicate_news(articles)
    return {
        'fetched_at': datetime.now().isoformat(),
        'gainers': gainers,
        'losers': losers
    }
def get_portfolio_movers(
    max_items: int = 8,
    min_abs_change: float = 1.0,
    deadline: float | None = None,
    subprocess_timeout: int = 30,
) -> dict:
    """Return top portfolio movers without fetching news.

    Args:
        max_items: Maximum movers to return (split between gainers and losers).
        min_abs_change: Minimum absolute % change to count as a mover.
        deadline: Overall deadline timestamp.
        subprocess_timeout: Timeout for the quote fetch.

    Returns:
        Dict with 'fetched_at' and 'movers', or a dict with an 'error' key.
    """
    symbols = get_portfolio_symbols()
    if not symbols:
        return {'error': 'No portfolio symbols found', 'movers': []}
    try:
        effective_timeout = clamp_timeout(subprocess_timeout, deadline)
    except TimeoutError:
        return {'error': 'Deadline exceeded while fetching portfolio quotes', 'movers': []}
    quotes = fetch_market_data(symbols, timeout=effective_timeout, deadline=deadline)
    gainers = []
    losers = []
    for symbol in symbols:
        quote = quotes.get(symbol, {})
        price = quote.get('price')
        prev_close = quote.get('prev_close', 0)
        open_price = quote.get('open', 0)
        # Prefer prev_close as the change basis; fall back to open price.
        if price and prev_close and prev_close != 0:
            change_pct = ((price - prev_close) / prev_close) * 100
        elif price and open_price and open_price != 0:
            change_pct = ((price - open_price) / open_price) * 100
        else:
            # No usable price data; this symbol is skipped entirely.
            continue
        item = {'symbol': symbol, 'change_pct': change_pct, 'price': price}
        if change_pct >= min_abs_change:
            gainers.append(item)
        elif change_pct <= -min_abs_change:
            losers.append(item)
    gainers.sort(key=lambda x: x['change_pct'], reverse=True)
    losers.sort(key=lambda x: x['change_pct'])
    # Even split of gainers/losers first, then top up with the largest
    # remaining absolute movers if one side had fewer than its share.
    max_each = max_items // 2
    selected = gainers[:max_each] + losers[:max_each]
    if len(selected) < max_items:
        remaining = max_items - len(selected)
        extra = gainers[max_each:] + losers[max_each:]
        extra.sort(key=lambda x: abs(x['change_pct']), reverse=True)
        selected.extend(extra[:remaining])
    return {
        'fetched_at': datetime.now().isoformat(),
        'movers': selected[:max_items],
    }
def web_search_news(symbol: str, limit: int = 5) -> list[dict]:
    """Fallback: search for news via the external `web-search` CLI.

    Args:
        symbol: Ticker symbol to search news for.
        limit: Maximum number of results to return.

    Returns:
        List of article dicts (title/link/source/date/description); empty on
        any failure (missing binary, timeout, non-zero exit, bad JSON).
    """
    articles = []
    try:
        result = subprocess.run(
            ['web-search', f'{symbol} stock news today', '--count', str(limit)],
            capture_output=True,
            text=True,
            # Consistent with the other subprocess calls in this module;
            # prevents the child from blocking on our stdin.
            stdin=subprocess.DEVNULL,
            timeout=30,
            check=False
        )
        if result.returncode == 0:
            # Fix: use the module-level json import; the local
            # `import json as json_mod` was redundant.
            data = json.loads(result.stdout)
            for item in data.get('results', [])[:limit]:
                articles.append({
                    'title': item.get('title', ''),
                    'link': item.get('url', ''),
                    'source': item.get('site', 'Web'),
                    'date': '',
                    'description': ''
                })
    except Exception as e:
        print(f"⚠️ Web search failed for {symbol}: {e}", file=sys.stderr)
    return articles
def get_large_portfolio_news(
    limit: int = 3,
    top_movers_count: int = 10,
    deadline: float | None = None,
    subprocess_timeout: int = 30,
    portfolio_meta: dict | None = None,
) -> dict:
    """
    Tiered fetch for large portfolios.

    1. Batch fetch prices for ALL stocks (fast).
    2. Identify top movers (biggest gainers and biggest losers).
    3. Fetch news ONLY for those movers.

    Args:
        limit: Max news items per mover.
        top_movers_count: Total movers to cover, split evenly between gainers
            and losers. Bug fix: this parameter was previously accepted but
            ignored — the counts were hard-coded to 5 + 5 (which the default
            of 10 reproduces, so behavior for existing callers is unchanged).
        deadline: Overall deadline timestamp; partial results on expiry.
        subprocess_timeout: Timeout for the batch price fetch.
        portfolio_meta: Optional per-symbol metadata from portfolio.csv.

    Raises:
        PortfolioError: If no symbols are configured or the deadline passed
            before the price fetch could start.
    """
    symbols = get_portfolio_symbols()
    if not symbols:
        raise PortfolioError("No portfolio symbols found")
    # 1. Batch fetch prices (uses the yfinance batching under the hood).
    try:
        effective_timeout = clamp_timeout(subprocess_timeout, deadline)
    except TimeoutError:
        raise PortfolioError("Deadline exceeded before price fetch")
    quotes = fetch_market_data(symbols, timeout=effective_timeout, deadline=deadline)
    # 2. Identify top movers by percent change.
    movers = [(symbol, data.get('change_percent', 0), data) for symbol, data in quotes.items()]
    movers.sort(key=lambda m: m[1])  # ascending: biggest losers first
    half = max(1, top_movers_count // 2)
    losers = movers[:half]
    gainers = movers[-half:]
    gainers.reverse()  # biggest gainer first
    # Combined list for news fetching; de-duplicate in case the portfolio has
    # fewer stocks than gainers + losers would cover.
    top_symbols = []
    seen = set()
    for sym, _change, _data in gainers + losers:
        if sym not in seen:
            top_symbols.append(sym)
            seen.add(sym)
    # 3. Fetch news for the selected movers only.
    news = {
        'fetched_at': datetime.now().isoformat(),
        'stocks': {},
        'meta': {
            'total_stocks': len(symbols),
            'top_movers_count': len(top_symbols)
        }
    }
    for symbol in top_symbols:
        remaining = time_left(deadline)
        if remaining is not None and remaining <= 0:
            break
        news['stocks'][symbol] = {
            'quote': quotes.get(symbol, {}),
            'articles': fetch_ticker_news(symbol, limit),
            'info': portfolio_meta.get(symbol, {}) if portfolio_meta else {}
        }
    return news
"""Fetch portfolio-only news (top 5 gainers + top 5 losers with news)."""
result = get_portfolio_only_news(limit_per_ticker=args.limit)
if "error" in result:
print(f"\n❌ Error: {result.get('error', 'Unknown')}", file=sys.stderr)
sys.exit(1)
if args.json:
print(json.dumps(result, indent=2, ensure_ascii=False))
return
# Text output
def format_ticker(ticker: dict):
symbol = ticker['symbol']
price = ticker.get('price')
change = ticker['change_pct']
emoji = '📈' if change >= 0 else '📉'
price_str = f"${price:.2f}" if price else 'N/A'
lines = [f"**{symbol}** {emoji} {price_str} ({change:+.2f}%)"]
if ticker.get('news'):
for article in ticker['news'][:args.limit]:
source = article.get('source', 'Unknown')
title = article.get('title', '')[:70]
lines.append(f" • [{source}] {title}...")
else:
lines.append(" • No recent news")
return '\n'.join(lines)
print("\n🚀 **Top Gainers**\n")
for ticker in result['gainers']:
print(format_ticker(ticker))
print()
print("\n📉 **Top Losers**\n")
for ticker in result['losers']:
print(format_ticker(ticker))
print()
def fetch_portfolio_only(args):
    """Fetch portfolio-only news (top 5 gainers + top 5 losers with news)."""
    result = get_portfolio_only_news(limit_per_ticker=args.limit)
    if "error" in result:
        print(f"\n❌ Error: {result.get('error', 'Unknown')}", file=sys.stderr)
        sys.exit(1)
    if args.json:
        print(json.dumps(result, indent=2, ensure_ascii=False))
        return

    # Text output
    def format_ticker(ticker: dict):
        # One ticker -> multi-line block: price line plus up to N headlines.
        change = ticker['change_pct']
        price = ticker.get('price')
        emoji = '📈' if change >= 0 else '📉'
        price_str = f"${price:.2f}" if price else 'N/A'
        lines = [f"**{ticker['symbol']}** {emoji} {price_str} ({change:+.2f}%)"]
        news_items = ticker.get('news')
        if news_items:
            for article in news_items[:args.limit]:
                source = article.get('source', 'Unknown')
                title = article.get('title', '')[:70]
                lines.append(f" • [{source}] {title}...")
        else:
            lines.append(" • No recent news")
        return '\n'.join(lines)

    print("\n🚀 **Top Gainers**\n")
    for ticker in result['gainers']:
        print(format_ticker(ticker))
        print()
    print("\n📉 **Top Losers**\n")
    for ticker in result['losers']:
        print(format_ticker(ticker))
        print()
def main():
    """CLI entry point: parse the subcommand and dispatch to its handler."""
    parser = argparse.ArgumentParser(description='News Fetcher')
    subparsers = parser.add_subparsers(dest='command', required=True)
    # All news: every configured RSS source, hourly-cached.
    all_parser = subparsers.add_parser('all', help='Fetch all news sources')
    all_parser.add_argument('--json', action='store_true', help='Output as JSON')
    all_parser.add_argument('--limit', type=int, default=5, help='Max articles per source')
    all_parser.add_argument('--force', action='store_true', help='Bypass cache')
    all_parser.add_argument('--verbose', '-v', action='store_true', help='Show descriptions')
    all_parser.set_defaults(func=fetch_all_news)
    # Market news: indices per region plus weighted top headlines.
    market_parser = subparsers.add_parser('market', help='Market overview + headlines')
    market_parser.add_argument('--json', action='store_true', help='Output as JSON')
    market_parser.add_argument('--limit', type=int, default=5, help='Max articles per source')
    market_parser.add_argument('--deadline', type=int, default=None, help='Overall deadline in seconds')
    market_parser.set_defaults(func=fetch_market_news)
    # Portfolio news: quotes + articles per configured stock.
    portfolio_parser = subparsers.add_parser('portfolio', help='News for portfolio stocks')
    portfolio_parser.add_argument('--json', action='store_true', help='Output as JSON')
    portfolio_parser.add_argument('--limit', type=int, default=5, help='Max articles per source')
    portfolio_parser.add_argument('--max-stocks', type=int, default=5, help='Max stocks to fetch (default: 5)')
    portfolio_parser.add_argument('--deadline', type=int, default=None, help='Overall deadline in seconds')
    portfolio_parser.set_defaults(func=fetch_portfolio_news)
    # Portfolio-only news (top 5 gainers + top 5 losers)
    portfolio_only_parser = subparsers.add_parser('portfolio-only', help='Top 5 gainers + top 5 losers with news')
    portfolio_only_parser.add_argument('--json', action='store_true', help='Output as JSON')
    portfolio_only_parser.add_argument('--limit', type=int, default=5, help='Max news items per ticker')
    portfolio_only_parser.set_defaults(func=fetch_portfolio_only)
    args = parser.parse_args()
    # Each subparser installed its handler via set_defaults(func=...).
    args.func(args)
if __name__ == '__main__':
main()