71 lines
2.8 KiB
Python
71 lines
2.8 KiB
Python
import sys
|
|
from pathlib import Path
|
|
import pytest
|
|
from datetime import datetime, timedelta
|
|
|
|
# Add scripts to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
|
|
|
|
from ranking import calculate_score, rank_headlines, classify_category
|
|
|
|
def test_classify_category():
|
|
assert "macro" in classify_category("Fed signals rate cut")
|
|
assert "equities" in classify_category("Apple earnings beat")
|
|
assert "energy" in classify_category("Oil prices surge")
|
|
assert "tech" in classify_category("AI chip demand remains high")
|
|
assert "geopolitics" in classify_category("US imposes new sanctions on Russia")
|
|
assert classify_category("Weather is nice") == ["general"]
|
|
|
|
def test_calculate_score_impact():
|
|
weights = {"market_impact": 0.4, "novelty": 0.2, "breadth": 0.2, "credibility": 0.1, "diversity": 0.1}
|
|
category_counts = {}
|
|
|
|
high_impact = {"title": "Fed announces emergency rate cut", "source": "Reuters", "published_at": datetime.now().isoformat()}
|
|
low_impact = {"title": "Local coffee shop opens", "source": "Blog", "published_at": datetime.now().isoformat()}
|
|
|
|
score_high = calculate_score(high_impact, weights, category_counts)
|
|
score_low = calculate_score(low_impact, weights, category_counts)
|
|
|
|
assert score_high > score_low
|
|
|
|
def test_rank_headlines_deduplication():
|
|
headlines = [
|
|
{"title": "Fed signals rate cut in March", "source": "WSJ"},
|
|
{"title": "FED SIGNALS RATE CUT IN MARCH!!!", "source": "Reuters"}, # Dupe
|
|
{"title": "Apple earnings are out", "source": "CNBC"}
|
|
]
|
|
|
|
result = rank_headlines(headlines)
|
|
|
|
# After dedupe, we should have 2 unique headlines
|
|
assert result["after_dedupe"] == 2
|
|
# must_read should contain the best ones
|
|
assert len(result["must_read"]) <= 2
|
|
|
|
def test_rank_headlines_sorting():
|
|
headlines = [
|
|
{"title": "Local news", "source": "SmallBlog", "description": "Nothing much"},
|
|
{"title": "FED EMERGENCY RATE CUT", "source": "Bloomberg", "description": "Huge market impact"},
|
|
{"title": "Nvidia Earnings Surprise", "source": "Reuters", "description": "AI demand surges"}
|
|
]
|
|
|
|
result = rank_headlines(headlines)
|
|
|
|
# FED should be first due to macro impact + credibility
|
|
assert "FED" in result["must_read"][0]["title"]
|
|
assert "Nvidia" in result["must_read"][1]["title"]
|
|
|
|
def test_source_cap():
|
|
# Test that we don't have too many items from the same source
|
|
headlines = [
|
|
{"title": f"Story {i}", "source": "Reuters"} for i in range(10)
|
|
]
|
|
|
|
# Default source cap is 2
|
|
result = rank_headlines(headlines)
|
|
|
|
reuters_in_must_read = [h for h in result["must_read"] if h["source"] == "Reuters"]
|
|
reuters_in_scan = [h for h in result["scan"] if h["source"] == "Reuters"]
|
|
|
|
assert len(reuters_in_must_read) + len(reuters_in_scan) <= 2
|