Initial commit with translated description

This commit is contained in:
2026-03-29 13:18:46 +08:00
commit 37406a2ec4
51 changed files with 9545 additions and 0 deletions

353
scripts/manage_topics.py Normal file
View File

@@ -0,0 +1,353 @@
#!/usr/bin/env python3
"""
Topic management CLI for topic-monitor.
Usage:
python3 manage_topics.py add "Topic Name" --query "search" --keywords "a,b,c"
python3 manage_topics.py list
python3 manage_topics.py edit <id> --frequency hourly
python3 manage_topics.py remove <id>
python3 manage_topics.py test <id>
python3 manage_topics.py discover-feed https://example.com/blog
python3 manage_topics.py import-opml feeds.opml
"""
import sys
import argparse
import re
import json
import xml.etree.ElementTree as ET
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from config import load_config, save_config, get_topic, load_state, get_settings
from monitor import monitor_topic, discover_feed_urls, github_release_feed_url
def generate_id(name: str) -> str:
topic_id = name.lower()
topic_id = re.sub(r'[^\w\s-]', '', topic_id)
topic_id = re.sub(r'[-\s]+', '-', topic_id)
return topic_id.strip('-')
def split_csv(value: str):
if not value:
return []
return [item.strip() for item in value.split(",") if item.strip()]
def ensure_topic_defaults(topic: dict) -> dict:
topic.setdefault("keywords", [])
topic.setdefault("feeds", [])
topic.setdefault("github_repos", [])
topic.setdefault("exclude_keywords", [])
topic.setdefault("required_keywords", [])
topic.setdefault("alert_on_sentiment_shift", False)
topic.setdefault("frequency", "daily")
topic.setdefault("importance_threshold", "medium")
topic.setdefault("channels", ["telegram"])
topic.setdefault("context", "")
topic.setdefault("alert_on", [])
topic.setdefault("ignore_sources", [])
topic.setdefault("boost_sources", [])
return topic
def add_topic(args):
config = load_config()
topic_id = args.id or generate_id(args.name)
existing_ids = [t.get("id") for t in config.get("topics", [])]
if topic_id in existing_ids:
print(f"❌ Topic with ID '{topic_id}' already exists", file=sys.stderr)
sys.exit(1)
feeds = split_csv(args.feeds)
if args.discover_feeds:
discovered = []
for candidate in split_csv(args.discover_feeds):
discovered.extend(discover_feed_urls(candidate))
feeds = list(dict.fromkeys(feeds + discovered))
github_repos = split_csv(args.github_repos)
alert_on = split_csv(args.alert_on)
if github_repos and "github_release" not in alert_on:
alert_on.append("github_release")
topic = ensure_topic_defaults({
"id": topic_id,
"name": args.name,
"query": args.query or "",
"keywords": split_csv(args.keywords),
"feeds": feeds,
"github_repos": github_repos,
"exclude_keywords": split_csv(args.exclude_keywords),
"required_keywords": split_csv(args.required_keywords),
"frequency": args.frequency,
"importance_threshold": args.importance,
"channels": split_csv(args.channels) if args.channels else ["telegram"],
"context": args.context or "",
"alert_on": alert_on,
"alert_on_sentiment_shift": args.alert_on_sentiment_shift,
"ignore_sources": [],
"boost_sources": [],
})
config.setdefault("topics", []).append(topic)
save_config(config)
print(f"✅ Added topic: {args.name} ({topic_id})")
if topic.get("query"):
print(f" Query: {topic['query']}")
if topic.get("feeds"):
print(f" Feeds: {len(topic['feeds'])}")
if topic.get("github_repos"):
print(f" GitHub repos: {', '.join(topic['github_repos'])}")
if topic.get("required_keywords"):
print(f" Required keywords: {', '.join(topic['required_keywords'])}")
if topic.get("exclude_keywords"):
print(f" Exclude keywords: {', '.join(topic['exclude_keywords'])}")
def list_topics(args):
config = load_config()
topics = config.get("topics", [])
if not topics:
print("No topics configured")
return
print(f"\n📋 Configured Topics ({len(topics)})\n")
for topic in topics:
topic = ensure_topic_defaults(topic)
print(f"{'='*60}")
print(f"ID: {topic.get('id')}")
print(f"Name: {topic.get('name')}")
print(f"Query: {topic.get('query') or ''}")
print(f"Keywords: {', '.join(topic.get('keywords', [])) or ''}")
print(f"Feeds: {', '.join(topic.get('feeds', [])) or ''}")
print(f"GitHub: {', '.join(topic.get('github_repos', [])) or ''}")
print(f"Required: {', '.join(topic.get('required_keywords', [])) or ''}")
print(f"Excluded: {', '.join(topic.get('exclude_keywords', [])) or ''}")
print(f"Frequency: {topic.get('frequency')}")
print(f"Importance: {topic.get('importance_threshold')}")
print(f"Channels: {', '.join(topic.get('channels', []))}")
print(f"Sentiment shift alerts: {topic.get('alert_on_sentiment_shift')}")
if topic.get('context'):
print(f"Context: {topic.get('context')}")
print()
def edit_topic(args):
config = load_config()
topics = config.get("topics", [])
topic_idx = None
for idx, topic in enumerate(topics):
if topic.get("id") == args.topic_id:
topic_idx = idx
break
if topic_idx is None:
print(f"❌ Topic '{args.topic_id}' not found", file=sys.stderr)
sys.exit(1)
topic = ensure_topic_defaults(topics[topic_idx])
if args.name:
topic["name"] = args.name
if args.query is not None:
topic["query"] = args.query
if args.keywords is not None:
topic["keywords"] = split_csv(args.keywords)
if args.feeds is not None:
topic["feeds"] = split_csv(args.feeds)
if args.github_repos is not None:
topic["github_repos"] = split_csv(args.github_repos)
if topic["github_repos"] and "github_release" not in topic["alert_on"]:
topic["alert_on"].append("github_release")
if args.exclude_keywords is not None:
topic["exclude_keywords"] = split_csv(args.exclude_keywords)
if args.required_keywords is not None:
topic["required_keywords"] = split_csv(args.required_keywords)
if args.frequency:
topic["frequency"] = args.frequency
if args.importance:
topic["importance_threshold"] = args.importance
if args.channels:
topic["channels"] = split_csv(args.channels)
if args.context is not None:
topic["context"] = args.context
if args.alert_on is not None:
topic["alert_on"] = split_csv(args.alert_on)
if args.alert_on_sentiment_shift is not None:
topic["alert_on_sentiment_shift"] = args.alert_on_sentiment_shift
if args.discover_feeds:
discovered = []
for candidate in split_csv(args.discover_feeds):
discovered.extend(discover_feed_urls(candidate))
topic["feeds"] = list(dict.fromkeys(topic.get("feeds", []) + discovered))
topics[topic_idx] = topic
config["topics"] = topics
save_config(config)
print(f"✅ Updated topic: {topic.get('name')} ({args.topic_id})")
def remove_topic(args):
config = load_config()
topics = config.get("topics", [])
new_topics = [t for t in topics if t.get("id") != args.topic_id]
if len(new_topics) == len(topics):
print(f"❌ Topic '{args.topic_id}' not found", file=sys.stderr)
sys.exit(1)
config["topics"] = new_topics
save_config(config)
print(f"✅ Removed topic: {args.topic_id}")
def test_topic(args):
topic = get_topic(args.topic_id)
if not topic:
print(f"❌ Topic '{args.topic_id}' not found", file=sys.stderr)
sys.exit(1)
print(f"🧪 Testing topic: {topic.get('name')}\n")
state = load_state()
settings = get_settings()
monitor_topic(topic, state, settings, dry_run=True, verbose=True)
def discover_feed(args):
urls = discover_feed_urls(args.url)
if not urls:
print("No feeds discovered", file=sys.stderr)
sys.exit(1)
for url in urls:
print(url)
def import_opml(args):
config = load_config()
config.setdefault("topics", [])
tree = ET.parse(args.opml_file)
root = tree.getroot()
outlines = root.findall(".//outline")
added = 0
for outline in outlines:
xml_url = outline.attrib.get("xmlUrl") or outline.attrib.get("xmlurl")
title = outline.attrib.get("title") or outline.attrib.get("text") or outline.attrib.get("xmlUrl")
html_url = outline.attrib.get("htmlUrl") or outline.attrib.get("htmlurl")
if not xml_url:
continue
topic_name = title or xml_url
topic_id = generate_id(topic_name)
existing = next((t for t in config["topics"] if t.get("id") == topic_id), None)
feed_list = [xml_url]
if html_url:
feed_list.extend([u for u in discover_feed_urls(html_url) if u != xml_url])
if existing:
merged = list(dict.fromkeys(existing.get("feeds", []) + feed_list))
existing["feeds"] = merged
if html_url and not existing.get("query"):
existing["query"] = html_url
else:
topic = ensure_topic_defaults({
"id": topic_id,
"name": topic_name,
"query": html_url or "",
"keywords": [],
"feeds": list(dict.fromkeys(feed_list)),
"github_repos": [],
"exclude_keywords": [],
"required_keywords": [],
"frequency": args.frequency,
"importance_threshold": args.importance,
"channels": split_csv(args.channels) if args.channels else ["telegram"],
"context": args.context or "Imported from OPML",
"alert_on": [],
"alert_on_sentiment_shift": False,
"ignore_sources": [],
"boost_sources": [],
})
config["topics"].append(topic)
added += 1
save_config(config)
print(f"✅ Imported OPML: added {added} topic(s), updated matching topics where needed")
def main():
parser = argparse.ArgumentParser(description="Manage research topics")
subparsers = parser.add_subparsers(dest="command", required=True)
add_parser = subparsers.add_parser("add", help="Add a new topic")
add_parser.add_argument("name", help="Topic name")
add_parser.add_argument("--id", help="Custom topic ID")
add_parser.add_argument("--query", help="Search query")
add_parser.add_argument("--keywords", help="Comma-separated keywords")
add_parser.add_argument("--feeds", help="Comma-separated RSS/Atom feed URLs")
add_parser.add_argument("--discover-feeds", help="Comma-separated web URLs to auto-discover feeds from")
add_parser.add_argument("--github-repos", help="Comma-separated owner/repo values for GitHub release monitoring")
add_parser.add_argument("--exclude-keywords", help="Comma-separated keywords to filter out before scoring")
add_parser.add_argument("--required-keywords", help="Comma-separated keywords that must all appear")
add_parser.add_argument("--frequency", choices=["hourly", "daily", "weekly"], default="daily")
add_parser.add_argument("--importance", choices=["high", "medium", "low"], default="medium")
add_parser.add_argument("--channels", default="telegram", help="Comma-separated channels")
add_parser.add_argument("--context", help="Why this topic matters to you")
add_parser.add_argument("--alert-on", help="Comma-separated alert conditions")
add_parser.add_argument("--alert-on-sentiment-shift", action="store_true", help="Alert when sentiment changes from previous findings")
add_parser.set_defaults(func=add_topic)
list_parser = subparsers.add_parser("list", help="List all topics")
list_parser.set_defaults(func=list_topics)
edit_parser = subparsers.add_parser("edit", help="Edit a topic")
edit_parser.add_argument("topic_id", help="Topic ID to edit")
edit_parser.add_argument("--name", help="New name")
edit_parser.add_argument("--query", help="New query (empty string allowed)")
edit_parser.add_argument("--keywords", help="New keywords")
edit_parser.add_argument("--feeds", help="New feed URLs")
edit_parser.add_argument("--discover-feeds", help="Discover and append feeds from these URLs")
edit_parser.add_argument("--github-repos", help="New GitHub repos")
edit_parser.add_argument("--exclude-keywords", help="New exclude keywords")
edit_parser.add_argument("--required-keywords", help="New required keywords")
edit_parser.add_argument("--frequency", choices=["hourly", "daily", "weekly"])
edit_parser.add_argument("--importance", choices=["high", "medium", "low"])
edit_parser.add_argument("--channels", help="New channels")
edit_parser.add_argument("--context", help="New context")
edit_parser.add_argument("--alert-on", help="New alert conditions")
edit_parser.add_argument("--alert-on-sentiment-shift", action=argparse.BooleanOptionalAction, default=None, help="Toggle sentiment shift alerts")
edit_parser.set_defaults(func=edit_topic)
remove_parser = subparsers.add_parser("remove", help="Remove a topic")
remove_parser.add_argument("topic_id", help="Topic ID to remove")
remove_parser.set_defaults(func=remove_topic)
test_parser = subparsers.add_parser("test", help="Test a topic")
test_parser.add_argument("topic_id", help="Topic ID to test")
test_parser.set_defaults(func=test_topic)
discover_parser = subparsers.add_parser("discover-feed", help="Discover feed URLs from a webpage URL")
discover_parser.add_argument("url", help="Webpage URL to inspect")
discover_parser.set_defaults(func=discover_feed)
opml_parser = subparsers.add_parser("import-opml", help="Import RSS/Atom feeds from an OPML file")
opml_parser.add_argument("opml_file", help="Path to OPML file")
opml_parser.add_argument("--frequency", choices=["hourly", "daily", "weekly"], default="daily")
opml_parser.add_argument("--importance", choices=["high", "medium", "low"], default="medium")
opml_parser.add_argument("--channels", default="telegram", help="Comma-separated channels")
opml_parser.add_argument("--context", help="Context applied to imported topics")
opml_parser.set_defaults(func=import_opml)
args = parser.parse_args()
try:
args.func(args)
except FileNotFoundError as e:
print(f"{e}", file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()