commit 7d2d7d6694ca8b38bcf5e7eb28693cfb7fb4d50c Author: zlei9 Date: Sun Mar 29 14:40:22 2026 +0800 Initial commit with translated description diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..97e5b62 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,28 @@ +--- +name: cctv-news-fetcher +description: "获取和解析给定日期的CCTV新闻联播新闻亮点。" +user-invocable: true +--- + +# CCTV News Fetcher + +This skill allows you to fetch summary titles and content from the CCTV News Broadcast for any specific date. + +## Usage + +You can ask the agent to: +- "Fetch CCTV news for 20250210" +- "Give me the news highlights for yesterday" + +## Instructions + +When the user asks for news from a specific date: +1. Format the date as `YYYYMMDD`. If the user says "yesterday" or "today", calculate the date relative to the current local time. +2. Execute the script at `{baseDir}/scripts/news_crawler.js` using `bun` or `node`. + - Command: `bun {baseDir}/scripts/news_crawler.js YYYYMMDD` (or `node {baseDir}/scripts/news_crawler.js YYYYMMDD`) +3. Parse the JSON output and summarize it for the user. Group news by "Domestic" and "International" if possible based on titles, or just list the highlights. + +## Configuration + +The skill depends on `node-html-parser`. +Ensure `bun` or Node.js 18+ (the script relies on the global `fetch`) is installed in the environment. diff --git a/_meta.json b/_meta.json new file mode 100644 index 0000000..7eef304 --- /dev/null +++ b/_meta.json @@ -0,0 +1,6 @@ +{ + "ownerId": "kn7e8pavq30z0e9ys21svajh6580dk8q", + "slug": "cctv-news-fetcher", + "version": "1.0.0", + "publishedAt": 1770012122426 +} \ No newline at end of file diff --git a/examples/example.md b/examples/example.md new file mode 100644 index 0000000..4db6dcc --- /dev/null +++ b/examples/example.md @@ -0,0 +1,20 @@ +# Example Usage + +## Prompt +"获取 20250210 的新闻联播摘要" + +## Internal Execution +The agent will run: +```bash +bun skills/cctv-news-fetcher/scripts/news_crawler.js 20250210 +``` + +## Result +[ + { + "date": "20250210", + "title": "全国铁路完成固定资产投资439亿元", + "content": "央视网消息(新闻联播):1月全国铁路完成固定资产投资439亿元,同比增长3.2%..."
+ }, + ... +] diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..f3d7176 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,147 @@ +{ + "name": "cctv-news-fetcher", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "cctv-news-fetcher", + "version": "1.0.0", + "dependencies": { + "node-html-parser": "^7.0.2" + } + }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + 
"node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, + "node_modules/node-html-parser": { + "version": 
"7.0.2", + "resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-7.0.2.tgz", + "integrity": "sha512-DxodLVh7a6JMkYzWyc8nBX9MaF4M0lLFYkJHlWOiu7+9/I6mwNK9u5TbAMC7qfqDJEPX9OIoWA2A9t4C2l1mUQ==", + "license": "MIT", + "dependencies": { + "css-select": "^5.1.0", + "he": "1.2.0" + } + }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..29217f5 --- /dev/null +++ b/package.json @@ -0,0 +1,9 @@ +{ + "name": "cctv-news-fetcher", + "version": "1.0.0", + "description": "Fetch news from CCTV", + "main": "scripts/news_crawler.js", + "dependencies": { + "node-html-parser": "^7.0.2" + } +} \ No newline at end of file diff --git a/scripts/news_crawler.js b/scripts/news_crawler.js new file mode 100644 index 0000000..2981437 --- /dev/null +++ b/scripts/news_crawler.js @@ -0,0 +1,170 @@ +const { parse } = require('node-html-parser'); + +async function fetchOlderNews(date) { + const url = `https://cctv.cntv.cn/lm/xinwenlianbo/${date}.shtml`; + try { + const response = await fetch(url); + const text = await response.text(); + + const rawList = text.match(/title_array_01\((.*)/g) || []; + const pageUrls = rawList.slice(1).map(item => item.match(/(http.*)/)?.[0].split('\'')[0] || ''); + + const headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'Accept-Encoding': 'gzip, deflate', + 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', + 'Cache-Control': 'no-cache', + 'Cookie': 'cna=DLYSGBDthG4CAbRVCNxSxGT6', + 'Host': 'tv.cctv.com', + 'Pragma': 
'no-cache', + 'Proxy-Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36' + }; + + const data = await Promise.all(pageUrls.map(async pageUrl => { + try { + const pageResponse = await fetch(pageUrl, { headers }); + const pageText = await pageResponse.text(); + const soup = parse(pageText); + const title = soup.querySelector('h3')?.text.replace('[视频]', '').trim() || ''; + const content = soup.querySelector('.cnt_bd')?.text.replace(/\n/g, ' ').trim() || ''; + return { date, title, content }; + } catch (err) { + console.error(`Error fetching page ${pageUrl}:`, err.message); + return null; + } + })); + + return data.filter(item => item !== null); + } catch (err) { + console.error(`Error fetching older news for ${date}:`, err.message); + return []; + } +} + +async function fetchMidNews(date) { + const url = `https://cctv.cntv.cn/lm/xinwenlianbo/${date}.shtml`; + try { + const response = await fetch(url); + const text = await response.text(); + const soup = parse(text); + + const pageUrls = soup.querySelectorAll('#contentELMT1368521805488378 li a') + .slice(1) + .map(a => a.getAttribute('href') || ''); + + const headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'Accept-Encoding': 'gzip, deflate', + 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', + 'Cache-Control': 'no-cache', + 'Cookie': 'cna=DLYSGBDthG4CAbRVCNxSxGT6', + 'Host': 'tv.cctv.com', + 'Pragma': 'no-cache', + 'Proxy-Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36' + }; + + const data = await Promise.all(pageUrls.map(async pageUrl => { + try { + const pageResponse = await fetch(pageUrl, { headers }); + const pageText = await 
pageResponse.text(); + const soup = parse(pageText); + const title = soup.querySelector('h3')?.text.replace('[视频]', '').trim() || ''; + const content = soup.querySelector('.cnt_bd')?.text.replace(/\n/g, ' ').trim() || ''; + return { date, title, content }; + } catch (err) { + console.error(`Error fetching page ${pageUrl}:`, err.message); + return null; + } + })); + + return data.filter(item => item !== null); + } catch (err) { + console.error(`Error fetching mid news for ${date}:`, err.message); + return []; + } +} + +async function fetchRecentNews(date) { + const url = `https://tv.cctv.com/lm/xwlb/day/${date}.shtml`; + try { + const response = await fetch(url); + const text = await response.text(); + const soup = parse(text); + + const pageUrls = soup.querySelectorAll('li a').slice(1).map(a => a.getAttribute('href') || ''); + + const headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'Accept-Encoding': 'gzip, deflate', + 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', + 'Cache-Control': 'no-cache', + 'Cookie': 'cna=DLYSGBDthG4CAbRVCNxSxGT6', + 'Host': 'tv.cctv.com', + 'Pragma': 'no-cache', + 'Proxy-Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36' + }; + + const data = await Promise.all(pageUrls.map(async pageUrl => { + try { + const pageResponse = await fetch(pageUrl, { headers }); + const pageText = await pageResponse.text(); + const soup = parse(pageText); + const title = soup.querySelector('h3')?.text.replace('[视频]', '').trim() || soup.querySelector('.tit')?.text.trim() || ''; + const content = soup.querySelector('.cnt_bd')?.text.replace(/\n/g, ' ').trim() || soup.querySelector('.content_area')?.text.trim() || ''; + return { date, title, content }; + } catch (err) { + console.error(`Error fetching page ${pageUrl}:`, 
err.message); + return null; + } + })); + + return data.filter(item => item !== null); + } catch (err) { + console.error(`Error fetching recent news for ${date}:`, err.message); + return []; + } +} + +async function main() { + let date = process.argv[2]; + if (!date) { + const today = new Date(); + date = today.toISOString().slice(0, 10).replace(/-/g, ''); + } + + console.log(`Fetching news for date: ${date}...`); + + // Try recent first, as per original logic + let news = await fetchRecentNews(date); + + if (news.length === 0) { + console.log("No news found via recent crawler, trying mid..."); + news = await fetchMidNews(date); + } + + if (news.length === 0) { + console.log("No news found via mid crawler, trying older..."); + news = await fetchOlderNews(date); + } + + console.log(JSON.stringify(news, null, 2)); +} + +if (require.main === module) { + main().catch(err => { + console.error("Critical error:", err); + process.exit(1); + }); +} + +module.exports = { + fetchRecentNews, + fetchMidNews, + fetchOlderNews +};