From 679194b6c8ec7fe7912fe4c85845c45e1aec0cc4 Mon Sep 17 00:00:00 2001 From: zlei9 Date: Sun, 29 Mar 2026 09:48:33 +0800 Subject: [PATCH] Initial commit with translated description --- README.md | 51 +++++++++++++++++++ SKILL.md | 44 ++++++++++++++++ _meta.json | 6 +++ scripts/baidu_baike.py | 113 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 214 insertions(+) create mode 100644 README.md create mode 100644 SKILL.md create mode 100644 _meta.json create mode 100644 scripts/baidu_baike.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..cd979cc --- /dev/null +++ b/README.md @@ -0,0 +1,51 @@ +# Baidu Baike Skill + +Query Baidu Baike encyclopedia entries from OpenClaw. + +## Purpose + +This skill enables two main scenarios: + +1. **Direct search by keyword** - Get the default matching entry for a term +2. **Homonym resolution** - When multiple entries share the same name, list them and let the user select a specific one + +## Quick Start + +```bash +export BAIDU_API_KEY="your_api_key" + +# Scenario 1: Direct search +python3 scripts/baidu_baike.py --search_type=lemmaTitle --search_key="Andy Lau" + +# Scenario 2: List homonyms +python3 scripts/baidu_baike.py --search_type=lemmaList --search_key="Liu Dehua" --top_k=5 + +# Then query a specific entry by ID +python3 scripts/baidu_baike.py --search_type=lemmaId --search_key="114923" +``` + +## API + +- `LemmaList`: List entries with the same title (for homonym resolution) +- `LemmaContent`: Get detailed entry content by title or ID + +## Workflow for OpenClaw Agent + +1. Extract the noun from the user query +2. If the term likely has homonyms (common names, ambiguous terms), call `LemmaList` first +3. Show the user the list with IDs and descriptions +4. The user selects an entry ID (or the agent uses the default entry) +5. Call `LemmaContent` with the selected ID +6. 
Return structured entry data to the user + +## Response Format + +Returns JSON with: +- `lemma_id`: Entry ID +- `lemma_title`: Entry title +- `lemma_desc`: Short description +- `url`: Baike page URL +- `abstract_plain`: Plain text summary +- `card`: Information cards (attributes) +- `albums`: Image albums +- `pic_url`: Main image URL \ No newline at end of file diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..040fb67 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,44 @@ +--- +name: baidu-baike-data +description: "百度百科组件是一个知识服务工具,用于查询各种名词的权威百科解释。" +homepage: https://baike.baidu.com/ +metadata: { "openclaw": { "emoji": "📖", "requires": { "bins": ["python3"] ,"env":["BAIDU_API_KEY"]},"primaryEnv":"BAIDU_API_KEY" } } +--- + +# Baidu Baike + +Query encyclopedia entries from Baidu Baike. + +## Two Usage Scenarios + +### Scenario 1: Direct Search +Get the default matching entry for a keyword. +```bash +python3 scripts/baidu_baike.py --search_type=lemmaTitle --search_key="keyword" +``` + +### Scenario 2: Homonym Resolution +When a term has multiple entries, list them and select one by ID. +```bash +# List entries with the same name +python3 scripts/baidu_baike.py --search_type=lemmaList --search_key="keyword" --top_k=5 + +# Get a specific entry by ID +python3 scripts/baidu_baike.py --search_type=lemmaId --search_key="entry_id" +``` + +## API +- LemmaList: List entries with the same title +- LemmaContent: Get entry details by title or ID + +## Setup +```bash +export BAIDU_API_KEY="your_api_key" +``` + +## Workflow +1. Extract the noun from the query +2. For ambiguous terms, call LemmaList first +3. The user selects an entry from the list +4. Call LemmaContent with the selected ID +5. 
#!/usr/bin/env python3
"""
Baidu Baike Query Script
Query encyclopedia entries from Baidu Baike.

Usage:
    python3 baidu_baike.py --search_type=lemmaTitle --search_key="keyword"
    python3 baidu_baike.py --search_type=lemmaList --search_key="keyword" --top_k=5
    python3 baidu_baike.py --search_type=lemmaId --search_key="entry_id"

The API key is read from the BAIDU_API_KEY environment variable. Results are
printed to stdout as JSON; errors are printed to stderr and the process exits
with a non-zero status.
"""

import argparse
import json
import os
import sys
from typing import Any, Dict, List

import requests


class BaiduBaikeAPIError(RuntimeError):
    """Raised when the API answers with an error code or an unusable body.

    Subclasses ``RuntimeError`` so existing ``except RuntimeError`` handlers
    keep working.
    """


class BaiduBaikeClient:
    """Baidu Baike API Client"""

    BASE_URL = "https://appbuilder.baidu.com/v2/baike"

    # Seconds to wait for each HTTP request before giving up.
    REQUEST_TIMEOUT = 30

    # Fields stripped from entry content to keep the printed output small.
    _EXCLUDED_CONTENT_KEYS = frozenset({
        "summary",
        "abstract_html",
        "abstract_structured",
        "square_pic_url_wap",
        "videos",
        "relations",
        "star_map",
    })

    def __init__(self, api_key: str):
        """Store the API key and build the headers sent with every request."""
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "X-Appbuilder-From": "openclaw",
        }

    def get_lemma_content(self, search_type: str, search_key: str) -> Dict[str, Any]:
        """Get detailed entry content by title or ID.

        Args:
            search_type: Forwarded as the ``search_type`` query parameter.
            search_key: Forwarded as the ``search_key`` query parameter.

        Returns:
            The ``result`` object of the response without the excluded large
            fields and without ``None`` values, or ``{}`` when the response
            carries no usable ``result`` object.

        Raises:
            requests.exceptions.RequestException: On transport or HTTP errors.
            BaiduBaikeAPIError: When the API reports an error or the body is
                not a JSON object.
        """
        payload = self._get(
            "lemma/get_content",
            {"search_type": search_type, "search_key": search_key},
        )

        entry = payload.get("result")
        # "result" may be absent or null; calling .items() on it would crash.
        if not isinstance(entry, dict):
            return {}

        return {
            key: value
            for key, value in entry.items()
            if key not in self._EXCLUDED_CONTENT_KEYS and value is not None
        }

    def get_lemma_list(self, lemma_title: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """List entries with same title (for homonym resolution).

        Args:
            lemma_title: Forwarded as the ``lemma_title`` query parameter.
            top_k: Forwarded as the ``top_k`` query parameter.

        Returns:
            The ``result`` value of the response, or ``[]`` when it is absent
            or null.

        Raises:
            requests.exceptions.RequestException: On transport or HTTP errors.
            BaiduBaikeAPIError: When the API reports an error or the body is
                not a JSON object.
        """
        payload = self._get(
            "lemma/get_list_by_title",
            {"lemma_title": lemma_title, "top_k": top_k},
        )

        entries = payload.get("result")
        # A present-but-null "result" must still yield a list for callers.
        return [] if entries is None else entries

    def _get(self, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """Send a GET request to ``BASE_URL/path`` and return the checked JSON body."""
        response = requests.get(
            f"{self.BASE_URL}/{path}",
            params=params,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()

        try:
            payload = response.json()
        except ValueError as exc:
            # Decoding failures are ValueError subclasses; report them as an
            # API problem rather than letting them look like a network error.
            raise BaiduBaikeAPIError("response body is not valid JSON") from exc

        if not isinstance(payload, dict):
            raise BaiduBaikeAPIError("response body is not a JSON object")

        self._check_error(payload)
        return payload

    def _check_error(self, result: Dict[str, Any]) -> None:
        """Raise ``BaiduBaikeAPIError`` when ``result`` has a non-zero ``errno``."""
        if "errno" in result and result["errno"] != 0:
            errmsg = result.get("errmsg", "Unknown error")
            # No "API error:" prefix here: main() adds it when reporting.
            raise BaiduBaikeAPIError(f"{errmsg} (code: {result['errno']})")


def _positive_int(value: str) -> int:
    """argparse type: parse ``value`` as an integer that must be >= 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError("must be a positive integer")
    return number


def main():
    """Parse the command line, run the requested query and print JSON.

    Exits with status 1 when the API key is missing or the query fails;
    argparse exits with status 2 on invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Query Baidu Baike entries")
    parser.add_argument(
        "--search_type", "-st",
        required=True,
        choices=["lemmaTitle", "lemmaId", "lemmaList"],
        help="Search type: lemmaTitle, lemmaId, or lemmaList"
    )
    parser.add_argument(
        "--search_key", "-sk",
        required=True,
        help="Search keyword (entry title or ID)"
    )
    parser.add_argument(
        "--top_k", "-tk",
        type=_positive_int,
        default=5,
        help="Max results for lemmaList (default: 5)"
    )

    args = parser.parse_args()

    # Reject blank keywords up front instead of spending a request on them.
    if not args.search_key.strip():
        parser.error("--search_key must not be empty")

    api_key = os.getenv("BAIDU_API_KEY")
    if not api_key:
        print("Error: BAIDU_API_KEY environment variable not set", file=sys.stderr)
        sys.exit(1)

    try:
        client = BaiduBaikeClient(api_key)

        if args.search_type == "lemmaList":
            results = client.get_lemma_list(args.search_key, args.top_k)
        else:
            results = client.get_lemma_content(args.search_type, args.search_key)

        print(json.dumps(results, ensure_ascii=False, indent=2))

    except requests.exceptions.RequestException as e:
        print(f"Network error: {e}", file=sys.stderr)
        sys.exit(1)
    except BaiduBaikeAPIError as e:
        print(f"API error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything unexpected and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()