feat(repo): 整理 Qoder Skills 和 MCP 配置到仓库

- 添加 5 个用户级别 Skills: - auto-commit: 自动 Git 提交 - karpathy-guidelines: 编码规范指南 - opencli-websearch: 多源网络搜索 - pdf-reader: PDF 内容提取 - repo-analyzer: 项目深度分析 - 添加 Playwright MCP 配置 (21 个浏览器自动化工具) - 创建完整的 README.md 文档说明
2026-04-18 11:17:41 +08:00
commit c3ea38c045
33 changed files with 2677 additions and 0 deletions
--- a/skills/opencli-websearch/scripts/download_content.py
+++ b/skills/opencli-websearch/scripts/download_content.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+"""
+使用 OpenCLI web read 下载文档内容
+"""
+
+import os
+import sys
+import json
+import hashlib
+import subprocess
+import argparse
+from pathlib import Path
+from typing import Optional, List
+from urllib.parse import urlparse
+
+
+def download_with_opencli(url: str, output_dir: str, timeout: int = 60) -> Optional[str]:
+    """
+    Download the content of a URL by invoking ``opencli web read``.
+
+    The output file is named ``<host>_<hash>.md``: ``<host>`` is the URL's
+    network location with every character other than word characters and
+    ``-`` replaced by ``_``; ``<hash>`` is the first 12 hex digits of the MD5
+    of the full URL.
+
+    Args:
+        url: URL to download.
+        output_dir: Directory the Markdown file is written to.
+        timeout: Timeout for the ``opencli`` process, in seconds.
+
+    Returns:
+        Local path of the downloaded file, or None on failure.
+    """
+    import re
+
+    # MD5 only provides a short, stable file-name component here.
+    url_hash = hashlib.md5(url.encode()).hexdigest()[:12]
+    # The netloc may carry a port ("host:8080") or credentials
+    # ("user:pw@host"); ":" and "@" are not safe in file names everywhere,
+    # so everything except word characters and "-" becomes "_".
+    domain = re.sub(r"[^\w\-]", "_", urlparse(url).netloc)
+    filename = f"{domain}_{url_hash}.md"
+    output_path = os.path.join(output_dir, filename)
+
+    cmd = ["opencli", "web", "read", "--url", url, "--output", output_path]
+
+    print(f"下载: {url}")
+    print(f"输出: {output_path}")
+
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+        )
+
+        if result.returncode != 0:
+            print(f"✗ 下载失败: {result.stderr[:200]}")
+            return None
+
+        # A zero exit status alone is not trusted: the file must exist.
+        if not os.path.exists(output_path):
+            print("✗ 文件未生成")
+            return None
+
+        file_size = os.path.getsize(output_path)
+        print(f"✓ 成功下载 ({file_size} bytes)")
+        return output_path
+
+    except subprocess.TimeoutExpired:
+        print("✗ 下载超时")
+        return None
+    except Exception as e:
+        # Best effort: any other failure (e.g. opencli not installed) is
+        # reported and turned into a None result instead of propagating.
+        print(f"✗ 错误: {str(e)}")
+        return None
+
+
+def batch_download(urls: List[str], output_dir: str, max_workers: int = 3) -> dict:
+    """
+    Download several URLs concurrently.
+
+    Args:
+        urls: URLs to download.
+        output_dir: Directory the files are written to.
+        max_workers: Maximum number of parallel downloads; values below 1
+            are treated as 1.
+
+    Returns:
+        Mapping ``{url: local_path or None}``; None marks a failed download.
+    """
+    from concurrent.futures import ThreadPoolExecutor, as_completed
+
+    results = {}
+    if not urls:
+        return results
+
+    # ThreadPoolExecutor raises ValueError when max_workers <= 0.
+    worker_count = max(1, max_workers)
+
+    with ThreadPoolExecutor(max_workers=worker_count) as executor:
+        future_to_url = {
+            executor.submit(download_with_opencli, url, output_dir): url
+            for url in urls
+        }
+
+        for future in as_completed(future_to_url):
+            url = future_to_url[future]
+            try:
+                results[url] = future.result()
+            except Exception as e:
+                # One failing download must not abort the whole batch.
+                print(f"✗ {url} 异常: {str(e)}")
+                results[url] = None
+
+    return results
+
+
+def load_results_from_search(search_dir: str) -> List[str]:
+    """
+    Collect the URLs mentioned in a previous search's ``results.json``.
+
+    Only entries whose ``success`` is truthy and whose ``output`` is a
+    non-empty string are scanned. Every ``http(s)://`` URL in the output is
+    extracted, with trailing sentence punctuation removed.
+
+    Args:
+        search_dir: Directory containing ``results.json``.
+
+    Returns:
+        De-duplicated URLs in order of first appearance; an empty list when
+        the file is missing, unreadable, or not a JSON object.
+    """
+    import re
+
+    results_file = os.path.join(search_dir, "results.json")
+
+    if not os.path.exists(results_file):
+        print(f"未找到结果文件: {results_file}")
+        return []
+
+    try:
+        with open(results_file, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError) as e:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+        print(f"无法读取结果文件: {results_file} ({e})")
+        return []
+
+    if not isinstance(data, dict):
+        return []
+
+    # Compiled once instead of importing/searching per line.
+    url_pattern = re.compile(r'https?://[^\s\'"<>]+')
+
+    urls = []
+    for result in data.values():
+        if not isinstance(result, dict):
+            continue
+        output = result.get("output")
+        if not result.get("success") or not isinstance(output, str):
+            continue
+        for match in url_pattern.finditer(output):
+            url = _strip_trailing_punctuation(match.group())
+            if url:
+                urls.append(url)
+
+    # dict.fromkeys de-duplicates while keeping first-seen order.
+    return list(dict.fromkeys(urls))
+
+
+def _strip_trailing_punctuation(url: str) -> str:
+    """Remove sentence punctuation and unbalanced ")" from the end of a URL."""
+    while url:
+        last = url[-1]
+        if last in ".,;:!?":
+            url = url[:-1]
+        elif last == ")" and url.count(")") > url.count("("):
+            # A ")" without a matching "(" belongs to the surrounding text.
+            url = url[:-1]
+        else:
+            break
+    return url
+
+
+def main():
+    """
+    Command-line entry point: collect URLs, download them, save a record.
+
+    URLs come from ``--url``, ``--urls`` and/or ``--from-search``; the
+    combined list is de-duplicated before downloading. A
+    ``download_record.json`` mapping each URL to its local path (or null) is
+    written to the output directory.
+
+    Returns:
+        1 when no URL was supplied, otherwise 0.
+    """
+    cli = argparse.ArgumentParser(description="使用 OpenCLI 下载文档内容")
+    cli.add_argument("--url", help="单个 URL 下载")
+    cli.add_argument("--urls", nargs="+", help="多个 URL 下载")
+    cli.add_argument("--from-search", help="从搜索结果目录加载 URL")
+    cli.add_argument("--output-dir", required=True, help="输出目录")
+    cli.add_argument("--max-workers", type=int, default=3, help="最大并行数")
+    args = cli.parse_args()
+
+    target_dir = args.output_dir
+    # The output directory must exist before anything is written to it.
+    os.makedirs(target_dir, exist_ok=True)
+
+    # Gather URLs from every source given on the command line.
+    urls = [args.url] if args.url else []
+    if args.urls:
+        urls += args.urls
+    if args.from_search:
+        found = load_results_from_search(args.from_search)
+        urls += found
+        print(f"从搜索结果加载 {len(found)} 个 URL")
+
+    if not urls:
+        print("错误: 未提供 URL")
+        return 1
+
+    # De-duplicate; the resulting order is not significant.
+    urls = list(set(urls))
+    print(f"\n共 {len(urls)} 个唯一 URL 待下载\n")
+
+    results = batch_download(urls, target_dir, args.max_workers)
+
+    # Summary of how many downloads produced a file.
+    succeeded = [path for path in results.values() if path is not None]
+    rule = "=" * 60
+    print(f"\n{rule}")
+    print(f"下载完成: {len(succeeded)}/{len(urls)} 成功")
+    print(rule)
+
+    # Persist the url -> path mapping next to the downloaded files.
+    record_file = os.path.join(target_dir, "download_record.json")
+    with open(record_file, "w", encoding="utf-8") as fh:
+        json.dump(results, fh, ensure_ascii=False, indent=2)
+    print(f"下载记录: {record_file}")
+
+    return 0
+
+
+# Run the CLI only when executed as a script; main()'s result is the exit code.
+if __name__ == "__main__":
+    sys.exit(main())
--- a/skills/opencli-websearch/scripts/unified_search.py
+++ b/skills/opencli-websearch/scripts/unified_search.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+"""
+统一搜索入口 - 整合 Qoder WebSearch 和 OpenCLI 多源搜索
+"""
+
+import os
+import sys
+import json
+import hashlib
+import subprocess
+import argparse
+from datetime import datetime
+from typing import List, Dict, Optional
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+
+def create_storage_dir(query: str) -> str:
+    """
+    Create and return a per-query storage directory under ``~/Downloads``.
+
+    The directory is ``~/Downloads/opencli-websearch-data/<stamp>_<hash>``,
+    where ``<stamp>`` is the current local time as ``YYYYmmdd_HHMMSS`` and
+    ``<hash>`` is the first 8 hex digits of the MD5 of ``query``. A
+    ``content`` sub-directory is created inside it.
+    """
+    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    digest = hashlib.md5(query.encode()).hexdigest()
+    storage_path = os.path.expanduser(
+        f"~/Downloads/opencli-websearch-data/{stamp}_{digest[:8]}"
+    )
+
+    # Create the directory itself, then its "content" sub-directory.
+    for directory in (storage_path, os.path.join(storage_path, "content")):
+        os.makedirs(directory, exist_ok=True)
+
+    return storage_path
+
+
+def run_opencli_search(source: str, query: str, limit: int = 5) -> Dict:
+    """
+    Run one OpenCLI query and report the outcome as a dict.
+
+    For ``hackernews`` the ``top`` listing is requested and ``query`` is not
+    passed; every other source is queried with ``opencli <source> search``.
+
+    Returns:
+        Dict with keys ``source``, ``success``, ``output`` (stdout on
+        success, else None) and ``error`` (stderr, ``"Timeout"`` or the
+        exception text on failure, else None).
+    """
+    limit_args = ["--limit", str(limit)]
+    if source == "hackernews":
+        cmd = ["opencli", "hackernews", "top"] + limit_args
+    else:
+        cmd = ["opencli", source, "search", query] + limit_args
+
+    # Fields shared by every failure result.
+    failure = {"source": source, "success": False, "output": None}
+
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
+    except subprocess.TimeoutExpired:
+        return {**failure, "error": "Timeout"}
+    except Exception as exc:
+        return {**failure, "error": str(exc)}
+
+    ok = proc.returncode == 0
+    return {
+        "source": source,
+        "success": ok,
+        "output": proc.stdout if ok else None,
+        "error": None if ok else proc.stderr,
+    }
+
+
+def run_qoder_websearch(query: str) -> Dict:
+    """
+    Return a placeholder entry for Qoder WebSearch.
+
+    No search is performed here and ``query`` is not used. The returned dict
+    is flagged with ``needs_qoder`` to mark that the actual results are
+    expected to be supplied through the Qoder tool.
+    """
+    placeholder = dict(
+        source="qoder_websearch",
+        success=True,
+        output="[Qoder WebSearch 结果将在此处合并]",
+        error=None,
+        needs_qoder=True,
+    )
+    return placeholder
+
+
+def parallel_search(query: str, sources: List[str], use_qoder: bool = True) -> Dict[str, Dict]:
+    """
+    Query every OpenCLI source concurrently and collect the outcomes.
+
+    When ``use_qoder`` is true, a ``qoder_websearch`` placeholder entry is
+    added first. Each source is searched with a limit of 5 on a pool of at
+    most 5 worker threads.
+
+    Returns:
+        Mapping of source name to its result dict.
+    """
+    results: Dict[str, Dict] = {}
+
+    if use_qoder:
+        results["qoder_websearch"] = run_qoder_websearch(query)
+
+    with ThreadPoolExecutor(max_workers=5) as pool:
+        # Remember which source each future belongs to.
+        pending = {}
+        for name in sources:
+            pending[pool.submit(run_opencli_search, name, query, 5)] = name
+
+        for done in as_completed(pending):
+            name = pending[done]
+            try:
+                outcome = done.result()
+            except Exception as exc:
+                outcome = {"source": name, "success": False, "output": None, "error": str(exc)}
+            results[name] = outcome
+
+    return results
+
+
+def select_sources(query: str, intent: Optional[str] = None) -> List[str]:
+    """
+    Choose OpenCLI data sources for a query.
+
+    When ``intent`` names a known category, that category's sources are
+    returned. Otherwise the query text is inspected: academic keywords add
+    ``arxiv``, technical keywords add ``stackoverflow`` and ``hackernews``,
+    and any CJK character adds ``36kr``. If nothing matches, a default set
+    is used.
+
+    Args:
+        query: Search query text.
+        intent: Optional category name (``academic``, ``technical``,
+            ``chinese``, ``news`` or ``general``).
+
+    Returns:
+        De-duplicated source names in a stable priority order.
+    """
+    # Sources per intent category.
+    source_config = {
+        "academic": ["arxiv"],
+        "technical": ["stackoverflow", "hackernews", "gitee"],
+        "chinese": ["36kr", "zhihu", "xiaohongshu"],
+        "news": ["bbc", "reuters"],
+        "general": ["google"],
+    }
+
+    if intent and intent in source_config:
+        candidates = source_config[intent]
+    else:
+        # No usable intent: infer the categories from the query text.
+        candidates = []
+        query_lower = query.lower()
+
+        academic_keywords = ("paper", "论文", "arxiv", "research", "study")
+        if any(kw in query_lower for kw in academic_keywords):
+            candidates.extend(source_config["academic"])
+
+        technical_keywords = ("python", "javascript", "code", "programming", "bug", "error")
+        if any(kw in query_lower for kw in technical_keywords):
+            candidates.extend(["stackoverflow", "hackernews"])
+
+        # Any character in the CJK Unified Ideographs range marks a Chinese
+        # query; only 36kr is added for it.
+        if any('\u4e00' <= char <= '\u9fff' for char in query):
+            candidates.append("36kr")
+
+        if not candidates:
+            candidates = ["arxiv", "stackoverflow", "36kr", "hackernews"]
+
+    # dict.fromkeys de-duplicates while keeping priority order; a set would
+    # yield an order that can change from one run to the next.
+    return list(dict.fromkeys(candidates))
+
+
+def generate_report(query: str, results: Dict, storage_path: str) -> str:
+    """
+    Render the search results as a Markdown report.
+
+    The report has a header (query, generation time, storage path and
+    success count), one section per source in sorted order, and a footer.
+    Successful output longer than 2000 characters is truncated; error text
+    is cut to 200 characters.
+
+    Returns:
+        The report as a single string.
+    """
+    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    ok_total = len([r for r in results.values() if r.get("success")])
+
+    # Header block.
+    parts = [
+        f"# 搜索报告: {query}\n",
+        f"**搜索时间**: {generated_at}\n",
+        f"**存储位置**: `{storage_path}`\n",
+        f"**数据源**: {len(results)} 个 | **成功**: {ok_total} 个\n",
+        "---\n",
+    ]
+
+    # One section per source, ordered by source name.
+    for source, result in sorted(results.items()):
+        succeeded = result.get("success")
+        parts.append(f"\n## {'✅' if succeeded else '❌'} {source.upper()}\n")
+
+        body = result.get("output") if succeeded else None
+        if body:
+            # Keep the report readable when a source returns a lot of text.
+            if len(body) > 2000:
+                body = body[:2000] + "\n\n... (内容已截断)"
+            parts.append(f"```\n{body}\n```\n")
+        elif result.get("error"):
+            parts.append(f"```\n错误: {result['error'][:200]}\n```\n")
+
+        if result.get("needs_qoder"):
+            parts.append("> 📝 **注意**: Qoder WebSearch 结果将通过 Qoder 工具直接提供\n")
+
+    # Footer.
+    parts += ["\n---\n", "*由 OpenCLI WebSearch Skill 生成*\n"]
+
+    return "\n".join(parts)
+
+
+def save_results(storage_path: str, query: str, results: Dict, report: str):
+    """
+    Write the search artefacts into ``storage_path``.
+
+    Three files are produced: ``metadata.json`` (query, timestamp, source
+    names and success count), ``results.json`` (the raw results) and
+    ``report.md`` (the rendered report).
+    """
+    def _dump_json(filename, payload):
+        # All JSON files share the same encoding and formatting.
+        with open(os.path.join(storage_path, filename), "w", encoding="utf-8") as fh:
+            json.dump(payload, fh, ensure_ascii=False, indent=2)
+
+    succeeded = [r for r in results.values() if r.get("success")]
+    metadata = {
+        "query": query,
+        "timestamp": datetime.now().isoformat(),
+        "sources": list(results.keys()),
+        "success_count": len(succeeded),
+    }
+
+    _dump_json("metadata.json", metadata)
+    _dump_json("results.json", results)
+
+    with open(os.path.join(storage_path, "report.md"), "w", encoding="utf-8") as fh:
+        fh.write(report)
+
+
+def main():
+    """
+    Command-line entry point for the unified search.
+
+    Parses the arguments, resolves the storage directory and data sources,
+    runs the parallel search, then writes and prints the Markdown report.
+
+    Returns:
+        0 as the process exit status.
+    """
+    parser = argparse.ArgumentParser(description="统一搜索入口 - Qoder + OpenCLI")
+    parser.add_argument("query", help="搜索查询")
+    parser.add_argument("--intent", choices=["academic", "technical", "chinese", "news", "general"],
+                        help="搜索意图类型")
+    parser.add_argument("--sources", nargs="+", help="指定 OpenCLI 数据源")
+    parser.add_argument("--no-qoder", action="store_true", help="不使用 Qoder WebSearch")
+    parser.add_argument("--output", help="输出目录")
+    args = parser.parse_args()
+
+    # An explicit --output wins; otherwise a per-query directory is created.
+    storage_path = args.output
+    if storage_path:
+        os.makedirs(storage_path, exist_ok=True)
+    else:
+        storage_path = create_storage_dir(args.query)
+    print(f"📁 存储路径: {storage_path}\n")
+
+    # Explicit --sources win over intent/keyword based selection.
+    sources = args.sources if args.sources else select_sources(args.query, args.intent)
+    qoder_enabled = not args.no_qoder
+
+    print(f"🔍 OpenCLI 数据源: {', '.join(sources)}")
+    print(f"🔍 Qoder WebSearch: {'启用' if qoder_enabled else '禁用'}\n")
+
+    print("⏳ 正在并行搜索...\n")
+    results = parallel_search(args.query, sources, use_qoder=qoder_enabled)
+
+    report = generate_report(args.query, results, storage_path)
+    save_results(storage_path, args.query, results, report)
+    print(report)
+
+    print("\n✅ 搜索完成！")
+    for label, filename in (("📄 报告", "report.md"), ("📊 数据", "results.json")):
+        print(f"{label}: {os.path.join(storage_path, filename)}")
+
+    return 0
+
+
+# Run the CLI only when executed as a script; main()'s result is the exit code.
+if __name__ == "__main__":
+    sys.exit(main())
--- a/skills/opencli-websearch/scripts/websearch.py
+++ b/skills/opencli-websearch/scripts/websearch.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python3
+"""
+OpenCLI 多源 Web 搜索脚本
+支持 Qoder WebSearch 和 OpenCLI 并行搜索
+"""
+
+import os
+import sys
+import json
+import hashlib
+import subprocess
+import argparse
+from datetime import datetime
+from pathlib import Path
+from typing import List, Dict, Optional
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+
+# Data-source configuration, grouped by search-intent category.
+# Each entry maps a source name to its settings:
+#   "type"  - "public" or "browser"; select_sources() picks only "public"
+#             sources for its catch-all intent (presumably "browser" sources
+#             need a browser session - TODO confirm)
+#   "limit" - result limit passed to the OpenCLI search for that source
+SOURCES = {
+    "academic": {
+        "arxiv": {"type": "public", "limit": 5},
+    },
+    "technical": {
+        "stackoverflow": {"type": "public", "limit": 5},
+        "hackernews": {"type": "public", "limit": 5},
+        "gitee": {"type": "public", "limit": 5},
+    },
+    "chinese": {
+        "zhihu": {"type": "browser", "limit": 5},
+        "xiaohongshu": {"type": "browser", "limit": 5},
+        "36kr": {"type": "public", "limit": 5},
+    },
+    "news": {
+        "bbc": {"type": "public", "limit": 5},
+        "reuters": {"type": "public", "limit": 5},
+    },
+    "general": {
+        "google": {"type": "browser", "limit": 5},
+    }
+}
+
+
+def create_storage_dir(query: str) -> str:
+    """
+    Create a storage directory for one search and return its path.
+
+    The path is ``~/Downloads/opencli-websearch-data/<stamp>_<hash>`` with
+    ``<stamp>`` the current local time (``YYYYmmdd_HHMMSS``) and ``<hash>``
+    the first 8 hex digits of the MD5 of ``query``. A ``content``
+    sub-directory is created as well.
+    """
+    now_label = datetime.now().strftime("%Y%m%d_%H%M%S")
+    short_hash = hashlib.md5(query.encode()).hexdigest()[:8]
+    base_dir = "~/Downloads/opencli-websearch-data"
+    storage_path = os.path.expanduser(f"{base_dir}/{now_label}_{short_hash}")
+
+    content_dir = os.path.join(storage_path, "content")
+    for directory in (storage_path, content_dir):
+        os.makedirs(directory, exist_ok=True)
+
+    return storage_path
+
+
+def run_opencli_search(source: str, query: str, limit: int = 5) -> Dict:
+    """
+    Run ``opencli <source> search <query> --limit <limit>``.
+
+    Returns:
+        Dict with keys ``source``, ``success``, ``output`` (stdout on
+        success, else None) and ``error`` (stderr, ``"Timeout"`` or the
+        exception text on failure, else None).
+    """
+    def _outcome(success, output=None, error=None):
+        # Every result shares the same four keys in the same order.
+        return {"source": source, "success": success, "output": output, "error": error}
+
+    cmd = ["opencli", source, "search", query, "--limit", str(limit)]
+
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
+    except subprocess.TimeoutExpired:
+        return _outcome(False, error="Timeout")
+    except Exception as exc:
+        return _outcome(False, error=str(exc))
+
+    if proc.returncode == 0:
+        return _outcome(True, output=proc.stdout)
+    return _outcome(False, error=proc.stderr)
+
+
+def run_opencli_hackernews(limit: int = 5) -> Dict:
+    """
+    Fetch the HackerNews top listing via ``opencli hackernews top``.
+
+    Returns:
+        Dict with keys ``source`` (always ``"hackernews"``), ``success``,
+        ``output`` (stdout on success, else None) and ``error`` (stderr or
+        the exception text on failure, else None).
+    """
+    cmd = ["opencli", "hackernews", "top", "--limit", str(limit)]
+
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+    except Exception as exc:
+        # Also covers subprocess.TimeoutExpired; its message becomes the error.
+        return {"source": "hackernews", "success": False, "output": None, "error": str(exc)}
+
+    if proc.returncode == 0:
+        output, error = proc.stdout, None
+    else:
+        output, error = None, proc.stderr
+
+    return {
+        "source": "hackernews",
+        "success": proc.returncode == 0,
+        "output": output,
+        "error": error,
+    }
+
+
+def download_content(url: str, output_dir: str) -> Optional[str]:
+    """
+    Download a URL's content with ``opencli web read``.
+
+    The file is written to ``output_dir`` as ``web_<hash>.md``, where
+    ``<hash>`` is the first 12 hex digits of the MD5 of ``url``.
+
+    Returns:
+        The output path when the command exits with 0 and the file exists;
+        None otherwise, including when running the command raises.
+    """
+    digest = hashlib.md5(url.encode()).hexdigest()
+    output_path = os.path.join(output_dir, "web_" + digest[:12] + ".md")
+
+    try:
+        proc = subprocess.run(
+            ["opencli", "web", "read", "--url", url, "--output", output_path],
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+        succeeded = proc.returncode == 0 and os.path.exists(output_path)
+    except Exception:
+        # Best effort: any failure is reported to the caller as None.
+        return None
+
+    return output_path if succeeded else None
+
+
+def select_sources(query: str, intent: Optional[str] = None) -> List[str]:
+    """
+    Choose data sources for a query.
+
+    With an ``intent`` of ``academic``, ``technical``, ``chinese`` or
+    ``news``, all sources of that ``SOURCES`` category are returned; any
+    other intent selects every source marked ``"public"`` in those four
+    categories. Without an intent the query text is inspected: academic
+    keywords add the academic sources, technical keywords add
+    ``stackoverflow`` and ``hackernews``, and any CJK character adds
+    ``36kr``. If nothing matches, a default set is used.
+
+    Args:
+        query: Search query text.
+        intent: Optional intent category name.
+
+    Returns:
+        De-duplicated source names in a stable priority order.
+    """
+    categories = ("academic", "technical", "chinese", "news")
+
+    if intent:
+        if intent in categories:
+            candidates = list(SOURCES[intent])
+        else:
+            # Catch-all intent: every public source of the four categories.
+            candidates = [
+                source
+                for category in categories
+                for source, config in SOURCES[category].items()
+                if config["type"] == "public"
+            ]
+    else:
+        # No intent given: infer the categories from the query text.
+        candidates = []
+        query_lower = query.lower()
+
+        academic_keywords = ("paper", "论文", "arxiv", "research", "study")
+        if any(kw in query_lower for kw in academic_keywords):
+            candidates.extend(SOURCES["academic"])
+
+        technical_keywords = ("python", "javascript", "code", "programming", "bug", "error")
+        if any(kw in query_lower for kw in technical_keywords):
+            candidates.extend(["stackoverflow", "hackernews"])
+
+        # Any character in the CJK Unified Ideographs range marks a Chinese
+        # query; only the public source 36kr is added for it.
+        if any('\u4e00' <= char <= '\u9fff' for char in query):
+            candidates.append("36kr")
+
+        if not candidates:
+            candidates = ["arxiv", "stackoverflow", "36kr"]
+
+    # dict.fromkeys de-duplicates while keeping priority order; a set would
+    # yield an order that can change from one run to the next.
+    return list(dict.fromkeys(candidates))
+
+
+def parallel_search(query: str, sources: List[str]) -> Dict[str, Dict]:
+    """
+    Query all given sources concurrently and collect the outcomes.
+
+    ``hackernews`` is fetched through ``run_opencli_hackernews`` with its
+    default limit; every other source is searched with the ``limit``
+    configured for it in ``SOURCES`` (5 when the source is not configured).
+    At most 5 worker threads are used.
+
+    Returns:
+        Mapping of source name to its result dict.
+    """
+    results = {}
+
+    with ThreadPoolExecutor(max_workers=5) as pool:
+        # Remember which source each future belongs to.
+        pending = {}
+
+        for name in sources:
+            if name == "hackernews":
+                task = pool.submit(run_opencli_hackernews)
+            else:
+                # First category that lists the source decides its limit.
+                per_source_limit = next(
+                    (
+                        category[name].get("limit", 5)
+                        for category in SOURCES.values()
+                        if name in category
+                    ),
+                    5,
+                )
+                task = pool.submit(run_opencli_search, name, query, per_source_limit)
+            pending[task] = name
+
+        for task in as_completed(pending):
+            name = pending[task]
+            try:
+                outcome = task.result()
+            except Exception as exc:
+                outcome = {
+                    "source": name,
+                    "success": False,
+                    "output": None,
+                    "error": str(exc),
+                }
+            results[name] = outcome
+
+    return results
+
+
+def save_results(storage_path: str, query: str, results: Dict) -> str:
+    """
+    Save the search results into ``storage_path``.
+
+    Writes ``metadata.json`` (query, timestamp, source names and success
+    count) and ``results.json`` (the complete results).
+
+    Returns:
+        ``storage_path``, unchanged.
+    """
+    succeeded = [r for r in results.values() if r["success"]]
+    payloads = {
+        "metadata.json": {
+            "query": query,
+            "timestamp": datetime.now().isoformat(),
+            "sources": list(results.keys()),
+            "success_count": len(succeeded),
+        },
+        "results.json": results,
+    }
+
+    # Metadata first, then the full results, with identical JSON formatting.
+    for filename, payload in payloads.items():
+        with open(os.path.join(storage_path, filename), "w", encoding="utf-8") as fh:
+            json.dump(payload, fh, ensure_ascii=False, indent=2)
+
+    return storage_path
+
+
+def print_report(query: str, results: Dict, storage_path: str):
+    """
+    Print a plain-text report of the search results to stdout.
+
+    The report shows the query, storage path, source names and success
+    count, followed by one entry per source. Successful output is cut to
+    500 characters (with ``...`` appended when truncated); error text is cut
+    to 100 characters.
+    """
+    heavy_rule = "=" * 60
+    light_rule = "-" * 60
+
+    # Title banner.
+    print(f"\n{heavy_rule}")
+    print(f"搜索报告: {query}")
+    print(heavy_rule)
+
+    # Summary.
+    print(f"\n存储位置: {storage_path}")
+    print(f"数据源: {', '.join(results.keys())}")
+    succeeded = [r for r in results.values() if r["success"]]
+    print(f"成功: {len(succeeded)}/{len(results)}")
+
+    print("\n" + light_rule)
+    print("各源结果:")
+    print(light_rule)
+
+    # One entry per source, in the order the results were collected.
+    for source, result in results.items():
+        mark = "✓" if result["success"] else "✗"
+        print(f"\n[{mark}] {source}")
+
+        if result["success"] and result["output"]:
+            text = result["output"]
+            # Keep console output short; mark truncation with an ellipsis.
+            preview = text if len(text) <= 500 else text[:500] + "..."
+            print(preview)
+        elif result["error"]:
+            print(f"  错误: {result['error'][:100]}")
+
+    print(f"\n{heavy_rule}\n")
+
+
+def main():
+    """
+    Command-line entry point for the multi-source search.
+
+    Parses the arguments, resolves the storage directory and data sources,
+    runs the parallel search, saves the results and prints a report.
+
+    The ``--download`` flag is accepted but no download step is performed
+    by this function; a warning is printed to stderr when it is given so
+    the flag is not silently ignored.
+
+    Returns:
+        0 as the process exit status.
+    """
+    parser = argparse.ArgumentParser(description="OpenCLI 多源 Web 搜索")
+    parser.add_argument("query", help="搜索查询")
+    parser.add_argument("--intent", choices=["academic", "technical", "chinese", "news", "general"],
+                        help="搜索意图类型")
+    parser.add_argument("--sources", nargs="+", help="指定数据源")
+    parser.add_argument("--download", action="store_true", help="下载高相关性文档")
+    parser.add_argument("--output", help="输出目录")
+
+    args = parser.parse_args()
+
+    if args.download:
+        # Nothing below acts on this flag; tell the user instead of
+        # pretending the documents were downloaded.
+        print("警告: --download 选项尚未实现，已忽略", file=sys.stderr)
+
+    # An explicit --output wins; otherwise a per-query directory is created.
+    if args.output:
+        storage_path = args.output
+        os.makedirs(storage_path, exist_ok=True)
+    else:
+        storage_path = create_storage_dir(args.query)
+
+    print(f"存储路径: {storage_path}")
+
+    # Explicit --sources win over intent/keyword based selection.
+    if args.sources:
+        sources = args.sources
+    else:
+        sources = select_sources(args.query, args.intent)
+
+    print(f"搜索源: {', '.join(sources)}")
+    print("正在并行搜索...")
+
+    results = parallel_search(args.query, sources)
+
+    save_results(storage_path, args.query, results)
+
+    print_report(args.query, results, storage_path)
+
+    return 0
+
+
+# Run the CLI only when executed as a script; main()'s result is the exit code.
+if __name__ == "__main__":
+    sys.exit(main())