feat(repo): 整理 Qoder Skills 和 MCP 配置到仓库

- 添加 5 个用户级别 Skills:
  - auto-commit: 自动 Git 提交
  - karpathy-guidelines: 编码规范指南
  - opencli-websearch: 多源网络搜索
  - pdf-reader: PDF 内容提取
  - repo-analyzer: 项目深度分析
- 添加 Playwright MCP 配置 (21 个浏览器自动化工具)
- 创建完整的 README.md 文档说明
2026-04-18 11:17:41 +08:00
commit c3ea38c045
33 changed files with 2677 additions and 0 deletions
--- a/skills/opencli-websearch/scripts/download_content.py
+++ b/skills/opencli-websearch/scripts/download_content.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+"""
+使用 OpenCLI web read 下载文档内容
+"""
+
+import os
+import sys
+import json
+import hashlib
+import subprocess
+import argparse
+from pathlib import Path
+from typing import Optional, List
+from urllib.parse import urlparse
+
+
+def download_with_opencli(url: str, output_dir: str, timeout: int = 60) -> Optional[str]:
+    """
+    Download the content of one URL by running ``opencli web read``.
+
+    The output file is named ``<sanitized netloc>_<first 12 hex chars of the
+    URL's md5>.md`` and is placed in ``output_dir``. Progress and failures are
+    reported with ``print``; failures are signalled by the return value, not
+    by an exception.
+
+    Args:
+        url: URL to download.
+        output_dir: Directory the output file is written to.
+        timeout: Seconds to wait for the ``opencli`` process.
+
+    Returns:
+        Path of the downloaded file, or None when the command exits non-zero,
+        times out, cannot be run, or exits successfully without creating the
+        output file.
+    """
+    # The digest only keeps file names unique per URL; it is not a security
+    # measure.
+    url_hash = hashlib.md5(url.encode()).hexdigest()[:12]
+    # netloc can carry userinfo or a port ("user@host:8080"), and characters
+    # such as ":" are not valid in file names on every platform, so everything
+    # except letters, digits, "-" and "_" becomes "_".
+    netloc = urlparse(url).netloc
+    domain = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in netloc)
+    output_path = os.path.join(output_dir, f"{domain}_{url_hash}.md")
+
+    # Passed as an argument list (no shell), so the URL needs no quoting.
+    cmd = ["opencli", "web", "read", "--url", url, "--output", output_path]
+
+    print(f"下载: {url}")
+    print(f"输出: {output_path}")
+
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+        )
+
+        if result.returncode != 0:
+            # Only the head of stderr is printed to keep the log readable.
+            print(f"✗ 下载失败: {result.stderr[:200]}")
+            return None
+
+        # A zero exit status alone does not prove that the file was written.
+        if not os.path.exists(output_path):
+            print("✗ 文件未生成")
+            return None
+
+        file_size = os.path.getsize(output_path)
+        print(f"✓ 成功下载 ({file_size} bytes)")
+        return output_path
+
+    except subprocess.TimeoutExpired:
+        print("✗ 下载超时")
+        return None
+    except Exception as e:
+        # Deliberately broad: one failing URL (including a missing ``opencli``
+        # executable) must not abort the remaining downloads.
+        print(f"✗ 错误: {str(e)}")
+        return None
+
+
+def batch_download(urls: List[str], output_dir: str, max_workers: int = 3) -> dict:
+    """
+    Download several URLs concurrently with a thread pool.
+
+    Args:
+        urls: URLs to download; repeated entries are downloaded only once.
+        output_dir: Directory handed to every individual download.
+        max_workers: Upper bound on concurrent downloads. Values below 1 are
+            treated as 1.
+
+    Returns:
+        Dict mapping each URL to its local file path, or to None when that
+        download failed. Keys follow the order of ``urls``.
+    """
+    from concurrent.futures import ThreadPoolExecutor, as_completed
+
+    # Pre-fill in input order: the mapping (and anything serialized from it)
+    # is then independent of completion order, and duplicate URLs collapse to
+    # one entry so two threads never write the same output file.
+    results = {url: None for url in urls}
+    if not results:
+        return results
+
+    # ThreadPoolExecutor raises ValueError for max_workers <= 0; clamp instead
+    # of crashing. None keeps the executor's own default.
+    worker_count = None if max_workers is None else max(1, max_workers)
+
+    with ThreadPoolExecutor(max_workers=worker_count) as executor:
+        future_to_url = {
+            executor.submit(download_with_opencli, url, output_dir): url
+            for url in results
+        }
+
+        for future in as_completed(future_to_url):
+            url = future_to_url[future]
+            try:
+                results[url] = future.result()
+            except Exception as e:
+                # One failing download must not abort the rest of the batch.
+                print(f"✗ {url} 异常: {str(e)}")
+                results[url] = None
+
+    return results
+
+
+def load_results_from_search(search_dir: str) -> List[str]:
+    """
+    Collect the URLs mentioned in the results of an earlier search run.
+
+    Reads ``results.json`` from ``search_dir``. The file is expected to hold a
+    JSON object whose values are dicts; for every value with a truthy
+    ``success`` entry and a non-empty string ``output`` entry, all http(s)
+    URLs found in that output text are collected.
+
+    Args:
+        search_dir: Directory containing ``results.json``.
+
+    Returns:
+        Unique URLs in order of first appearance. An empty list when the file
+        is missing, unreadable, not valid JSON, or not a JSON object.
+    """
+    import re
+
+    results_file = os.path.join(search_dir, "results.json")
+
+    if not os.path.exists(results_file):
+        print(f"未找到结果文件: {results_file}")
+        return []
+
+    try:
+        with open(results_file, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError) as e:
+        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
+        print(f"✗ 错误: {str(e)}")
+        return []
+
+    if not isinstance(data, dict):
+        return []
+
+    # The character class excludes whitespace, so a match never spans lines
+    # and the whole output can be scanned in one pass.
+    url_pattern = re.compile(r'https?://[^\s\'"<>]+')
+    trailing = ".,;:!?"
+
+    # A dict is used as an insertion-ordered set.
+    found = {}
+    for result in data.values():
+        if not isinstance(result, dict) or not result.get("success"):
+            continue
+        output = result.get("output")
+        if not output or not isinstance(output, str):
+            continue
+        for candidate in url_pattern.findall(output):
+            # Drop sentence punctuation and an unbalanced ")" picked up from
+            # prose or Markdown links such as "[title](https://host/path)".
+            url = candidate.rstrip(trailing)
+            while url.endswith(")") and url.count(")") > url.count("("):
+                url = url[:-1].rstrip(trailing)
+            found[url] = None
+
+    return list(found)
+
+
+def main():
+    """
+    Command-line entry point.
+
+    Parses the arguments, merges the URLs given via ``--url``, ``--urls`` and
+    ``--from-search``, downloads the unique ones into ``--output-dir`` and
+    writes the url -> local path mapping to ``download_record.json`` there.
+
+    Returns:
+        1 when no URL was supplied, otherwise 0.
+    """
+    arg_parser = argparse.ArgumentParser(description="使用 OpenCLI 下载文档内容")
+    arg_parser.add_argument("--url", help="单个 URL 下载")
+    arg_parser.add_argument("--urls", nargs="+", help="多个 URL 下载")
+    arg_parser.add_argument("--from-search", help="从搜索结果目录加载 URL")
+    arg_parser.add_argument("--output-dir", required=True, help="输出目录")
+    arg_parser.add_argument("--max-workers", type=int, default=3, help="最大并行数")
+    options = arg_parser.parse_args()
+
+    # The output directory is created up front, before any URL is examined.
+    target_dir = options.output_dir
+    os.makedirs(target_dir, exist_ok=True)
+
+    # Merge the three possible URL sources in a fixed order.
+    pending = [options.url] if options.url else []
+    pending += options.urls or []
+    if options.from_search:
+        from_search = load_results_from_search(options.from_search)
+        pending += from_search
+        print(f"从搜索结果加载 {len(from_search)} 个 URL")
+
+    if len(pending) == 0:
+        print("错误: 未提供 URL")
+        return 1
+
+    # Drop duplicates before downloading.
+    pending = list(set(pending))
+    print(f"\n共 {len(pending)} 个唯一 URL 待下载\n")
+
+    outcome = batch_download(pending, target_dir, options.max_workers)
+
+    # Summary: a download counts as successful when it produced a path.
+    succeeded = len([path for path in outcome.values() if path is not None])
+    separator = "=" * 60
+    print(f"\n{separator}")
+    print(f"下载完成: {succeeded}/{len(pending)} 成功")
+    print(f"{separator}")
+
+    # Persist the url -> local path mapping next to the downloaded files.
+    record_file = os.path.join(target_dir, "download_record.json")
+    with open(record_file, "w", encoding="utf-8") as record_handle:
+        json.dump(outcome, record_handle, ensure_ascii=False, indent=2)
+
+    print(f"下载记录: {record_file}")
+
+    return 0
+
+
+# Run only when executed as a script; main()'s return value becomes the
+# process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())