#!/usr/bin/env python3
"""
Unified search entry point.

Combines the Qoder WebSearch placeholder with parallel OpenCLI multi-source
searches, renders a Markdown report, and stores the raw results on disk.
"""

import os
import sys
import json
import hashlib
import subprocess
import argparse
from datetime import datetime
from typing import List, Dict, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed


# Mapping of search intent -> OpenCLI sources queried for that intent.
# Also the single source of truth for the ``--intent`` CLI choices.
SOURCE_CONFIG: Dict[str, List[str]] = {
    "academic": ["arxiv"],
    "technical": ["stackoverflow", "hackernews", "gitee"],
    "chinese": ["36kr", "zhihu", "xiaohongshu"],
    "news": ["bbc", "reuters"],
    "general": ["google"],
}


def create_storage_dir(query: str) -> str:
    """Create and return a fresh storage directory for one search run.

    The directory lives under ``~/Downloads/opencli-websearch-data`` and is
    named ``<timestamp>_<first 8 hex chars of md5(query)>``. A ``content``
    subdirectory is created inside it.

    Args:
        query: The search query; only used to derive the directory name.

    Returns:
        The absolute path of the created directory.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # md5 is used purely as a short, stable label for the query, not for security.
    query_hash = hashlib.md5(query.encode()).hexdigest()[:8]
    dir_name = f"{timestamp}_{query_hash}"
    storage_path = os.path.expanduser(f"~/Downloads/opencli-websearch-data/{dir_name}")
    os.makedirs(storage_path, exist_ok=True)
    os.makedirs(os.path.join(storage_path, "content"), exist_ok=True)
    return storage_path


def run_opencli_search(source: str, query: str, limit: int = 5) -> Dict:
    """Run a single OpenCLI search as a subprocess.

    For the ``hackernews`` source the ``top`` subcommand is invoked and the
    query is not passed; every other source uses ``search <query>``.

    Args:
        source: OpenCLI source name (used as the first CLI argument).
        query: Search query string.
        limit: Value passed to ``--limit``.

    Returns:
        A dict with keys ``source``, ``success``, ``output`` and ``error``.
        ``output`` is the process stdout on success (else ``None``);
        ``error`` is a non-empty message on failure (else ``None``).
        This function never raises: timeouts and any other exception are
        reported through the ``error`` field.
    """
    if source == "hackernews":
        cmd = ["opencli", "hackernews", "top", "--limit", str(limit)]
    else:
        cmd = ["opencli", source, "search", query, "--limit", str(limit)]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        return {"source": source, "success": False, "output": None, "error": "Timeout"}
    except Exception as e:
        # Best-effort boundary: e.g. the ``opencli`` binary is missing.
        return {"source": source, "success": False, "output": None, "error": str(e)}

    if result.returncode == 0:
        return {"source": source, "success": True, "output": result.stdout, "error": None}

    # A failing command may leave stderr blank; always report something so the
    # failure is not rendered as an empty section in the report.
    if result.stderr and result.stderr.strip():
        error = result.stderr
    else:
        error = f"Exit code {result.returncode}"
    return {"source": source, "success": False, "output": None, "error": error}


def run_qoder_websearch(query: str) -> Dict:
    """Return a placeholder result for Qoder WebSearch.

    This function performs no search itself. It returns a marker dict
    (``needs_qoder`` set to ``True``) indicating that the real results are
    expected to be supplied through the Qoder tool.

    Args:
        query: The search query (currently unused by the placeholder).

    Returns:
        A result dict shaped like the OpenCLI results plus ``needs_qoder``.
    """
    return {
        "source": "qoder_websearch",
        "success": True,
        "output": "[Qoder WebSearch 结果将在此处合并]",
        "error": None,
        "needs_qoder": True,
    }


def parallel_search(
    query: str,
    sources: List[str],
    use_qoder: bool = True,
    limit: int = 5,
) -> Dict[str, Dict]:
    """Run the searches for all sources concurrently.

    Args:
        query: Search query string.
        sources: OpenCLI source names to query.
        use_qoder: When true, a Qoder WebSearch placeholder entry is added
            under the ``qoder_websearch`` key.
        limit: Per-source result limit forwarded to ``run_opencli_search``.

    Returns:
        A dict mapping source name to its result dict. Entries for OpenCLI
        sources are inserted in completion order.
    """
    results: Dict[str, Dict] = {}

    # Record the Qoder placeholder first when enabled.
    if use_qoder:
        results["qoder_websearch"] = run_qoder_websearch(query)

    # Fan the OpenCLI searches out over a small thread pool; each task is a
    # blocking subprocess call, so threads are sufficient.
    with ThreadPoolExecutor(max_workers=5) as executor:
        future_to_source = {
            executor.submit(run_opencli_search, source, query, limit): source
            for source in sources
        }
        for future in as_completed(future_to_source):
            source = future_to_source[future]
            try:
                results[source] = future.result()
            except Exception as e:
                results[source] = {
                    "source": source,
                    "success": False,
                    "output": None,
                    "error": str(e),
                }

    return results


def select_sources(query: str, intent: Optional[str] = None) -> List[str]:
    """Choose OpenCLI sources from an explicit intent or from the query text.

    Args:
        query: Search query string, inspected for keywords and CJK characters
            when no valid intent is given.
        intent: Optional key of ``SOURCE_CONFIG``. Unknown or missing values
            fall back to keyword-based detection.

    Returns:
        A new list of unique source names in a deterministic order
        (configuration order for an intent, detection order otherwise).
    """
    sources: List[str] = []

    if intent and intent in SOURCE_CONFIG:
        sources = SOURCE_CONFIG[intent]
    else:
        query_lower = query.lower()

        # Academic keywords.
        if any(kw in query_lower for kw in ["paper", "论文", "arxiv", "research", "study"]):
            sources.extend(SOURCE_CONFIG["academic"])

        # Technical keywords.
        if any(kw in query_lower for kw in ["python", "javascript", "code", "programming", "bug", "error"]):
            sources.extend(["stackoverflow", "hackernews"])

        # Any CJK unified ideograph in the query: add the public Chinese source.
        if any('\u4e00' <= char <= '\u9fff' for char in query):
            sources.extend(["36kr"])

        # Default set when nothing matched.
        if not sources:
            sources = ["arxiv", "stackoverflow", "36kr", "hackernews"]

    # De-duplicate while preserving order. A plain set() would make the
    # order vary between runs because of string hash randomization.
    return list(dict.fromkeys(sources))


def generate_report(query: str, results: Dict, storage_path: str) -> str:
    """Render the search results as a Markdown report.

    Args:
        query: The search query shown in the report title.
        results: Mapping of source name to result dict.
        storage_path: Directory path shown in the report header.

    Returns:
        The report text. Sources are listed in sorted name order; successful
        output longer than 2000 characters is truncated and error text is
        cut to 200 characters.
    """
    report = []
    report.append(f"# 搜索报告: {query}\n")
    report.append(f"**搜索时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    report.append(f"**存储位置**: `{storage_path}`\n")

    # Summary statistics.
    success_count = sum(1 for r in results.values() if r.get("success"))
    report.append(f"**数据源**: {len(results)} 个 | **成功**: {success_count} 个\n")
    report.append("---\n")

    # One section per source.
    for source, result in sorted(results.items()):
        status_icon = "✅" if result.get("success") else "❌"
        report.append(f"\n## {status_icon} {source.upper()}\n")

        if result.get("success") and result.get("output"):
            output = result["output"]
            # Truncate overly long output.
            if len(output) > 2000:
                output = output[:2000] + "\n\n... (内容已截断)"
            report.append(f"```\n{output}\n```\n")
        elif result.get("error"):
            report.append(f"```\n错误: {result['error'][:200]}\n```\n")

        if result.get("needs_qoder"):
            report.append("> 📝 **注意**: Qoder WebSearch 结果将通过 Qoder 工具直接提供\n")

    report.append("\n---\n")
    report.append("*由 OpenCLI WebSearch Skill 生成*\n")

    return "\n".join(report)


def save_results(storage_path: str, query: str, results: Dict, report: str):
    """Write metadata, raw results and the report into ``storage_path``.

    Creates or overwrites ``metadata.json``, ``results.json`` and
    ``report.md`` (all UTF-8). The directory must already exist.

    Args:
        storage_path: Existing directory to write into.
        query: The search query, stored in the metadata.
        results: Mapping of source name to result dict (must be JSON-serializable).
        report: Markdown report text.
    """
    metadata = {
        "query": query,
        "timestamp": datetime.now().isoformat(),
        "sources": list(results.keys()),
        "success_count": sum(1 for r in results.values() if r.get("success")),
    }

    with open(os.path.join(storage_path, "metadata.json"), "w", encoding="utf-8") as f:
        json.dump(metadata, f, ensure_ascii=False, indent=2)

    # Raw results.
    with open(os.path.join(storage_path, "results.json"), "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    # Markdown report.
    with open(os.path.join(storage_path, "report.md"), "w", encoding="utf-8") as f:
        f.write(report)


def main():
    """Parse CLI arguments, run the searches, then save and print the report.

    Returns:
        The process exit code (always 0).
    """
    parser = argparse.ArgumentParser(description="统一搜索入口 - Qoder + OpenCLI")
    parser.add_argument("query", help="搜索查询")
    parser.add_argument("--intent", choices=list(SOURCE_CONFIG),
                        help="搜索意图类型")
    parser.add_argument("--sources", nargs="+", help="指定 OpenCLI 数据源")
    parser.add_argument("--no-qoder", action="store_true", help="不使用 Qoder WebSearch")
    parser.add_argument("--output", help="输出目录")

    args = parser.parse_args()

    # Prepare the storage directory.
    if args.output:
        storage_path = args.output
        os.makedirs(storage_path, exist_ok=True)
        # Keep the same layout as create_storage_dir().
        os.makedirs(os.path.join(storage_path, "content"), exist_ok=True)
    else:
        storage_path = create_storage_dir(args.query)

    print(f"📁 存储路径: {storage_path}\n")

    # Explicit --sources wins over intent/keyword detection.
    if args.sources:
        sources = args.sources
    else:
        sources = select_sources(args.query, args.intent)

    print(f"🔍 OpenCLI 数据源: {', '.join(sources)}")
    print(f"🔍 Qoder WebSearch: {'禁用' if args.no_qoder else '启用'}\n")

    # Run the searches in parallel.
    print("⏳ 正在并行搜索...\n")
    results = parallel_search(args.query, sources, use_qoder=not args.no_qoder)

    report = generate_report(args.query, results, storage_path)
    save_results(storage_path, args.query, results, report)

    print(report)
    print("\n✅ 搜索完成!")
    print(f"📄 报告: {os.path.join(storage_path, 'report.md')}")
    print(f"📊 数据: {os.path.join(storage_path, 'results.json')}")

    return 0


if __name__ == "__main__":
    sys.exit(main())