Files
qoder-config/skills/opencli-websearch/scripts/unified_search.py
aszerW c3ea38c045 feat(repo): 整理 Qoder Skills 和 MCP 配置到仓库
- 添加 5 个用户级别 Skills:
  - auto-commit: 自动 Git 提交
  - karpathy-guidelines: 编码规范指南
  - opencli-websearch: 多源网络搜索
  - pdf-reader: PDF 内容提取
  - repo-analyzer: 项目深度分析

- 添加 Playwright MCP 配置 (21 个浏览器自动化工具)
- 创建完整的 README.md 文档说明
2026-04-18 11:17:41 +08:00

246 lines
8.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
统一搜索入口 - 整合 Qoder WebSearch 和 OpenCLI 多源搜索
"""
import os
import sys
import json
import hashlib
import subprocess
import argparse
from datetime import datetime
from typing import List, Dict, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
def create_storage_dir(query: str) -> str:
    """Create a per-search storage directory and return its path.

    The directory is placed under ``~/Downloads/opencli-websearch-data`` and
    is named ``<YYYYmmdd_HHMMSS>_<first 8 hex digits of md5(query)>``. A
    ``content`` subdirectory is created inside it as well.

    Args:
        query: The search query; only its MD5 digest is used in the name.

    Returns:
        The expanded path of the newly created (or already existing) directory.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    digest = hashlib.md5(query.encode()).hexdigest()[:8]
    storage_path = os.path.expanduser(
        f"~/Downloads/opencli-websearch-data/{stamp}_{digest}"
    )
    # exist_ok keeps repeated calls within the same second from failing.
    for directory in (storage_path, os.path.join(storage_path, "content")):
        os.makedirs(directory, exist_ok=True)
    return storage_path
def run_opencli_search(source: str, query: str, limit: int = 5) -> Dict:
    """Run a single OpenCLI command for ``source`` and summarize the outcome.

    For the ``hackernews`` source the ``top`` subcommand is invoked and the
    query is not passed on; every other source uses ``search <query>``.

    Args:
        source: OpenCLI source name, used as the first CLI argument.
        query: Search text handed to the ``search`` subcommand.
        limit: Value passed to ``--limit``.

    Returns:
        A dict with the keys ``source``, ``success``, ``output`` and
        ``error``. ``output`` holds stdout when the exit code is 0; otherwise
        ``error`` holds stderr, ``"Timeout"`` after 60 seconds, or the text of
        any other exception raised while launching the process.
    """
    limit_arg = str(limit)
    cmd = (
        ["opencli", "hackernews", "top", "--limit", limit_arg]
        if source == "hackernews"
        else ["opencli", source, "search", query, "--limit", limit_arg]
    )

    def failure(message):
        # Uniform shape for every path that never produced a usable result.
        return {"source": source, "success": False, "output": None, "error": message}

    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    except subprocess.TimeoutExpired:
        return failure("Timeout")
    except Exception as exc:
        # E.g. the opencli executable is missing; report instead of raising.
        return failure(str(exc))

    ok = completed.returncode == 0
    return {
        "source": source,
        "success": ok,
        "output": completed.stdout if ok else None,
        "error": None if ok else completed.stderr,
    }
def run_qoder_websearch(query: str) -> Dict:
    """Return a placeholder entry standing in for a Qoder WebSearch result.

    No search is performed here and ``query`` is not used: the returned dict
    only carries a ``needs_qoder`` marker and a fixed placeholder output so
    that the Qoder environment can supply the real results itself.

    Args:
        query: The search query (unused by this placeholder).

    Returns:
        A fixed dict shaped like the other search results, plus
        ``needs_qoder=True``.
    """
    placeholder = dict(
        source="qoder_websearch",
        success=True,
        output="[Qoder WebSearch 结果将在此处合并]",
        error=None,
        needs_qoder=True,
    )
    return placeholder
def parallel_search(query: str, sources: List[str], use_qoder: bool = True) -> Dict[str, Dict]:
    """Run the OpenCLI searches for all ``sources`` concurrently.

    Args:
        query: Search text forwarded to every source.
        sources: OpenCLI source names; each is searched with a limit of 5.
        use_qoder: When true, a ``qoder_websearch`` placeholder entry is
            added before the OpenCLI searches are started.

    Returns:
        A dict mapping each source name to its result dict. A search whose
        future raises is recorded as a failed result rather than propagated.
    """
    collected: Dict[str, Dict] = {}

    # Record the Qoder placeholder first when it is enabled.
    if use_qoder:
        collected["qoder_websearch"] = run_qoder_websearch(query)

    # Fan the OpenCLI searches out over a pool of at most five threads.
    with ThreadPoolExecutor(max_workers=5) as pool:
        pending = {}
        for name in sources:
            pending[pool.submit(run_opencli_search, name, query, 5)] = name

        for finished in as_completed(pending):
            name = pending[finished]
            try:
                outcome = finished.result()
            except Exception as exc:
                outcome = {"source": name, "success": False, "output": None, "error": str(exc)}
            collected[name] = outcome

    return collected
def select_sources(query: str, intent: Optional[str] = None) -> List[str]:
    """Choose the OpenCLI data sources for a query.

    If ``intent`` names a known category, that category's sources are used.
    Otherwise the query text is inspected: academic keywords add ``arxiv``,
    programming keywords add ``stackoverflow`` and ``hackernews``, and any
    CJK ideograph in the query adds ``36kr``. When nothing matches, a default
    mix of sources is returned.

    Args:
        query: The search text to inspect.
        intent: Optional category name (``academic``, ``technical``,
            ``chinese``, ``news`` or ``general``). Unknown or empty values
            fall back to keyword-based detection.

    Returns:
        A new list of unique source names in a stable order: configuration
        order for an explicit intent, otherwise the order in which the
        keyword rules matched.
    """
    source_config = {
        "academic": ["arxiv"],
        "technical": ["stackoverflow", "hackernews", "gitee"],
        "chinese": ["36kr", "zhihu", "xiaohongshu"],
        "news": ["bbc", "reuters"],
        "general": ["google"],
    }

    if intent and intent in source_config:
        sources = list(source_config[intent])
    else:
        # No usable intent: infer the sources from the query text.
        sources = []
        query_lower = query.lower()

        # Academic keywords.
        if any(kw in query_lower for kw in ("paper", "论文", "arxiv", "research", "study")):
            sources.extend(source_config["academic"])

        # Programming keywords.
        if any(kw in query_lower for kw in ("python", "javascript", "code", "programming", "bug", "error")):
            sources.extend(["stackoverflow", "hackernews"])

        # Chinese text (CJK Unified Ideographs block): prefer the public source.
        if any("\u4e00" <= char <= "\u9fff" for char in query):
            sources.append("36kr")

        # Default mix when no rule matched.
        if not sources:
            sources = ["arxiv", "stackoverflow", "36kr", "hackernews"]

    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # make the order vary between runs because of string hash randomization.
    return list(dict.fromkeys(sources))
def generate_report(query: str, results: Dict, storage_path: str) -> str:
    """Build the Markdown search report.

    The report has a header (query, current time, storage location and
    success statistics) followed by one section per source in alphabetical
    order. Each section heading carries a success or failure marker.
    Successful output is shown in a code fence and truncated to 2000
    characters; failures show the first 200 characters of the error.

    Args:
        query: The search text, shown in the title.
        results: Mapping of source name to result dict (keys read here:
            ``success``, ``output``, ``error``, ``needs_qoder``).
        storage_path: Directory shown as the storage location.

    Returns:
        The report as a single Markdown string.
    """
    success_count = sum(1 for r in results.values() if r.get("success"))
    report = [
        f"# 搜索报告: {query}\n",
        f"**搜索时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
        f"**存储位置**: `{storage_path}`\n",
        f"**数据源**: {len(results)} 个 | **成功**: {success_count}\n",
        "---\n",
    ]

    # One section per source, sorted by source name.
    for source, result in sorted(results.items()):
        # Distinct markers so successful and failed sources can be told apart
        # in the headings.
        status_icon = "✅" if result.get("success") else "❌"
        report.append(f"\n## {status_icon} {source.upper()}\n")

        if result.get("success") and result.get("output"):
            output = result["output"]
            # Keep the report readable by truncating very long output.
            if len(output) > 2000:
                output = output[:2000] + "\n\n... (内容已截断)"
            report.append(f"```\n{output}\n```\n")
        elif result.get("error"):
            # str() guards against a non-string error value before slicing.
            error_text = str(result["error"])[:200]
            report.append(f"```\n错误: {error_text}\n```\n")

        if result.get("needs_qoder"):
            report.append("> 📝 **注意**: Qoder WebSearch 结果将通过 Qoder 工具直接提供\n")

    report.append("\n---\n")
    report.append("*由 OpenCLI WebSearch Skill 生成*\n")
    return "\n".join(report)
def save_results(storage_path: str, query: str, results: Dict, report: str):
    """Write the search artifacts into ``storage_path``.

    Three UTF-8 files are written: ``metadata.json`` (query, ISO timestamp,
    source names and success count), ``results.json`` (the raw results) and
    ``report.md`` (the report text).

    Args:
        storage_path: Existing directory to write into.
        query: The search text recorded in the metadata.
        results: Mapping of source name to result dict; must be JSON
            serializable.
        report: Markdown report text.
    """

    def dump_json(filename, payload):
        # ensure_ascii=False keeps non-ASCII text readable in the files.
        with open(os.path.join(storage_path, filename), "w", encoding="utf-8") as fh:
            json.dump(payload, fh, ensure_ascii=False, indent=2)

    succeeded = [entry for entry in results.values() if entry.get("success")]

    # Metadata summary.
    dump_json(
        "metadata.json",
        {
            "query": query,
            "timestamp": datetime.now().isoformat(),
            "sources": list(results.keys()),
            "success_count": len(succeeded),
        },
    )

    # Raw per-source results.
    dump_json("results.json", results)

    # Rendered report.
    with open(os.path.join(storage_path, "report.md"), "w", encoding="utf-8") as fh:
        fh.write(report)
def main():
    """Command-line entry point: parse arguments, search, save and print.

    Parses the query and options, prepares the storage directory (the
    ``--output`` value if given, otherwise a freshly created one), picks the
    data sources (``--sources`` if given, otherwise chosen from the query and
    intent), runs the searches, writes the results and prints the report.

    Returns:
        Always 0, which is used as the process exit status.
    """
    parser = argparse.ArgumentParser(description="统一搜索入口 - Qoder + OpenCLI")
    parser.add_argument("query", help="搜索查询")
    parser.add_argument(
        "--intent",
        choices=["academic", "technical", "chinese", "news", "general"],
        help="搜索意图类型",
    )
    parser.add_argument("--sources", nargs="+", help="指定 OpenCLI 数据源")
    parser.add_argument("--no-qoder", action="store_true", help="不使用 Qoder WebSearch")
    parser.add_argument("--output", help="输出目录")
    opts = parser.parse_args()

    # Storage directory: auto-generated unless the caller supplied one.
    if not opts.output:
        storage_path = create_storage_dir(opts.query)
    else:
        storage_path = opts.output
        os.makedirs(storage_path, exist_ok=True)
    print(f"📁 存储路径: {storage_path}\n")

    # Data sources: explicit list wins over automatic selection.
    sources = opts.sources or select_sources(opts.query, opts.intent)
    qoder_enabled = not opts.no_qoder
    print(f"🔍 OpenCLI 数据源: {', '.join(sources)}")
    print(f"🔍 Qoder WebSearch: {'启用' if qoder_enabled else '禁用'}\n")

    # Run all searches in parallel.
    print("⏳ 正在并行搜索...\n")
    results = parallel_search(opts.query, sources, use_qoder=qoder_enabled)

    # Build the report, persist everything, then echo the report.
    report = generate_report(opts.query, results, storage_path)
    save_results(storage_path, opts.query, results, report)
    print(report)

    report_file = os.path.join(storage_path, "report.md")
    data_file = os.path.join(storage_path, "results.json")
    print("\n✅ 搜索完成!")
    print(f"📄 报告: {report_file}")
    print(f"📊 数据: {data_file}")
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())