feat(repo): 整理 Qoder Skills 和 MCP 配置到仓库
- 添加 5 个用户级别 Skills: - auto-commit: 自动 Git 提交 - karpathy-guidelines: 编码规范指南 - opencli-websearch: 多源网络搜索 - pdf-reader: PDF 内容提取 - repo-analyzer: 项目深度分析 - 添加 Playwright MCP 配置 (21 个浏览器自动化工具) - 创建完整的 README.md 文档说明
This commit is contained in:
245
skills/opencli-websearch/scripts/unified_search.py
Normal file
245
skills/opencli-websearch/scripts/unified_search.py
Normal file
@@ -0,0 +1,245 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
统一搜索入口 - 整合 Qoder WebSearch 和 OpenCLI 多源搜索
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import hashlib
|
||||
import subprocess
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Optional
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
|
||||
def create_storage_dir(query: str) -> str:
    """Create a per-search storage directory and return its path.

    The directory is placed under ``~/Downloads/opencli-websearch-data`` and
    named ``<YYYYmmdd_HHMMSS>_<first 8 hex chars of the query's MD5>``. A
    ``content`` sub-directory is created inside it as well. Directories that
    already exist are left in place.
    """
    # The MD5 digest only serves as a short label derived from the query.
    digest = hashlib.md5(query.encode()).hexdigest()
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    storage_path = os.path.expanduser(
        f"~/Downloads/opencli-websearch-data/{stamp}_{digest[:8]}"
    )

    for directory in (storage_path, os.path.join(storage_path, "content")):
        os.makedirs(directory, exist_ok=True)

    return storage_path
|
||||
|
||||
|
||||
def run_opencli_search(source: str, query: str, limit: int = 5) -> Dict:
    """Run one OpenCLI search in a subprocess and return a result record.

    Args:
        source: OpenCLI sub-command naming the data source.
        query: Search text. Ignored for ``"hackernews"``, where the ``top``
            listing is requested instead of a search.
        limit: Value passed to the ``--limit`` option.

    Returns:
        A dict with the keys ``source``, ``success``, ``output`` (stdout when
        the command exits with code 0, otherwise ``None``) and ``error``
        (a non-empty message on failure, otherwise ``None``). Exceptions
        raised while launching or running the command are reported through
        this dict instead of propagating.
    """

    def failure(message: str) -> Dict:
        # Single place that shapes every failure record.
        return {"source": source, "success": False, "output": None, "error": message}

    if source == "hackernews":
        cmd = ["opencli", "hackernews", "top", "--limit", str(limit)]
    else:
        cmd = ["opencli", source, "search", query, "--limit", str(limit)]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Undecodable bytes in the tool's output must not fail the search.
            errors="replace",
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        return failure("Timeout")
    except FileNotFoundError:
        # Give a readable hint instead of a raw errno string.
        return failure("opencli executable not found on PATH")
    except Exception as e:
        return failure(str(e) or type(e).__name__)

    if result.returncode == 0:
        return {"source": source, "success": True, "output": result.stdout, "error": None}

    # A failing command may write nothing to stderr; always report something
    # so the failure is visible to whoever reads the record.
    if result.stderr and result.stderr.strip():
        message = result.stderr
    elif result.stdout and result.stdout.strip():
        message = result.stdout
    else:
        message = f"opencli exited with code {result.returncode}"
    return failure(message)
|
||||
|
||||
|
||||
def run_qoder_websearch(query: str) -> Dict:
    """Return the placeholder record for the Qoder WebSearch source.

    No search is performed here and ``query`` is not used. Per the original
    author's note, the real web search is carried out by the Qoder tooling;
    this record only marks, via ``needs_qoder``, that Qoder has to supply
    the results.
    """
    placeholder = dict(
        source="qoder_websearch",
        success=True,
        output="[Qoder WebSearch 结果将在此处合并]",
        error=None,
        needs_qoder=True,
    )
    return placeholder
|
||||
|
||||
|
||||
def parallel_search(
    query: str,
    sources: List[str],
    use_qoder: bool = True,
    limit: int = 5,
    max_workers: int = 5,
) -> Dict[str, Dict]:
    """Run the OpenCLI searches for ``sources`` concurrently.

    Args:
        query: Search text handed to every source.
        sources: OpenCLI source names. Duplicates are searched only once.
        use_qoder: When true, the Qoder WebSearch placeholder record is
            added under the ``"qoder_websearch"`` key.
        limit: Per-source result limit forwarded to ``run_opencli_search``.
        max_workers: Size of the thread pool.

    Returns:
        A dict mapping each source name to its result record. A source whose
        worker raised gets a failure record carrying the exception text.
    """
    results: Dict[str, Dict] = {}

    if use_qoder:
        results["qoder_websearch"] = run_qoder_websearch(query)

    # Results are keyed by source name, so a repeated name would only spawn
    # a redundant subprocess whose record overwrites the earlier one.
    unique_sources = list(dict.fromkeys(sources))
    if not unique_sources:
        return results

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_source = {
            executor.submit(run_opencli_search, source, query, limit): source
            for source in unique_sources
        }

        for future in as_completed(future_to_source):
            source = future_to_source[future]
            try:
                results[source] = future.result()
            except Exception as e:
                results[source] = {
                    "source": source,
                    "success": False,
                    "output": None,
                    "error": str(e),
                }

    return results
|
||||
|
||||
|
||||
def select_sources(query: str, intent: Optional[str] = None) -> List[str]:
    """Choose the OpenCLI data sources for a query.

    Args:
        query: Search text, inspected for keywords when no usable intent
            is given.
        intent: Optional intent name. When it is one of the configured keys
            its source list is returned directly; any other value falls back
            to keyword detection.

    Returns:
        A new list of unique source names in a deterministic order
        (configuration order for an intent, detection order otherwise).
    """
    source_config = {
        "academic": ["arxiv"],
        "technical": ["stackoverflow", "hackernews", "gitee"],
        "chinese": ["36kr", "zhihu", "xiaohongshu"],
        "news": ["bbc", "reuters"],
        "general": ["google"],
    }

    if intent and intent in source_config:
        # Copy so callers cannot mutate the configuration list.
        return list(dict.fromkeys(source_config[intent]))

    sources: List[str] = []
    query_lower = query.lower()

    # Academic keywords.
    if any(kw in query_lower for kw in ["paper", "论文", "arxiv", "research", "study"]):
        sources.extend(source_config["academic"])

    # Technical keywords.
    if any(kw in query_lower for kw in ["python", "javascript", "code", "programming", "bug", "error"]):
        sources.extend(["stackoverflow", "hackernews"])

    # Any CJK unified ideograph in the query adds the 36kr source.
    if any('\u4e00' <= char <= '\u9fff' for char in query):
        sources.extend(["36kr"])

    # Default set when nothing matched.
    if not sources:
        sources = ["arxiv", "stackoverflow", "36kr", "hackernews"]

    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would return the names in an order that varies between runs.
    return list(dict.fromkeys(sources))
|
||||
|
||||
|
||||
def generate_report(query: str, results: Dict, storage_path: str) -> str:
    """Render the search results as a Markdown report string.

    The report starts with a header (query, current time, storage path and
    success statistics), followed by one section per source in sorted
    name order. A successful source shows its output in a code fence, cut
    to 2000 characters; otherwise the first 200 characters of its error are
    shown. Records flagged ``needs_qoder`` get an extra note.
    """
    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    succeeded = len([entry for entry in results.values() if entry.get("success")])

    parts = [
        f"# 搜索报告: {query}\n",
        f"**搜索时间**: {generated_at}\n",
        f"**存储位置**: `{storage_path}`\n",
        f"**数据源**: {len(results)} 个 | **成功**: {succeeded} 个\n",
        "---\n",
    ]

    # One section per source, ordered by source name.
    for name in sorted(results):
        entry = results[name]
        icon = "✅" if entry.get("success") else "❌"
        parts.append(f"\n## {icon} {name.upper()}\n")

        # Output is only shown for successful records.
        text = entry.get("output") if entry.get("success") else None
        if text:
            if len(text) > 2000:
                # Keep the report readable by cutting overly long output.
                text = text[:2000] + "\n\n... (内容已截断)"
            parts.append(f"```\n{text}\n```\n")
        elif entry.get("error"):
            parts.append(f"```\n错误: {entry['error'][:200]}\n```\n")

        if entry.get("needs_qoder"):
            parts.append("> 📝 **注意**: Qoder WebSearch 结果将通过 Qoder 工具直接提供\n")

    parts.append("\n---\n")
    parts.append("*由 OpenCLI WebSearch Skill 生成*\n")

    return "\n".join(parts)
|
||||
|
||||
|
||||
def save_results(storage_path: str, query: str, results: Dict, report: str):
    """Write the search artefacts into ``storage_path``.

    Three UTF-8 files are written: ``metadata.json`` (query, current
    timestamp, source names and success count), ``results.json`` (the raw
    result records) and ``report.md`` (the report text as given).
    """

    def dump_json(filename, payload):
        # Both JSON files share the same formatting options.
        with open(os.path.join(storage_path, filename), "w", encoding="utf-8") as handle:
            json.dump(payload, handle, ensure_ascii=False, indent=2)

    succeeded = [name for name, entry in results.items() if entry.get("success")]

    dump_json(
        "metadata.json",
        {
            "query": query,
            "timestamp": datetime.now().isoformat(),
            "sources": list(results),
            "success_count": len(succeeded),
        },
    )
    dump_json("results.json", results)

    with open(os.path.join(storage_path, "report.md"), "w", encoding="utf-8") as handle:
        handle.write(report)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments, search, report, save.

    Returns:
        Always ``0``, which the caller uses as the process exit status.
    """
    cli = argparse.ArgumentParser(description="统一搜索入口 - Qoder + OpenCLI")
    cli.add_argument("query", help="搜索查询")
    cli.add_argument(
        "--intent",
        choices=["academic", "technical", "chinese", "news", "general"],
        help="搜索意图类型",
    )
    cli.add_argument("--sources", nargs="+", help="指定 OpenCLI 数据源")
    cli.add_argument("--no-qoder", action="store_true", help="不使用 Qoder WebSearch")
    cli.add_argument("--output", help="输出目录")
    args = cli.parse_args()

    # Storage location: an explicit --output wins over the generated one.
    if not args.output:
        storage_path = create_storage_dir(args.query)
    else:
        storage_path = args.output
        os.makedirs(storage_path, exist_ok=True)
    print(f"📁 存储路径: {storage_path}\n")

    # Explicit --sources bypass the intent/keyword based selection.
    sources = args.sources or select_sources(args.query, args.intent)
    qoder_enabled = not args.no_qoder

    print(f"🔍 OpenCLI 数据源: {', '.join(sources)}")
    print(f"🔍 Qoder WebSearch: {'启用' if qoder_enabled else '禁用'}\n")

    print("⏳ 正在并行搜索...\n")
    results = parallel_search(args.query, sources, use_qoder=qoder_enabled)

    # Build the report, persist everything, then echo the report.
    report = generate_report(args.query, results, storage_path)
    save_results(storage_path, args.query, results, report)
    print(report)

    print("\n✅ 搜索完成!")
    for label, filename in (("📄 报告", "report.md"), ("📊 数据", "results.json")):
        print(f"{label}: {os.path.join(storage_path, filename)}")

    return 0
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user