架构设计:
- 对外统一接口 fetch():自动识别资产类型并路由
- 对内分层实现:各资产类型独立方法,职责单一

新增文件:
- datasource/universal_fetcher.py: 统一数据获取器
  - _fetch_china_index: A股指数(Tushare)
  - _fetch_china_etf: A股ETF(含净值)
  - _fetch_us_index: 美股指数(YFinance+SSH)
  - _fetch_hk_index: 港股指数(YFinance+SSH)
  - _fetch_futures: 期货(Tushare/YFinance)
  - fetch_etf_with_nav: ETF价格+净值(计算溢价率)
- datasource/asset_type_detector.py: 资产类型检测器
  - AssetType枚举:9种资产类型
  - detect(): 自动识别资产类型
  - group_by_type(): 批量分组
- datasource/flask_server.py: Flask API服务
  - LRU + TTL 双缓存机制
  - 8个API端点:ohlcv、etf/nav、batch、cache等

更新:
- datasource/__init__.py: 导出新模块

验证:
- 模块导入成功
- 资产类型检测正确
- A股数据获取正常(沪深300: 5条)
322 lines
9.4 KiB
Python
322 lines
9.4 KiB
Python
"""
Unified data fetcher.

Layered architecture: one public entry point, with an independent
implementation per asset type.
Supported: A-share indices/ETFs, Hong Kong indices, US indices, futures,
cryptocurrencies (crypto fetching is not yet implemented).

Usage:
    from datasource import UniversalDataFetcher

    fetcher = UniversalDataFetcher()

    # Fetch a single symbol (asset type is detected automatically)
    df = fetcher.fetch("000300.SH", "2024-01-01", "2024-12-31")

    # Fetch an ETF together with its net asset value (NAV)
    price_df, nav_df = fetcher.fetch_etf_with_nav("513100.SH", "2024-01-01", "2024-12-31")

    # Fetch several symbols in one call
    results = fetcher.fetch_batch(["000300.SH", "NDX", "N225"], "2024-01-01", "2024-12-31")
"""
|
||
|
||
import os
|
||
import time
|
||
from typing import Optional, Dict, List, Tuple
|
||
from datetime import datetime
|
||
import pandas as pd
|
||
|
||
from .tushare_source import TushareSource
|
||
from .yfinance_source import YFinanceSource
|
||
from .ssh_tunnel import SSHTunnelManager
|
||
from .asset_type_detector import AssetTypeDetector, AssetType
|
||
|
||
|
||
class UniversalDataFetcher:
    """
    Unified market-data fetcher.

    Layered design:
    - Public side: a single ``fetch()`` entry point that detects the asset
      type of a code and routes the request automatically.
    - Internal side: one private method per asset type, each with a single
      responsibility.

    Instances can be used as context managers: entering starts the SSH
    tunnel (only when ``ssh_config['enabled']`` is truthy) and leaving
    stops it.
    """

    def __init__(
        self,
        ssh_config: Optional[Dict] = None,
        use_cache: bool = True,
        cache_dir: str = "data/etf_cache/daily"
    ):
        """
        Create the fetcher and its underlying data sources.

        Args:
            ssh_config: SSH tunnel configuration (used for HK/US data).
                ``None`` is treated as an empty configuration, i.e. the
                tunnel is disabled.
            use_cache: Whether to use the local cache. The value is only
                stored here; no method of this class reads it.
            cache_dir: Cache directory. Stored only, like ``use_cache``.
        """
        self.ssh_config = ssh_config or {}
        self.use_cache = use_cache
        self.cache_dir = cache_dir

        # Data source instances
        self._tushare = TushareSource()
        self._yfinance = YFinanceSource()

        # SSH tunnel (created lazily by _start_tunnel)
        self._tunnel: Optional[SSHTunnelManager] = None
        self._tunnel_started = False

    def __enter__(self):
        """Context-manager entry: start the tunnel and return ``self``."""
        # Best effort: a failed tunnel start is not reported here.
        self._start_tunnel()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context-manager exit: stop the tunnel; exceptions propagate."""
        self._stop_tunnel()

    # ============================================================
    # SSH tunnel management
    # ============================================================

    def _start_tunnel(self) -> bool:
        """
        Start the SSH tunnel if it is enabled and not yet running.

        Returns:
            True when the tunnel is already running, was started
            successfully, or is not enabled in ``ssh_config``;
            False when starting the tunnel failed.
        """
        if self._tunnel_started:
            return True

        # Tunnel disabled (or no config at all): nothing to start.
        if not self.ssh_config.get('enabled'):
            return True

        self._tunnel = SSHTunnelManager(self.ssh_config)
        if self._tunnel.start():
            self._tunnel_started = True
            return True
        return False

    def _stop_tunnel(self):
        """Stop the SSH tunnel (if one was created) and reset the state."""
        if self._tunnel:
            self._tunnel.stop()
            self._tunnel = None
        self._tunnel_started = False

    # ============================================================
    # Unified entry point (automatic routing)
    # ============================================================

    def fetch(
        self,
        code: str,
        start_date: str,
        end_date: str,
        retry: int = 3,
        retry_delay: float = 2.0
    ) -> Optional[pd.DataFrame]:
        """
        Unified data-fetch entry point.

        Detects the asset type of ``code`` and routes the request to the
        matching private fetch method, retrying when that method raises.

        Args:
            code: Instrument code.
            start_date: Start date, 'YYYY-MM-DD'.
            end_date: End date, 'YYYY-MM-DD'.
            retry: Total number of attempts. Values below 1 are treated
                as 1 so that at least one attempt is always made.
            retry_delay: Seconds to sleep between failed attempts.

        Returns:
            Whatever the routed fetch method returns (expected to be a
            DataFrame with columns: date, open, high, low, close, volume),
            or None when the asset type is not routed or every attempt
            raised an exception.
        """
        asset_type = AssetTypeDetector.detect(code)

        # Dispatch table: asset type -> dedicated fetch method.
        handlers = {
            AssetType.CHINA_INDEX: self._fetch_china_index,
            AssetType.CHINA_ETF: self._fetch_china_etf,
            AssetType.US_INDEX: self._fetch_us_index,
            AssetType.HK_INDEX: self._fetch_hk_index,
            AssetType.FUTURES: self._fetch_futures,
            AssetType.CRYPTO: self._fetch_crypto,
        }
        handler = handlers.get(asset_type)
        if handler is None:
            # Retrying cannot help for an unrouted type, so bail out early.
            print(f"⚠️ 未知资产类型: {code} -> {asset_type}")
            return None

        # Always make at least one attempt, even for retry <= 0.
        attempts = max(1, retry)
        last_error: Optional[Exception] = None
        for attempt in range(1, attempts + 1):
            try:
                return handler(code, start_date, end_date)
            except Exception as exc:
                last_error = exc
                # Back off before the next attempt, but not after the last.
                if attempt < attempts:
                    time.sleep(retry_delay)

        print(f"✗ 获取 {code} 失败 (尝试 {attempts}/{attempts}): {last_error}")
        return None

    # ============================================================
    # Layered implementation: one method per asset type
    # ============================================================

    def _fetch_via_yfinance(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Start the SSH tunnel (best effort) and fetch ``code`` via YFinance."""
        # The result of _start_tunnel() is deliberately not checked: the
        # request is attempted even if the tunnel could not be started.
        self._start_tunnel()
        return self._yfinance.fetch(code, start_date, end_date)

    def _fetch_china_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """
        Fetch an A-share index.

        Uses the Tushare source; no SSH tunnel is started.
        """
        return self._tushare.fetch_index(code, start_date, end_date)

    def _fetch_china_etf(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """
        Fetch A-share ETF prices.

        Uses the Tushare source's ``fetch_etf``.
        """
        return self._tushare.fetch_etf(code, start_date, end_date)

    def fetch_etf_with_nav(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
        """
        Fetch ETF prices together with net asset value (NAV).

        Intended for computing the premium rate.

        Args:
            code: ETF code.
            start_date: Start date.
            end_date: End date.

        Returns:
            ``(price_df, nav_df)`` as returned by the Tushare source.
        """
        price_df = self._tushare.fetch_etf(code, start_date, end_date)
        nav_df = self._tushare.fetch_etf_nav(code, start_date, end_date)
        return price_df, nav_df

    def _fetch_us_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """
        Fetch a US index.

        Uses YFinance through the SSH tunnel. Any index-code conversion
        is presumably handled inside the YFinance source; this method
        passes ``code`` through unchanged.
        """
        return self._fetch_via_yfinance(code, start_date, end_date)

    def _fetch_hk_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """
        Fetch a Hong Kong index.

        Uses YFinance through the SSH tunnel.
        """
        return self._fetch_via_yfinance(code, start_date, end_date)

    def _fetch_futures(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """
        Fetch futures data.

        Routing:
        - Codes ending in ``.NYM`` (NYMEX): YFinance through the SSH tunnel.
        - Everything else (e.g. Chinese futures .SHF/.DCE/.CZC): Tushare.
        """
        if code.endswith('.NYM'):
            return self._fetch_via_yfinance(code, start_date, end_date)
        return self._tushare.fetch_futures(code, start_date, end_date)

    def _fetch_crypto(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """
        Fetch cryptocurrency data (not implemented).

        Planned source per the original note: CCXT, which does not
        support SOCKS5 proxies.

        TODO: implement cryptocurrency fetching.

        Returns:
            Always None, after printing a warning.
        """
        print(f"⚠️ 加密货币数据获取尚未实现: {code}")
        return None

    # ============================================================
    # Batch fetching
    # ============================================================

    def fetch_batch(
        self,
        codes: List[str],
        start_date: str,
        end_date: str,
        retry: int = 3
    ) -> Dict[str, Optional[pd.DataFrame]]:
        """
        Fetch data for several instruments.

        Args:
            codes: List of instrument codes.
            start_date: Start date.
            end_date: End date.
            retry: Number of attempts per code, forwarded to ``fetch()``.

        Returns:
            ``{code: DataFrame-or-None}`` with one entry per distinct code.
        """
        # Group by asset type (used for the summary and the tunnel check).
        grouped = AssetTypeDetector.group_by_type(codes)

        print(f"开始获取 {len(codes)} 只标的...")
        for asset_type, code_list in grouped.items():
            print(f" {asset_type.value}: {len(code_list)} 只")

        # Open the tunnel up front only when the batch needs it (HK/US
        # indices, NYMEX futures). Pure Tushare batches no longer open an
        # SSH connection; the per-asset fetchers still start it lazily.
        tunnel_types = (AssetType.US_INDEX, AssetType.HK_INDEX)
        needs_tunnel = (
            any(asset_type in tunnel_types for asset_type in grouped)
            or any(code.endswith('.NYM') for code in codes)
        )
        if needs_tunnel:
            self._start_tunnel()

        results: Dict[str, Optional[pd.DataFrame]] = {}
        for code in codes:
            results[code] = self.fetch(code, start_date, end_date, retry)

        return results

    # ============================================================
    # Helper methods
    # ============================================================

    def get_asset_type(self, code: str) -> "AssetType":
        """Return the asset type detected for ``code``."""
        return AssetTypeDetector.detect(code)

    def is_supported(self, code: str) -> bool:
        """Return True unless ``code`` is detected as ``AssetType.UNKNOWN``."""
        return AssetTypeDetector.detect(code) != AssetType.UNKNOWN