feat: Flask统一数据服务迁移(分层架构)
架构设计:
- 对外统一接口 fetch():自动识别资产类型并路由
- 对内分层实现:各资产类型独立方法,职责单一

新增文件:
- datasource/universal_fetcher.py: 统一数据获取器
  - _fetch_china_index: A股指数(Tushare)
  - _fetch_china_etf: A股ETF(含净值)
  - _fetch_us_index: 美股指数(YFinance+SSH)
  - _fetch_hk_index: 港股指数(YFinance+SSH)
  - _fetch_futures: 期货(Tushare/YFinance)
  - fetch_etf_with_nav: ETF价格+净值(计算溢价率)
- datasource/asset_type_detector.py: 资产类型检测器
  - AssetType枚举:9种资产类型
  - detect(): 自动识别资产类型
  - group_by_type(): 批量分组
- datasource/flask_server.py: Flask API服务
  - LRU + TTL 双缓存机制
  - 8个API端点:ohlcv、etf/nav、batch、cache等

更新:
- datasource/__init__.py: 导出新模块

验证:
- 模块导入成功
- 资产类型检测正确
- A股数据获取正常(沪深300: 5条)
This commit is contained in:
322
datasource/universal_fetcher.py
Normal file
322
datasource/universal_fetcher.py
Normal file
@@ -0,0 +1,322 @@
|
||||
"""
Unified data fetcher.

Layered design: a single public entry point, with one private
implementation per asset type behind it.
Supported: A-share indices/ETFs, Hong Kong indices, US indices and futures.
Cryptocurrency codes are routed, but fetching is not implemented yet
(the call prints a warning and returns None).

Usage:
    from datasource import UniversalDataFetcher

    fetcher = UniversalDataFetcher()

    # Single instrument (asset type is detected automatically)
    df = fetcher.fetch("000300.SH", "2024-01-01", "2024-12-31")

    # ETF price together with NAV
    price_df, nav_df = fetcher.fetch_etf_with_nav("513100.SH", "2024-01-01", "2024-12-31")

    # Batch fetch
    results = fetcher.fetch_batch(["000300.SH", "NDX", "N225"], "2024-01-01", "2024-12-31")
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Optional, Dict, List, Tuple
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
|
||||
from .tushare_source import TushareSource
|
||||
from .yfinance_source import YFinanceSource
|
||||
from .ssh_tunnel import SSHTunnelManager
|
||||
from .asset_type_detector import AssetTypeDetector, AssetType
|
||||
|
||||
|
||||
class UniversalDataFetcher:
    """Unified market-data fetcher.

    Layered design:
    - Outward: a single ``fetch()`` entry point that detects the asset type
      of a code and routes the request.
    - Inward: one private ``_fetch_*`` method per asset type, each with a
      single responsibility.

    The instance can be used as a context manager; entering starts the SSH
    tunnel (when enabled in ``ssh_config``) and leaving stops it.
    """

    def __init__(
        self,
        ssh_config: Optional[Dict] = None,
        use_cache: bool = True,
        cache_dir: str = "data/etf_cache/daily"
    ):
        """Create the fetcher and its data-source backends.

        Args:
            ssh_config: SSH tunnel configuration. The tunnel is only started
                when this mapping has a truthy ``'enabled'`` entry; the whole
                mapping is handed to ``SSHTunnelManager``.
            use_cache: Whether to use a local cache. Stored on the instance;
                not read anywhere else in this class.
            cache_dir: Cache directory. Stored on the instance; not read
                anywhere else in this class.
        """
        self.ssh_config = ssh_config or {}
        self.use_cache = use_cache
        self.cache_dir = cache_dir

        # Data-source backends.
        self._tushare = TushareSource()
        self._yfinance = YFinanceSource()

        # SSH tunnel is created lazily by _start_tunnel().
        self._tunnel: Optional[SSHTunnelManager] = None
        self._tunnel_started = False

    def __enter__(self):
        """Start the SSH tunnel (if enabled) and return the fetcher."""
        # The start result is not checked here; entering never fails because
        # of a tunnel that reported failure without raising.
        self._start_tunnel()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the SSH tunnel; exceptions from the ``with`` body propagate."""
        self._stop_tunnel()

    # ============================================================
    # SSH tunnel management
    # ============================================================

    def _start_tunnel(self) -> bool:
        """Start the SSH tunnel if it is enabled and not already running.

        Returns:
            True when the tunnel is already running, was started
            successfully, or is not enabled in ``ssh_config``; False when
            ``SSHTunnelManager.start()`` reported failure.
        """
        if self._tunnel_started:
            return True

        if not self.ssh_config.get('enabled'):
            # Tunnel disabled: nothing to start, treated as success.
            return True

        self._tunnel = SSHTunnelManager(self.ssh_config)
        if self._tunnel.start():
            self._tunnel_started = True
            return True
        return False

    def _stop_tunnel(self):
        """Stop the SSH tunnel, if one was created, and reset tunnel state.

        The state is reset even when ``stop()`` raises, so a failed shutdown
        cannot leave the fetcher believing a tunnel is still running. The
        exception itself still propagates to the caller.
        """
        if self._tunnel:
            try:
                self._tunnel.stop()
            finally:
                self._tunnel = None
                self._tunnel_started = False

    # ============================================================
    # Unified entry point (automatic routing)
    # ============================================================

    def fetch(
        self,
        code: str,
        start_date: str,
        end_date: str,
        retry: int = 3
    ) -> Optional[pd.DataFrame]:
        """Fetch data for one instrument, routing by detected asset type.

        Args:
            code: Instrument code.
            start_date: Start date, 'YYYY-MM-DD'.
            end_date: End date, 'YYYY-MM-DD'.
            retry: Maximum number of attempts. Values below 1 are treated
                as 1 so that the request is always tried at least once.

        Returns:
            Whatever the routed backend returns (expected to be a DataFrame
            with columns date, open, high, low, close, volume -- the schema
            is produced by the backends, not by this class), or None when
            the asset type is unknown or every attempt raised.
        """
        asset_type = AssetTypeDetector.detect(code)
        attempts = max(1, retry)

        for attempt in range(attempts):
            try:
                # Route to the asset-type specific implementation.
                if asset_type == AssetType.CHINA_INDEX:
                    return self._fetch_china_index(code, start_date, end_date)
                elif asset_type == AssetType.CHINA_ETF:
                    return self._fetch_china_etf(code, start_date, end_date)
                elif asset_type == AssetType.US_INDEX:
                    return self._fetch_us_index(code, start_date, end_date)
                elif asset_type == AssetType.HK_INDEX:
                    return self._fetch_hk_index(code, start_date, end_date)
                elif asset_type == AssetType.FUTURES:
                    return self._fetch_futures(code, start_date, end_date)
                elif asset_type == AssetType.CRYPTO:
                    return self._fetch_crypto(code, start_date, end_date)
                else:
                    print(f"⚠️ 未知资产类型: {code} -> {asset_type}")
                    return None
            except Exception as e:
                # Only exceptions trigger a retry; a backend returning None
                # is passed straight back to the caller.
                if attempt < attempts - 1:
                    time.sleep(2)
                else:
                    print(f"✗ 获取 {code} 失败 (尝试 {attempt+1}/{attempts}): {e}")
                    return None

        return None

    # ============================================================
    # Layered implementation: one method per asset type
    # ============================================================

    def _fetch_china_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch an A-share index via ``TushareSource.fetch_index``.

        No SSH tunnel is started for this path.
        """
        return self._tushare.fetch_index(code, start_date, end_date)

    def _fetch_china_etf(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch A-share ETF prices via ``TushareSource.fetch_etf``.

        NOTE(review): the original note says this uses Tushare's fund_daily
        interface -- confirm in TushareSource.
        """
        return self._tushare.fetch_etf(code, start_date, end_date)

    def fetch_etf_with_nav(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
        """Fetch ETF prices together with NAV data.

        Intended for premium/discount calculations; the calculation itself
        is left to the caller.

        Args:
            code: ETF code.
            start_date: Start date.
            end_date: End date.

        Returns:
            ``(price_df, nav_df)`` as returned by ``TushareSource.fetch_etf``
            and ``TushareSource.fetch_etf_nav`` respectively.
        """
        price_df = self._tushare.fetch_etf(code, start_date, end_date)
        nav_df = self._tushare.fetch_etf_nav(code, start_date, end_date)
        return price_df, nav_df

    def _fetch_us_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch a US index via ``YFinanceSource.fetch``.

        Starts the SSH tunnel first (no-op when disabled or already running).
        The start result is ignored, so the fetch is attempted even if the
        tunnel could not be established. Any index-code conversion is
        presumably handled inside YFinanceSource -- verify there.
        """
        self._start_tunnel()
        return self._yfinance.fetch(code, start_date, end_date)

    def _fetch_hk_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch a Hong Kong index via ``YFinanceSource.fetch``.

        Starts the SSH tunnel first (no-op when disabled or already running);
        the start result is ignored.
        """
        self._start_tunnel()
        return self._yfinance.fetch(code, start_date, end_date)

    def _fetch_futures(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch futures data.

        Routing:
        - Codes ending in ``.NYM`` (NYMEX) go to YFinance, after starting
          the SSH tunnel.
        - Every other code goes to ``TushareSource.fetch_futures``
          (intended for Chinese futures such as .SHF/.DCE/.CZC).
        """
        if code.endswith('.NYM'):
            self._start_tunnel()
            return self._yfinance.fetch(code, start_date, end_date)

        return self._tushare.fetch_futures(code, start_date, end_date)

    def _fetch_crypto(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Placeholder for cryptocurrency data; always returns None.

        NOTE(review): the original note plans a CCXT-based implementation and
        states CCXT does not support SOCKS5 proxies -- confirm when
        implementing.

        TODO: implement cryptocurrency fetching.
        """
        print(f"⚠️ 加密货币数据获取尚未实现: {code}")
        return None

    # ============================================================
    # Batch fetching
    # ============================================================

    def fetch_batch(
        self,
        codes: List[str],
        start_date: str,
        end_date: str,
        retry: int = 3
    ) -> Dict[str, Optional[pd.DataFrame]]:
        """Fetch data for several instruments, one after another.

        Args:
            codes: Instrument codes.
            start_date: Start date.
            end_date: End date.
            retry: Maximum attempts per code, forwarded to ``fetch()``.

        Returns:
            ``{code: result}`` where each result is what ``fetch()`` returned
            for that code (None on failure).
        """
        results = {}

        # Grouping is used only for the progress summary printed below.
        grouped = AssetTypeDetector.group_by_type(codes)

        print(f"开始获取 {len(codes)} 只标的...")
        for asset_type, code_list in grouped.items():
            print(f"  {asset_type.value}: {len(code_list)} 只")

        # Start the tunnel up front (needed for HK/US data); result ignored.
        self._start_tunnel()

        for code in codes:
            # A code listed more than once is not fetched again once a
            # result has been obtained; an earlier failure (None) is retried.
            if results.get(code) is not None:
                continue
            results[code] = self.fetch(code, start_date, end_date, retry)

        return results

    # ============================================================
    # Helpers
    # ============================================================

    def get_asset_type(self, code: str) -> "AssetType":
        """Return the asset type detected for ``code``."""
        return AssetTypeDetector.detect(code)

    def is_supported(self, code: str) -> bool:
        """Return True unless ``code`` is detected as ``AssetType.UNKNOWN``."""
        return AssetTypeDetector.detect(code) != AssetType.UNKNOWN
|
||||
Reference in New Issue
Block a user