feat: Flask统一数据服务迁移(分层架构)

架构设计:
- 对外统一接口 fetch():自动识别资产类型并路由
- 对内分层实现:各资产类型独立方法,职责单一

新增文件:
- datasource/universal_fetcher.py: 统一数据获取器
  - _fetch_china_index: A股指数(Tushare)
  - _fetch_china_etf: A股ETF价格(Tushare fund_daily,不含净值)
  - _fetch_us_index: 美股指数(YFinance+SSH)
  - _fetch_hk_index: 港股指数(YFinance+SSH)
  - _fetch_futures: 期货(Tushare/YFinance)
  - fetch_etf_with_nav: ETF价格+净值(返回两份数据,用于计算溢价率)

- datasource/asset_type_detector.py: 资产类型检测器
  - AssetType枚举:9种资产类型
  - detect(): 自动识别资产类型
  - group_by_type(): 批量分组

- datasource/flask_server.py: Flask API服务
  - LRU + TTL 双缓存机制
  - 8个API端点:ohlcv、etf/nav、batch、cache等

更新:
- datasource/__init__.py: 导出新模块

验证:
- 模块导入成功
- 资产类型检测正确
- A股数据获取正常(沪深300: 5条)
This commit is contained in:
2026-05-12 21:33:19 +08:00
parent c63158c99d
commit 4e3aac5e0e
5 changed files with 1144 additions and 0 deletions

View File

@@ -0,0 +1,322 @@
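"""
Unified data fetcher.

Layered architecture: one unified outward-facing interface, with an
independent implementation per asset type internally.

Supports A-share indices/ETFs, Hong Kong indices, US indices and futures.
Cryptocurrency codes are routed as well, but fetching them is not
implemented yet (the call prints a warning and returns None).

Usage:
    from datasource import UniversalDataFetcher

    fetcher = UniversalDataFetcher()

    # Fetch a single instrument (asset type is detected automatically)
    df = fetcher.fetch("000300.SH", "2024-01-01", "2024-12-31")

    # Fetch an ETF together with its NAV
    price_df, nav_df = fetcher.fetch_etf_with_nav("513100.SH", "2024-01-01", "2024-12-31")

    # Batch fetch
    results = fetcher.fetch_batch(["000300.SH", "NDX", "N225"], "2024-01-01", "2024-12-31")
"""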
import os
import time
from typing import Optional, Dict, List, Tuple
from datetime import datetime
import pandas as pd
from .tushare_source import TushareSource
from .yfinance_source import YFinanceSource
from .ssh_tunnel import SSHTunnelManager
from .asset_type_detector import AssetTypeDetector, AssetType
class UniversalDataFetcher:
    """Unified market-data fetcher.

    Layered design:
      - Outward: a single ``fetch()`` entry point that detects the asset
        type of a code and routes the request.
      - Inward: one private method per asset type, each with a single
        responsibility.

    The instance can be used as a context manager: the SSH tunnel (when
    enabled in ``ssh_config``) is started on entry and stopped on exit.
    """

    def __init__(
        self,
        ssh_config: Optional[Dict] = None,
        use_cache: bool = True,
        cache_dir: str = "data/etf_cache/daily"
    ):
        """Create the data sources; the SSH tunnel is NOT opened here.

        Args:
            ssh_config: SSH tunnel configuration (used for the HK/US paths).
                The tunnel is only started when its ``'enabled'`` key is
                truthy. ``None`` is treated as an empty config.
            use_cache: Whether to use a local cache. Only stored on the
                instance; no method of this class reads it.
            cache_dir: Cache directory. Only stored on the instance; no
                method of this class reads it.
        """
        self.ssh_config = ssh_config or {}
        self.use_cache = use_cache
        self.cache_dir = cache_dir

        # Data source instances.
        self._tushare = TushareSource()
        self._yfinance = YFinanceSource()

        # The tunnel is created lazily by _start_tunnel().
        self._tunnel: Optional[SSHTunnelManager] = None
        self._tunnel_started = False

    def __enter__(self):
        """Context-manager entry: start the tunnel and return ``self``."""
        self._start_tunnel()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context-manager exit: stop the tunnel; exceptions are not suppressed."""
        self._stop_tunnel()

    # ============================================================
    # SSH tunnel management
    # ============================================================

    def _start_tunnel(self) -> bool:
        """Start the SSH tunnel if it is enabled and not already running.

        Returns:
            ``True`` when the tunnel is running or is not enabled in
            ``ssh_config`` (nothing to do); ``False`` when the tunnel is
            enabled but ``SSHTunnelManager.start()`` reported failure.
        """
        if self._tunnel_started:
            return True
        if not self.ssh_config.get('enabled'):
            # Tunnel not requested: treat as success so callers proceed.
            return True

        self._tunnel = SSHTunnelManager(self.ssh_config)
        if self._tunnel.start():
            self._tunnel_started = True
            return True
        return False

    def _stop_tunnel(self):
        """Stop the SSH tunnel, if one was created, and reset tunnel state.

        The state is reset even when ``stop()`` raises, so a failed
        shutdown cannot leave the instance believing the tunnel is still
        up. The exception itself still propagates to the caller.
        """
        if self._tunnel:
            try:
                self._tunnel.stop()
            finally:
                self._tunnel = None
                self._tunnel_started = False

    # ============================================================
    # Unified entry point (automatic routing)
    # ============================================================

    def fetch(
        self,
        code: str,
        start_date: str,
        end_date: str,
        retry: int = 3,
        retry_delay: float = 2.0
    ) -> Optional[pd.DataFrame]:
        """Fetch data for one code, routing by its detected asset type.

        Args:
            code: Instrument code.
            start_date: Start date, 'YYYY-MM-DD'.
            end_date: End date, 'YYYY-MM-DD'.
            retry: Maximum number of attempts. Values below 1 are treated
                as 1 so that at least one attempt is always made.
            retry_delay: Seconds to wait between failed attempts.
                Non-positive values disable the wait.

        Returns:
            Whatever the underlying data source returns (expected to be a
            DataFrame with columns date, open, high, low, close, volume --
            not verifiable from this class), or ``None`` when the asset
            type is unknown or every attempt raised.
        """
        asset_type = AssetTypeDetector.detect(code)
        handler = self._handler_for(asset_type)
        if handler is None:
            print(f"⚠️ 未知资产类型: {code} -> {asset_type}")
            return None

        attempts = max(1, retry)
        for attempt in range(attempts):
            try:
                return handler(code, start_date, end_date)
            except Exception as e:
                # Deliberately broad: this is the boundary that turns any
                # data-source failure into a retry or a ``None`` result.
                if attempt < attempts - 1:
                    if retry_delay > 0:
                        time.sleep(retry_delay)
                else:
                    print(f"✗ 获取 {code} 失败 (尝试 {attempt+1}/{attempts}): {e}")
        return None

    def _handler_for(self, asset_type):
        """Return the bound fetch method for ``asset_type``, or ``None``."""
        routes = {
            AssetType.CHINA_INDEX: self._fetch_china_index,
            AssetType.CHINA_ETF: self._fetch_china_etf,
            AssetType.US_INDEX: self._fetch_us_index,
            AssetType.HK_INDEX: self._fetch_hk_index,
            AssetType.FUTURES: self._fetch_futures,
            AssetType.CRYPTO: self._fetch_crypto,
        }
        return routes.get(asset_type)

    # ============================================================
    # Layered implementation: one method per asset type
    # ============================================================

    def _fetch_china_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch an A-share index through Tushare (no SSH tunnel is started)."""
        return self._tushare.fetch_index(code, start_date, end_date)

    def _fetch_china_etf(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch A-share ETF prices through ``TushareSource.fetch_etf``.

        NOTE(review): the original note says this maps to Tushare's
        ``fund_daily`` endpoint -- confirm in ``TushareSource``.
        """
        return self._tushare.fetch_etf(code, start_date, end_date)

    def fetch_etf_with_nav(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
        """Fetch ETF prices together with NAV (inputs for a premium rate).

        Unlike ``fetch()``, this method neither retries nor catches
        exceptions; errors from the data source propagate to the caller.

        Args:
            code: ETF code.
            start_date: Start date, passed through to the data source.
            end_date: End date, passed through to the data source.

        Returns:
            ``(price_df, nav_df)``.
        """
        price_df = self._tushare.fetch_etf(code, start_date, end_date)
        nav_df = self._tushare.fetch_etf_nav(code, start_date, end_date)
        return price_df, nav_df

    def _fetch_via_yfinance(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Ensure the tunnel is up (when enabled), then fetch via YFinance.

        A tunnel start failure is reported but does not abort the request:
        the fetch is still attempted, so callers see the data source's own
        outcome rather than a new failure mode.
        """
        if not self._start_tunnel():
            print(f"⚠️ SSH隧道启动失败,将尝试直连获取: {code}")
        return self._yfinance.fetch(code, start_date, end_date)

    def _fetch_us_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch a US index through YFinance, starting the tunnel first.

        NOTE(review): any translation of index codes to Yahoo symbols is
        presumably done inside ``YFinanceSource`` -- not visible here.
        """
        return self._fetch_via_yfinance(code, start_date, end_date)

    def _fetch_hk_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch a Hong Kong index through YFinance, starting the tunnel first."""
        return self._fetch_via_yfinance(code, start_date, end_date)

    def _fetch_futures(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch futures data, choosing the source by exchange suffix.

        - Codes ending in ``.NYM`` (NYMEX) go through YFinance (with tunnel).
        - Everything else (e.g. .SHF/.DCE/.CZC) goes through Tushare.
        """
        if code.endswith('.NYM'):
            return self._fetch_via_yfinance(code, start_date, end_date)
        return self._tushare.fetch_futures(code, start_date, end_date)

    def _fetch_crypto(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Placeholder for cryptocurrency data: warns and returns ``None``.

        TODO: implement cryptocurrency fetching. The original note records
        that CCXT does not support a SOCKS5 proxy.
        """
        print(f"⚠️ 加密货币数据获取尚未实现: {code}")
        return None

    # ============================================================
    # Batch fetching
    # ============================================================

    def fetch_batch(
        self,
        codes: List[str],
        start_date: str,
        end_date: str,
        retry: int = 3
    ) -> Dict[str, Optional[pd.DataFrame]]:
        """Fetch several codes one by one via ``fetch()``.

        Args:
            codes: List of instrument codes.
            start_date: Start date.
            end_date: End date.
            retry: Maximum attempts per code, forwarded to ``fetch()``.

        Returns:
            ``{code: DataFrame or None}``; a duplicated code appears once.
        """
        results = {}

        # Grouping is only used for the progress summary below.
        grouped = AssetTypeDetector.group_by_type(codes)
        print(f"开始获取 {len(codes)} 只标的...")
        for asset_type, code_list in grouped.items():
            print(f" {asset_type.value}: {len(code_list)}")

        # No eager tunnel start: the YFinance-backed paths start it on
        # demand inside fetch()'s retry guard, so a batch of Tushare-only
        # codes never opens an SSH connection.
        for code in codes:
            results[code] = self.fetch(code, start_date, end_date, retry)
        return results

    # ============================================================
    # Helpers
    # ============================================================

    def get_asset_type(self, code: str) -> "AssetType":
        """Return the asset type detected for ``code``."""
        return AssetTypeDetector.detect(code)

    def is_supported(self, code: str) -> bool:
        """Return ``True`` unless the detected type is ``AssetType.UNKNOWN``."""
        return AssetTypeDetector.detect(code) != AssetType.UNKNOWN