Files
etf/datasource/universal_fetcher.py
aszerW 4e3aac5e0e feat: Flask统一数据服务迁移(分层架构)
架构设计:
- 对外统一接口 fetch():自动识别资产类型并路由
- 对内分层实现:各资产类型独立方法,职责单一

新增文件:
- datasource/universal_fetcher.py: 统一数据获取器
  - _fetch_china_index: A股指数(Tushare)
  - _fetch_china_etf: A股ETF(含净值)
  - _fetch_us_index: 美股指数(YFinance+SSH)
  - _fetch_hk_index: 港股指数(YFinance+SSH)
  - _fetch_futures: 期货(Tushare/YFinance)
  - fetch_etf_with_nav: ETF价格+净值(计算溢价率)

- datasource/asset_type_detector.py: 资产类型检测器
  - AssetType枚举:9种资产类型
  - detect(): 自动识别资产类型
  - group_by_type(): 批量分组

- datasource/flask_server.py: Flask API服务
  - LRU + TTL 双缓存机制
  - 8个API端点:ohlcv、etf/nav、batch、cache等

更新:
- datasource/__init__.py: 导出新模块

验证:
- 模块导入成功
- 资产类型检测正确
- A股数据获取正常(沪深300: 5条)
2026-05-12 21:33:19 +08:00

322 lines
9.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
统一数据获取器
分层架构:对外统一接口,对内按资产类型独立实现
支持A股指数/ETF、港股指数、美股指数、期货、加密货币
用法:
from datasource import UniversalDataFetcher
fetcher = UniversalDataFetcher()
# 单标的获取(自动识别类型)
df = fetcher.fetch("000300.SH", "2024-01-01", "2024-12-31")
# ETF获取含净值
price_df, nav_df = fetcher.fetch_etf_with_nav("513100.SH", "2024-01-01", "2024-12-31")
# 批量获取
results = fetcher.fetch_batch(["000300.SH", "NDX", "N225"], "2024-01-01", "2024-12-31")
"""
import os
import time
from typing import Optional, Dict, List, Tuple
from datetime import datetime
import pandas as pd
from .tushare_source import TushareSource
from .yfinance_source import YFinanceSource
from .ssh_tunnel import SSHTunnelManager
from .asset_type_detector import AssetTypeDetector, AssetType
class UniversalDataFetcher:
    """Unified market-data fetcher.

    Layered design:
    - Outside: a single ``fetch()`` entry point that detects the asset type
      of a code and routes the request.
    - Inside: one small method per asset type, each with a single job.

    The instance can be used as a context manager; entering starts the SSH
    tunnel (when enabled in ``ssh_config``) and leaving stops it.
    """

    # Pause (seconds) between two failed attempts inside fetch().
    # Kept as a class attribute so callers/subclasses can tune it.
    RETRY_DELAY_SECONDS = 2

    def __init__(
        self,
        ssh_config: Optional[Dict] = None,
        use_cache: bool = True,
        cache_dir: str = "data/etf_cache/daily"
    ):
        """Create the fetcher and its data-source backends.

        Args:
            ssh_config: SSH tunnel settings (used for HK/US data). Only the
                ``'enabled'`` key is read by this class; the whole dict is
                handed to ``SSHTunnelManager``.
            use_cache: Whether a local cache should be used. Stored on the
                instance; not read anywhere else in this class.
            cache_dir: Cache directory. Stored on the instance; not read
                anywhere else in this class.
        """
        self.ssh_config = ssh_config or {}
        self.use_cache = use_cache
        self.cache_dir = cache_dir

        # Data-source backends.
        self._tushare = TushareSource()
        self._yfinance = YFinanceSource()

        # The SSH tunnel is created lazily, on first need.
        self._tunnel: Optional[SSHTunnelManager] = None
        self._tunnel_started = False

    def __enter__(self):
        """Start the tunnel (if enabled) and return the fetcher itself."""
        self._start_tunnel()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the tunnel; exceptions from the ``with`` body are not suppressed."""
        self._stop_tunnel()

    # ============================================================
    # SSH tunnel management
    # ============================================================
    def _start_tunnel(self) -> bool:
        """Start the SSH tunnel if it is enabled and not already running.

        Returns:
            True when the tunnel is running or is not enabled in
            ``ssh_config``; False when starting it was attempted and failed.
        """
        if self._tunnel_started:
            return True
        if not self.ssh_config.get('enabled'):
            # Nothing to start: tunnelling is switched off.
            return True

        self._tunnel = SSHTunnelManager(self.ssh_config)
        if self._tunnel.start():
            self._tunnel_started = True
            return True
        return False

    def _stop_tunnel(self):
        """Stop the SSH tunnel, if one was created, and reset tunnel state."""
        # Reset the bookkeeping first so the fetcher is left in a clean,
        # restartable state even when stop() raises.
        tunnel, self._tunnel = self._tunnel, None
        self._tunnel_started = False
        if tunnel:
            tunnel.stop()

    # ============================================================
    # Unified entry point (automatic routing)
    # ============================================================
    def fetch(
        self,
        code: str,
        start_date: str,
        end_date: str,
        retry: int = 3
    ) -> Optional[pd.DataFrame]:
        """Fetch data for one code, routing by its detected asset type.

        Args:
            code: Instrument code.
            start_date: Start date, 'YYYY-MM-DD'.
            end_date: End date, 'YYYY-MM-DD'.
            retry: Maximum number of attempts. Values below 1 are treated
                as 1 so that the call always tries at least once.

        Returns:
            Whatever the routed backend returns (documented by the original
            author as a DataFrame with columns date, open, high, low, close,
            volume), or None when the asset type is not routable or every
            attempt raised.
        """
        asset_type = AssetTypeDetector.detect(code)
        attempts = max(1, retry)

        for attempt in range(attempts):
            try:
                handler = self._handler_for(asset_type)
                if handler is None:
                    print(f"⚠️ 未知资产类型: {code} -> {asset_type}")
                    return None
                return handler(code, start_date, end_date)
            except Exception as e:
                # Boundary handler: any backend failure triggers a retry,
                # and only the final failure is reported.
                if attempt < attempts - 1:
                    time.sleep(self.RETRY_DELAY_SECONDS)
                else:
                    print(f"✗ 获取 {code} 失败 (尝试 {attempt+1}/{attempts}): {e}")
        return None

    def _handler_for(self, asset_type):
        """Return the bound fetch method for ``asset_type``, or None if unrouted."""
        routes = {
            AssetType.CHINA_INDEX: self._fetch_china_index,
            AssetType.CHINA_ETF: self._fetch_china_etf,
            AssetType.US_INDEX: self._fetch_us_index,
            AssetType.HK_INDEX: self._fetch_hk_index,
            AssetType.FUTURES: self._fetch_futures,
            AssetType.CRYPTO: self._fetch_crypto,
        }
        return routes.get(asset_type)

    # ============================================================
    # Layered implementation: one method per asset type
    # ============================================================
    def _fetch_via_yfinance(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch ``code`` from the YFinance backend, starting the tunnel first.

        A failed tunnel start is reported but not fatal: the request is
        still sent, as it was before the failure was surfaced.
        """
        if not self._start_tunnel():
            print(f"⚠️ SSH隧道启动失败,将尝试直接获取: {code}")
        return self._yfinance.fetch(code, start_date, end_date)

    def _fetch_china_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch an A-share index through Tushare (no SSH tunnel involved)."""
        return self._tushare.fetch_index(code, start_date, end_date)

    def _fetch_china_etf(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch A-share ETF prices through the Tushare backend."""
        return self._tushare.fetch_etf(code, start_date, end_date)

    def fetch_etf_with_nav(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
        """Fetch ETF prices together with the ETF's net asset value.

        The pair is meant for premium/discount calculations.

        Args:
            code: ETF code.
            start_date: Start date.
            end_date: End date.

        Returns:
            ``(price_df, nav_df)`` as returned by the Tushare backend.
        """
        price_df = self._tushare.fetch_etf(code, start_date, end_date)
        nav_df = self._tushare.fetch_etf_nav(code, start_date, end_date)
        return price_df, nav_df

    def _fetch_us_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch a US index through YFinance (goes through the SSH tunnel)."""
        return self._fetch_via_yfinance(code, start_date, end_date)

    def _fetch_hk_index(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch a Hong Kong index through YFinance (goes through the SSH tunnel)."""
        return self._fetch_via_yfinance(code, start_date, end_date)

    def _fetch_futures(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Fetch futures data.

        Routing:
        - codes ending in ``.NYM`` (NYMEX): YFinance, via the tunnel;
        - everything else (e.g. .SHF/.DCE/.CZC Chinese futures): Tushare.
        """
        if code.endswith('.NYM'):
            return self._fetch_via_yfinance(code, start_date, end_date)
        return self._tushare.fetch_futures(code, start_date, end_date)

    def _fetch_crypto(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Placeholder for cryptocurrency data; always returns None.

        Original author's note: CCXT does not support SOCKS5 proxies.
        TODO: implement cryptocurrency fetching.
        """
        print(f"⚠️ 加密货币数据获取尚未实现: {code}")
        return None

    # ============================================================
    # Batch fetching
    # ============================================================
    def fetch_batch(
        self,
        codes: List[str],
        start_date: str,
        end_date: str,
        retry: int = 3
    ) -> Dict[str, Optional[pd.DataFrame]]:
        """Fetch several codes and collect the results by code.

        Args:
            codes: Instrument codes; duplicates are fetched only once.
            start_date: Start date.
            end_date: End date.
            retry: Maximum attempts per code, forwarded to ``fetch()``.

        Returns:
            ``{code: result}`` in first-occurrence order, where each result
            is what ``fetch()`` returned for that code (None on failure).
        """
        results: Dict[str, Optional[pd.DataFrame]] = {}

        # The grouping is only used for the progress summary.
        grouped = AssetTypeDetector.group_by_type(codes)
        print(f"开始获取 {len(codes)} 只标的...")
        for asset_type, code_list in grouped.items():
            print(f" {asset_type.value}: {len(code_list)}")

        # No eager tunnel start here: the handlers that need the tunnel
        # start it themselves, so batches without such codes never open one.
        # dict.fromkeys() drops repeated codes while preserving order.
        for code in dict.fromkeys(codes):
            results[code] = self.fetch(code, start_date, end_date, retry)
        return results

    # ============================================================
    # Helpers
    # ============================================================
    def get_asset_type(self, code: str) -> "AssetType":
        """Return the asset type detected for ``code``."""
        return AssetTypeDetector.detect(code)

    def is_supported(self, code: str) -> bool:
        """Return True unless ``code`` is detected as ``AssetType.UNKNOWN``."""
        return AssetTypeDetector.detect(code) != AssetType.UNKNOWN