refactor: 统一ETF获取接口为单个DataFrame返回

重构说明:
- TushareSource.fetch_etf(): 新增 adj 参数,统一接口
  - 返回单个 DataFrame
  - df.attrs['nav']: 净值 DataFrame
  - df.attrs['premium']: 溢价率 Series
- 移除冗余方法:
  - fetch_etf_with_nav() → 合并到 fetch_etf()
  - fetch_etf_adj() → 重命名为 _fetch_etf_hfq()(内部方法)
- UniversalDataFetcher: 适配新接口
  - fetch_etf_with_nav(): 从 df.attrs 提取元数据(兼容旧接口)
  - fetch_etf_adj(): 调用 fetch_etf(adj='hfq')
- Flask: 更新注释说明

架构优势:
- 单一接口:一个方法搞定所有 ETF 数据获取
- 数据一致:所有数据在一个 DataFrame 对象中
- 缓存友好:只需缓存一个 DataFrame
- 扩展性强:新增数据直接添加到 attrs
This commit is contained in:
2026-05-23 22:36:23 +08:00
parent 2867ae8d21
commit feb7c78e68
3 changed files with 66 additions and 57 deletions

View File

@@ -113,16 +113,55 @@ class TushareSource:
print(f"Tushare下载期货 {code} 失败: {e}")
return None
def fetch_etf(self, code: str, start_date: str, end_date: str, adj: str = 'raw') -> Optional[pd.DataFrame]:
    """
    Unified entry point for fetching ETF data.

    Picks the raw or backward-adjusted price fetcher according to ``adj``,
    then attaches NAV and premium metadata to the returned frame's ``attrs``.

    Args:
        code: ETF code, e.g. '159915.SZ' or '518880.SH'.
        start_date: Start date, 'YYYY-MM-DD'.
        end_date: End date, 'YYYY-MM-DD'.
        adj: Adjustment type, 'raw' (unadjusted) or 'hfq' (backward-adjusted).
            Defaults to 'raw'.

    Returns:
        The price DataFrame (documented columns: date, open, high, low,
        close, volume; with adj='hfq' additionally adj_factor, close_hfq),
        or None when the price fetch returns None.
        Metadata attached to ``DataFrame.attrs``:
          - attrs['nav']: the result of ``fetch_etf_nav`` (may be None).
          - attrs['premium']: premium series, always computed from raw
            (unadjusted) prices; None when NAV data is missing/empty or the
            raw prices could not be fetched.

    Raises:
        ValueError: If ``adj`` is neither 'raw' nor 'hfq'.
    """
    # Validate the adjustment mode before doing any fetching.
    if adj not in ('raw', 'hfq'):
        raise ValueError(f"ETF 仅支持 adj='raw''hfq',当前: {adj}")

    # 1. Price data.
    if adj == 'hfq':
        price_df = self._fetch_etf_hfq(code, start_date, end_date)
    else:
        price_df = self._fetch_etf_raw(code, start_date, end_date)

    if price_df is None:
        return None

    # 2. NAV is attached as metadata rather than returned separately.
    nav_df = self.fetch_etf_nav(code, start_date, end_date)
    price_df.attrs['nav'] = nav_df

    # Always define the key: callers can read attrs['premium'] without a
    # KeyError, and a reused frame never carries a stale value.
    price_df.attrs['premium'] = None

    # 3. Premium is always based on raw prices, so the hfq path needs a
    #    separate raw fetch.
    if nav_df is not None and len(nav_df) > 0:
        if adj == 'raw':
            price_for_premium = price_df
        else:
            price_for_premium = self._fetch_etf_raw(code, start_date, end_date)

        if price_for_premium is not None:
            price_df.attrs['premium'] = self._calculate_premium_series(price_for_premium, nav_df)

    return price_df
def _fetch_etf_raw(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
"""获取 ETF 原始价格数据(内部方法)"""
try:
pro = self._get_pro_api()
ts_code = code.replace(".SS", ".SH")
df = pro.fund_daily(
@@ -276,39 +315,6 @@ class TushareSource:
prefix = code[:2]
return prefix in ['51', '52', '15', '16']
def fetch_etf_with_nav(
    self,
    code: str,
    start_date: str,
    end_date: str
) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.Series]]:
    """
    Fetch the full ETF data set: prices, NAV and the premium series.

    Args:
        code: ETF code, e.g. '159915.SZ' or '518880.SH'.
        start_date: Start date, 'YYYY-MM-DD'.
        end_date: End date, 'YYYY-MM-DD'.

    Returns:
        A ``(price_df, nav_df, premium_series)`` tuple:
          - price_df: result of ``fetch_etf`` (may be None).
          - nav_df: result of ``fetch_etf_nav`` (may be None).
          - premium_series: result of ``_calculate_premium_series``; None
            unless prices are available and NAV data is non-empty.
    """
    # Both fetches always run, even if the first one comes back empty-handed.
    prices = self.fetch_etf(code, start_date, end_date)
    nav = self.fetch_etf_nav(code, start_date, end_date)

    # The premium needs a price frame plus at least one NAV row.
    can_compute = prices is not None and nav is not None and len(nav) > 0
    premium = self._calculate_premium_series(prices, nav) if can_compute else None

    return prices, nav, premium
def _calculate_premium_series(
self,
price_df: pd.DataFrame,
@@ -382,9 +388,9 @@ class TushareSource:
return premium
def fetch_etf_adj(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
def _fetch_etf_hfq(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
"""
获取 ETF 后复权价格数据
获取 ETF 后复权价格数据(内部方法)
通过 fund_daily + fund_adj 手动计算后复权价格,消除份额折算(拆分)对收益率的影响。
fund_adj 单次限 2000 条,按 5 年分段请求再拼接。