def fetch_etf(self, code: str, start_date: str, end_date: str, adj: str = 'raw') -> Optional[pd.DataFrame]:
    """
    Unified ETF fetch entry point: prices plus NAV / premium metadata.

    Args:
        code: ETF code, e.g. '159915.SZ', '518880.SH'.
        start_date: Start date, 'YYYY-MM-DD'.
        end_date: End date, 'YYYY-MM-DD'.
        adj: Price adjustment mode, 'raw' (unadjusted, default) or
            'hfq' (backward-adjusted).

    Returns:
        The price DataFrame produced by ``_fetch_etf_raw`` (adj='raw') or
        ``_fetch_etf_hfq`` (adj='hfq'), or None when that helper returns
        None. Two metadata entries are ALWAYS present on the result:

        - ``attrs['nav']``: whatever ``fetch_etf_nav`` returned
          (may be None).
        - ``attrs['premium']``: the result of
          ``_calculate_premium_series`` on raw prices, or None when NAV
          data is missing/empty or raw prices could not be fetched.

    Raises:
        ValueError: If ``adj`` is neither 'raw' nor 'hfq'.
    """
    # Reject unsupported adjustment modes before doing any remote work.
    if adj not in ('raw', 'hfq'):
        raise ValueError(f"ETF 仅支持 adj='raw' 或 'hfq',当前: {adj}")

    # 1. Price data for the requested adjustment mode.
    if adj == 'hfq':
        price_df = self._fetch_etf_hfq(code, start_date, end_date)
    else:
        price_df = self._fetch_etf_raw(code, start_date, end_date)

    if price_df is None:
        return None

    # 2. NAV data travels with the price frame via DataFrame.attrs.
    nav_df = self.fetch_etf_nav(code, start_date, end_date)
    price_df.attrs['nav'] = nav_df

    # Always define the key so callers can index attrs['premium'] directly
    # instead of hitting a KeyError when no premium could be computed.
    price_df.attrs['premium'] = None

    # 3. Premium is always computed from raw (unadjusted) prices.
    if nav_df is not None and len(nav_df) > 0:
        if adj == 'raw':
            raw_df = price_df
        else:
            # NOTE(review): the hfq frame may already carry the raw close,
            # which would make this second fetch avoidable - confirm against
            # _fetch_etf_hfq before removing it.
            raw_df = self._fetch_etf_raw(code, start_date, end_date)
        if raw_df is not None:
            price_df.attrs['premium'] = self._calculate_premium_series(raw_df, nav_df)

    return price_df
nav_df, premium_series) - - price_df: ETF价格数据 (OHLCV) - - nav_df: ETF净值数据 - - premium_series: 溢价率序列 (每天计算) - """ - # 1. 获取价格 - price_df = self.fetch_etf(code, start_date, end_date) - - # 2. 获取净值 - nav_df = self.fetch_etf_nav(code, start_date, end_date) - - # 3. 计算溢价率 - premium_series = None - if price_df is not None and nav_df is not None and len(nav_df) > 0: - premium_series = self._calculate_premium_series(price_df, nav_df) - - return price_df, nav_df, premium_series - def _calculate_premium_series( self, price_df: pd.DataFrame, @@ -382,9 +388,9 @@ class TushareSource: return premium - def fetch_etf_adj(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]: + def _fetch_etf_hfq(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]: """ - 获取 ETF 后复权价格数据 + 获取 ETF 后复权价格数据(内部方法) 通过 fund_daily + fund_adj 手动计算后复权价格,消除份额折算(拆分)对收益率的影响。 fund_adj 单次限 2000 条,按 5 年分段请求再拼接。 diff --git a/datasource/universal_fetcher.py b/datasource/universal_fetcher.py index 85e5445..f4b540e 100644 --- a/datasource/universal_fetcher.py +++ b/datasource/universal_fetcher.py @@ -306,9 +306,9 @@ class UniversalDataFetcher: end_date: str ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.Series]]: """ - 获取ETF价格 + 净值 + 溢价率序列 + 获取ETF价格 + 净值 + 溢价率序列(兼容旧接口) - 直接调用 TushareSource 的完整方法,封装业务逻辑 + 内部调用统一的 fetch_etf() 方法,从 DataFrame.attrs 提取元数据 Args: code: ETF代码 @@ -318,12 +318,20 @@ class UniversalDataFetcher: Returns: (price_df, nav_df, premium_series) - price_df: ETF价格数据 (OHLCV) - - nav_df: ETF净值数据 - - premium_series: 溢价率序列 (每天计算) + - nav_df: ETF净值数据(来自 df.attrs['nav']) + - premium_series: 溢价率序列(来自 df.attrs['premium']) """ - return self._tushare.fetch_etf_with_nav(code, start_date, end_date) - - # 移除 _calculate_premium_series 方法(已下移到 TushareSource) + # 调用统一的 fetch_etf() 方法 + df = self._tushare.fetch_etf(code, start_date, end_date, adj='raw') + + if df is None: + return None, None, None + + # 从 attrs 提取元数据 + nav_df = df.attrs.get('nav') + 
premium_series = df.attrs.get('premium') + + return df, nav_df, premium_series # ============================================================ # 内部方法:特殊资产类型(保留) @@ -450,11 +458,9 @@ class UniversalDataFetcher: end_date: str ) -> Optional[pd.DataFrame]: """ - 获取 A股 ETF 后复权价格 + 获取 A股 ETF 后复权价格(兼容旧接口) - 通过 fund_daily + fund_adj 手动计算后复权价格 - - 消除份额折算(拆分)对收益率的影响 - - 适用于计算真实收益率 + 内部调用统一的 fetch_etf(adj='hfq') 方法 Args: code: ETF代码,如 '159915.SZ', '513100.SH' @@ -463,13 +469,10 @@ class UniversalDataFetcher: Returns: DataFrame with columns: date, open, close, adj_factor, close_hfq - - 示例: - # 纳指ETF后复权(正确计算收益率) - df = fetcher.fetch_etf_adj("513100.SH", "2020-01-01", "2024-12-31") - # 使用 close_hfq 计算收益率,而非 close + DataFrame.attrs['nav']: 净值 DataFrame + DataFrame.attrs['premium']: 溢价率 Series(基于原始价格计算) """ - return self._tushare.fetch_etf_adj(code, start_date, end_date) + return self._tushare.fetch_etf(code, start_date, end_date, adj='hfq') def fetch_us_adj( self,