# Methods of ``TushareSource`` (datasource/tushare_source.py). ``self`` must
# provide ``_clear_proxy``, ``_get_pro_api`` and ``_restore_proxy``.


def fetch_etf_adj(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
    """Fetch ETF daily prices together with a back-adjusted close.

    The back-adjusted close is computed by hand as ``close * adj_factor``
    from ``fund_daily`` and ``fund_adj``, so that share splits/conversions
    do not distort returns. ``fund_adj`` is requested in consecutive,
    non-overlapping date windows and the pieces are concatenated.

    Args:
        code: ETF code such as '159915.SZ' or '518880.SH'. A '.SS' suffix
            is rewritten to '.SH' for the API call only.
        start_date: Inclusive start date, 'YYYY-MM-DD'.
        end_date: Inclusive end date, 'YYYY-MM-DD'.

    Returns:
        A DataFrame indexed by ``date`` (ascending) with the columns
        ``code``, ``open``, ``close``, ``adj_factor`` and ``close_hfq``.
        When no adjustment factors are available, ``adj_factor`` is 1.0 and
        ``close_hfq`` equals ``close``. Returns ``None`` when there is no
        daily data or when any step raises.
    """
    from datetime import datetime, timedelta

    # Window length per fund_adj request. Five 365-day years is at most
    # 1826 calendar days inclusive, below the 2000-row limit per call.
    max_span = timedelta(days=5 * 365)

    original_proxy = self._clear_proxy()

    try:
        # Parse first so a malformed date fails before any API call is made.
        start_dt = datetime.strptime(start_date, '%Y-%m-%d')
        end_dt = datetime.strptime(end_date, '%Y-%m-%d')

        pro = self._get_pro_api()
        ts_code = code.replace('.SS', '.SH')

        df_daily = pro.fund_daily(
            ts_code=ts_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df_daily is None or df_daily.empty:
            return None

        df = df_daily.rename(columns={'trade_date': 'date', 'vol': 'volume'})
        df['date'] = pd.to_datetime(df['date'])
        df = df.set_index('date').sort_index()
        # Set the caller's code up front so both the adjusted and the
        # fallback result carry the column that is selected on return.
        df['code'] = code

        adj_chunks = []
        chunk_start = start_dt
        # `<=` so that a single-day range still issues one request.
        while chunk_start <= end_dt:
            # timedelta arithmetic instead of replace(year=...), which raises
            # ValueError when the window starts on Feb 29.
            chunk_end = min(chunk_start + max_span, end_dt)
            df_adj_chunk = pro.fund_adj(
                ts_code=ts_code,
                start_date=chunk_start.strftime('%Y%m%d'),
                end_date=chunk_end.strftime('%Y%m%d'),
            )
            if df_adj_chunk is not None and len(df_adj_chunk) > 0:
                adj_chunks.append(df_adj_chunk)
            # Both bounds are inclusive: start the next window one day later
            # so the boundary date is not fetched twice.
            chunk_start = chunk_end + timedelta(days=1)

        if adj_chunks:
            df_adj = pd.concat(adj_chunks, ignore_index=True)
            df_adj['trade_date'] = pd.to_datetime(df_adj['trade_date'])
            # reindex(method='ffill') needs a unique, sorted index; drop any
            # repeated dates defensively before aligning.
            factors = (
                df_adj.drop_duplicates(subset='trade_date', keep='last')
                .set_index('trade_date')['adj_factor']
                .sort_index()
            )
            # Carry the latest known factor forward onto each trading day;
            # days before the first known factor fall back to 1.0.
            df['adj_factor'] = factors.reindex(df.index, method='ffill').fillna(1.0)
        else:
            # No adjustment factors at all: return the raw prices unchanged.
            df['adj_factor'] = 1.0

        df['close_hfq'] = df['close'] * df['adj_factor']

        return df[['code', 'open', 'close', 'adj_factor', 'close_hfq']]

    except Exception as e:
        print(f"Tushare下载ETF复权数据 {code} 失败: {e}")
        return None

    finally:
        self._restore_proxy(original_proxy)


def fetch_trade_cal(self, start_date: str, end_date: str) -> pd.DatetimeIndex:
    """Fetch the open trading days of the SSE exchange calendar.

    Args:
        start_date: Start date, 'YYYY-MM-DD'.
        end_date: End date, 'YYYY-MM-DD'.

    Returns:
        A DatetimeIndex of trading days in ascending order. An empty
        DatetimeIndex is returned when the API yields no rows or when any
        step raises.
    """
    original_proxy = self._clear_proxy()

    try:
        pro = self._get_pro_api()

        df = pro.trade_cal(
            exchange='SSE',
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
            is_open='1',
        )
        if df is None or len(df) == 0:
            return pd.DatetimeIndex([])

        # Convert the calendar dates and return them sorted.
        trade_dates = pd.to_datetime(df['cal_date'])
        return pd.DatetimeIndex(trade_dates.sort_values())

    except Exception as e:
        print(f"Tushare下载交易日历失败: {e}")
        return pd.DatetimeIndex([])

    finally:
        self._restore_proxy(original_proxy)