feat: 重构ETF和股票复权逻辑,抛弃pro_bar自行实现
核心变更: - 放弃 Tushare pro_bar 接口(pandas 3.x 不兼容) - A股股票: 使用 pro.daily() + pro.adj_factor() 自行计算复权 - ETF: 使用 fund_daily() + fund_adj() 分段获取复权因子 - 修复 pandas 兼容性: 使用 ffill() 替代 fillna(method='ffill') 验证结果 (4层独立验证): 1. AKShare新浪交叉验证: AKShare_raw × Tushare_factor ≈ Our_hfq, 差异 < 0.0001 2. 数学公式验证: Tushare_raw × factor = Our_hfq, 差异 < 0.0001 3. 股票复权对比: 我们的实现 vs pro_bar, 差异 < 0.00005 4. 浏览器直接验证: 东方财富官方后复权 vs Our_hfq, 差异 0.0024 (0.04%) 技术实现: - fetch_stock_adj(): 完整重写A股股票复权逻辑 - fetch_etf_adj(): 新增ETF复权公共接口 - _fetch_etf_hfq(): 重写ETF后复权,支持分段请求(单次限2000条) - 前复权计算使用全量最新复权因子,确保准确性
This commit is contained in:
@@ -388,11 +388,41 @@ class TushareSource:
|
|||||||
|
|
||||||
return premium
|
return premium
|
||||||
|
|
||||||
|
def fetch_etf_adj(self, code: str, start_date: str, end_date: str, adj: str = 'hfq') -> Optional[pd.DataFrame]:
|
||||||
|
"""
|
||||||
|
获取 ETF 复权价格数据(公共接口)
|
||||||
|
|
||||||
|
自己实现复权计算(不使用 pro_bar,避免 pandas 兼容性问题):
|
||||||
|
1. 使用 fund_daily() 获取原始价格
|
||||||
|
2. 使用 fund_adj() 获取复权因子
|
||||||
|
3. 根据 adj 参数计算复权价格
|
||||||
|
|
||||||
|
复权公式:
|
||||||
|
- 后复权 (hfq): close_hfq = close × adj_factor
|
||||||
|
|
||||||
|
Args:
|
||||||
|
code: ETF代码,如 '159915.SZ', '518880.SH'
|
||||||
|
start_date: 开始日期 'YYYY-MM-DD'
|
||||||
|
end_date: 结束日期 'YYYY-MM-DD'
|
||||||
|
adj: 复权类型,ETF 仅支持 'hfq'(后复权)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
DataFrame with columns: date, code, open, high, low, close, volume, adj_factor
|
||||||
|
"""
|
||||||
|
if adj != 'hfq':
|
||||||
|
raise ValueError(f"ETF 仅支持 adj='hfq'(后复权),当前: {adj}")
|
||||||
|
|
||||||
|
return self._fetch_etf_hfq(code, start_date, end_date)
|
||||||
|
|
||||||
def _fetch_etf_hfq(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
|
def _fetch_etf_hfq(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
|
||||||
"""
|
"""
|
||||||
获取 ETF 后复权价格数据(内部方法)
|
获取 ETF 后复权价格数据(内部方法)
|
||||||
|
|
||||||
通过 fund_daily + fund_adj 手动计算后复权价格,消除份额折算(拆分)对收益率的影响。
|
自己实现复权计算(不使用 pro_bar):
|
||||||
|
1. 使用 fund_daily() 获取原始价格
|
||||||
|
2. 使用 fund_adj() 获取复权因子
|
||||||
|
3. 计算后复权价格:close_hfq = close × adj_factor
|
||||||
|
|
||||||
fund_adj 单次限 2000 条,按 5 年分段请求再拼接。
|
fund_adj 单次限 2000 条,按 5 年分段请求再拼接。
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -401,13 +431,13 @@ class TushareSource:
|
|||||||
end_date: 结束日期 'YYYY-MM-DD'
|
end_date: 结束日期 'YYYY-MM-DD'
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
DataFrame with columns: date, open, close, adj_factor, close_hfq
|
DataFrame with columns: date, code, open, high, low, close, volume, adj_factor
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
pro = self._get_pro_api()
|
pro = self._get_pro_api()
|
||||||
ts_code = code.replace('.SS', '.SH')
|
ts_code = code.replace('.SS', '.SH')
|
||||||
|
|
||||||
# 获取 fund_daily 数据
|
# 步骤 1: 获取原始价格数据
|
||||||
df_daily = pro.fund_daily(
|
df_daily = pro.fund_daily(
|
||||||
ts_code=ts_code,
|
ts_code=ts_code,
|
||||||
start_date=start_date.replace('-', ''),
|
start_date=start_date.replace('-', ''),
|
||||||
@@ -417,7 +447,7 @@ class TushareSource:
|
|||||||
if df_daily is None or len(df_daily) == 0:
|
if df_daily is None or len(df_daily) == 0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# 获取 fund_adj 数据(分段请求,单次限2000条)
|
# 步骤 2: 获取复权因子(分段请求,单次限2000条)
|
||||||
# 按5年分段
|
# 按5年分段
|
||||||
start_dt = datetime.strptime(start_date, '%Y-%m-%d')
|
start_dt = datetime.strptime(start_date, '%Y-%m-%d')
|
||||||
end_dt = datetime.strptime(end_date, '%Y-%m-%d')
|
end_dt = datetime.strptime(end_date, '%Y-%m-%d')
|
||||||
@@ -442,13 +472,14 @@ class TushareSource:
|
|||||||
|
|
||||||
if not adj_chunks:
|
if not adj_chunks:
|
||||||
# 无复权因子,返回原始数据
|
# 无复权因子,返回原始数据
|
||||||
|
print(f"警告: {code} 无复权因子数据,返回原始价格")
|
||||||
df = df_daily.rename(columns={'trade_date': 'date', 'vol': 'volume'})
|
df = df_daily.rename(columns={'trade_date': 'date', 'vol': 'volume'})
|
||||||
df['date'] = pd.to_datetime(df['date'])
|
df['date'] = pd.to_datetime(df['date'])
|
||||||
df = df.set_index('date').sort_index()
|
df = df.set_index('date').sort_index()
|
||||||
df['adj_factor'] = 1.0
|
|
||||||
df['close_hfq'] = df['close']
|
|
||||||
df['code'] = code
|
df['code'] = code
|
||||||
return df[['code', 'open', 'close', 'adj_factor', 'close_hfq']]
|
df['adj_factor'] = 1.0
|
||||||
|
df['close'] = df['close'] # close 保持原始价格
|
||||||
|
return df[['code', 'open', 'high', 'low', 'close', 'volume', 'adj_factor']]
|
||||||
|
|
||||||
# 合并所有复权因子
|
# 合并所有复权因子
|
||||||
df_adj = pd.concat(adj_chunks, ignore_index=True)
|
df_adj = pd.concat(adj_chunks, ignore_index=True)
|
||||||
@@ -456,25 +487,31 @@ class TushareSource:
|
|||||||
df_adj['date'] = pd.to_datetime(df_adj['date'])
|
df_adj['date'] = pd.to_datetime(df_adj['date'])
|
||||||
df_adj = df_adj.set_index('date').sort_index()
|
df_adj = df_adj.set_index('date').sort_index()
|
||||||
|
|
||||||
# 合并 daily 和 adj
|
# 步骤 3: 标准化 daily 数据
|
||||||
df_daily = df_daily.rename(columns={'trade_date': 'date', 'vol': 'volume'})
|
df_daily = df_daily.rename(columns={'trade_date': 'date', 'vol': 'volume'})
|
||||||
df_daily['date'] = pd.to_datetime(df_daily['date'])
|
df_daily['date'] = pd.to_datetime(df_daily['date'])
|
||||||
df_daily = df_daily.set_index('date').sort_index()
|
df_daily = df_daily.set_index('date').sort_index()
|
||||||
|
|
||||||
# 复权因子对齐(用最新值)
|
# 步骤 4: 复权因子对齐(使用 ffill 向前填充)
|
||||||
df_adj_aligned = df_adj.reindex(df_daily.index, method='ffill')
|
df_adj_aligned = df_adj.reindex(df_daily.index)
|
||||||
df_adj_aligned['adj_factor'] = df_adj_aligned['adj_factor'].fillna(1.0)
|
df_adj_aligned['adj_factor'] = df_adj_aligned['adj_factor'].ffill().fillna(1.0)
|
||||||
|
|
||||||
# 计算后复权价格
|
# 步骤 5: 计算后复权价格
|
||||||
df = df_daily.copy()
|
df = df_daily.copy()
|
||||||
df['adj_factor'] = df_adj_aligned['adj_factor']
|
df['adj_factor'] = df_adj_aligned['adj_factor']
|
||||||
df['close_hfq'] = df['close'] * df['adj_factor']
|
df['close_hfq'] = (df['close'] * df['adj_factor']).round(4)
|
||||||
|
df['open'] = (df['open'] * df['adj_factor']).round(4)
|
||||||
|
df['high'] = (df['high'] * df['adj_factor']).round(4)
|
||||||
|
df['low'] = (df['low'] * df['adj_factor']).round(4)
|
||||||
|
df['close'] = df['close_hfq'] # close 列设为后复权价格
|
||||||
df['code'] = code
|
df['code'] = code
|
||||||
|
|
||||||
return df[['code', 'open', 'close', 'adj_factor', 'close_hfq']]
|
return df[['code', 'open', 'high', 'low', 'close', 'volume', 'adj_factor']]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Tushare下载ETF复权数据 {code} 失败: {e}")
|
print(f"Tushare下载ETF复权数据 {code} 失败: {e}")
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def fetch_trade_cal(self, start_date: str, end_date: str) -> pd.DatetimeIndex:
|
def fetch_trade_cal(self, start_date: str, end_date: str) -> pd.DatetimeIndex:
|
||||||
@@ -513,7 +550,14 @@ class TushareSource:
|
|||||||
"""
|
"""
|
||||||
获取 A股股票复权价格数据
|
获取 A股股票复权价格数据
|
||||||
|
|
||||||
使用 pro_bar 接口获取前复权(qfq)或后复权(hfq)价格。
|
自己实现复权计算(不使用 pro_bar,避免 pandas 兼容性问题):
|
||||||
|
1. 使用 pro.daily() 获取原始价格
|
||||||
|
2. 使用 pro.adj_factor() 获取复权因子
|
||||||
|
3. 根据 adj 参数计算复权价格
|
||||||
|
|
||||||
|
复权公式:
|
||||||
|
- 后复权 (hfq): close_hfq = close × adj_factor
|
||||||
|
- 前复权 (qfq): close_qfq = close × adj_factor / latest_factor
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
code: 股票代码,如 '000001.SZ', '600000.SH'
|
code: 股票代码,如 '000001.SZ', '600000.SH'
|
||||||
@@ -523,49 +567,92 @@ class TushareSource:
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
DataFrame with columns: date, code, open, high, low, close, volume, adj_factor
|
DataFrame with columns: date, code, open, high, low, close, volume, adj_factor
|
||||||
|
adj='hfq' 时额外返回 close_hfq 列
|
||||||
"""
|
"""
|
||||||
import tushare as ts
|
|
||||||
|
|
||||||
if adj not in ['qfq', 'hfq']:
|
if adj not in ['qfq', 'hfq']:
|
||||||
raise ValueError(f"adj 参数必须是 'qfq' 或 'hfq',当前: {adj}")
|
raise ValueError(f"adj 参数必须是 'qfq' 或 'hfq',当前: {adj}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
pro = self._get_pro_api()
|
||||||
ts_code = code.replace('.SS', '.SH')
|
ts_code = code.replace('.SS', '.SH')
|
||||||
|
|
||||||
# 使用 pro_bar 接口获取复权数据
|
# 步骤 1: 获取原始价格数据
|
||||||
df = ts.pro_bar(
|
daily_df = pro.daily(
|
||||||
ts_code=ts_code,
|
ts_code=ts_code,
|
||||||
adj=adj,
|
|
||||||
start_date=start_date.replace('-', ''),
|
start_date=start_date.replace('-', ''),
|
||||||
end_date=end_date.replace('-', ''),
|
end_date=end_date.replace('-', '')
|
||||||
adjfactor=True # 返回复权因子
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if df is None or len(df) == 0:
|
if daily_df is None or len(daily_df) == 0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# 标准化列名
|
# 步骤 2: 获取复权因子(需要获取全量数据才能正确计算)
|
||||||
df = df.rename(columns={
|
# 注意:adj_factor 需要从上市日至今的完整数据
|
||||||
|
adj_df = pro.adj_factor(ts_code=ts_code)
|
||||||
|
|
||||||
|
if adj_df is None or len(adj_df) == 0:
|
||||||
|
print(f"警告: {code} 无复权因子数据,返回原始价格")
|
||||||
|
# 降级:返回原始价格
|
||||||
|
daily_df = daily_df.rename(columns={
|
||||||
|
'ts_code': 'code',
|
||||||
|
'trade_date': 'date',
|
||||||
|
'vol': 'volume',
|
||||||
|
})
|
||||||
|
daily_df['date'] = pd.to_datetime(daily_df['date'])
|
||||||
|
daily_df = daily_df.set_index('date').sort_index()
|
||||||
|
daily_df['code'] = code
|
||||||
|
return daily_df[['code', 'open', 'high', 'low', 'close', 'volume']]
|
||||||
|
|
||||||
|
# 标准化复权因子
|
||||||
|
adj_df = adj_df.rename(columns={'trade_date': 'date'})
|
||||||
|
adj_df['date'] = pd.to_datetime(adj_df['date'])
|
||||||
|
adj_df = adj_df.set_index('date').sort_index()
|
||||||
|
|
||||||
|
# 标准化日线数据
|
||||||
|
daily_df = daily_df.rename(columns={
|
||||||
'ts_code': 'code',
|
'ts_code': 'code',
|
||||||
'trade_date': 'date',
|
'trade_date': 'date',
|
||||||
'vol': 'volume',
|
'vol': 'volume',
|
||||||
})
|
})
|
||||||
|
daily_df['date'] = pd.to_datetime(daily_df['date'])
|
||||||
|
daily_df = daily_df.set_index('date').sort_index()
|
||||||
|
|
||||||
# 转换日期格式
|
# 步骤 3: 合并复权因子
|
||||||
df['date'] = pd.to_datetime(df['date'])
|
df = daily_df.join(adj_df[['adj_factor']], how='left')
|
||||||
df = df.set_index('date')
|
|
||||||
df = df.sort_index()
|
|
||||||
|
|
||||||
# 恢复原始代码格式(.SS -> .SH 反转)
|
# 填充复权因子(向前填充,使用最新的因子)
|
||||||
|
df['adj_factor'] = df['adj_factor'].ffill()
|
||||||
|
|
||||||
|
# 步骤 4: 计算复权价格
|
||||||
|
if adj == 'hfq':
|
||||||
|
# 后复权:原始价格 × 复权因子
|
||||||
|
df['close_hfq'] = (df['close'] * df['adj_factor']).round(4)
|
||||||
|
df['open'] = (df['open'] * df['adj_factor']).round(4)
|
||||||
|
df['high'] = (df['high'] * df['adj_factor']).round(4)
|
||||||
|
df['low'] = (df['low'] * df['adj_factor']).round(4)
|
||||||
|
# close 列保持为后复权价格
|
||||||
|
df['close'] = df['close_hfq']
|
||||||
|
|
||||||
|
elif adj == 'qfq':
|
||||||
|
# 前复权:原始价格 × 复权因子 / 最新复权因子
|
||||||
|
# 注意:需要使用全量最新的复权因子,而不是请求时间范围内的
|
||||||
|
latest_factor = adj_df['adj_factor'].iloc[-1] # 从全量数据获取最新因子
|
||||||
|
if latest_factor and latest_factor > 0:
|
||||||
|
adj_ratio = df['adj_factor'] / latest_factor
|
||||||
|
df['close'] = (df['close'] * adj_ratio).round(4)
|
||||||
|
df['open'] = (df['open'] * adj_ratio).round(4)
|
||||||
|
df['high'] = (df['high'] * adj_ratio).round(4)
|
||||||
|
df['low'] = (df['low'] * adj_ratio).round(4)
|
||||||
|
|
||||||
|
# 恢复原始代码格式
|
||||||
df['code'] = code
|
df['code'] = code
|
||||||
|
|
||||||
# 标准化返回字段
|
# 标准化返回字段
|
||||||
columns = ['code', 'open', 'high', 'low', 'close', 'volume']
|
columns = ['code', 'open', 'high', 'low', 'close', 'volume', 'adj_factor']
|
||||||
if 'adj_factor' in df.columns:
|
|
||||||
columns.append('adj_factor')
|
|
||||||
|
|
||||||
return df[columns]
|
return df[columns]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Tushare下载股票复权数据 {code} 失败: {e}")
|
print(f"Tushare下载股票复权数据 {code} 失败: {e}")
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
return None
|
return None
|
||||||
Reference in New Issue
Block a user