fix: 数据源路由修复与因子计算改进

1. 修复期货路由逻辑:NYMEX期货(.NYM)走YFinance而非Tushare
2. 添加SSH隧道路径修复(原引擎)
3. 因子计算只使用close列(处理部分指数只有收盘价的情况)
4. 添加数据不足和缺失率剔除日志

收益对比:
- 原引擎(剔除国债): 累计1804%, 调仓459次
- 新框架: 累计772%, 调仓1276次

差异原因待查:
- 国债剔除逻辑不同
- 调仓频率差异
This commit is contained in:
2026-05-12 00:47:43 +08:00
parent a7a4a69153
commit 19131c41dd
6 changed files with 76 additions and 36 deletions

View File

@@ -125,7 +125,7 @@ class RotationStrategy(StrategyBase):
)
# 调用 fetch_all
index_data, etf_data, etf_nav_data, benchmark_data, valid_codes, index_ohlcv_data = \
index_data, etf_data, etf_nav_data, benchmark_data, valid_codes, index_ohlcv_data, etf_code_map = \
data_source.fetch_all(
code_config=code_list_config,
benchmark_code=benchmark_code,
@@ -139,7 +139,8 @@ class RotationStrategy(StrategyBase):
'etf_data': etf_data,
'etf_nav_data': etf_nav_data,
'benchmark_data': benchmark_data,
'valid_codes': valid_codes
'valid_codes': valid_codes,
'etf_code_map': etf_code_map # {指数代码: ETF代码} 映射
}
def compute_factors(self, data: dict) -> pd.DataFrame:
@@ -148,13 +149,42 @@ class RotationStrategy(StrategyBase):
valid_codes = data['valid_codes']
factor_values = {}
final_valid_codes = []
for code in valid_codes:
df = index_data[code]
if len(df) >= self.n_days:
values = self._factor.compute(df)
factor_values[code] = values
# 只使用 close 列计算因子(匹配原引擎逻辑:部分指数只有收盘价)
if 'close' in df.columns:
close_series = df['close'].dropna()
else:
close_series = df.dropna()
# 原引擎剔除逻辑:close 数据需要至少 n_days + 1 条
if len(close_series) < self.n_days + 1:
print(f" ⚠ 剔除 {code}: 数据不足 ({len(close_series)} < {self.n_days + 1})")
continue
# 只传入 close 列给因子计算器
close_df = pd.DataFrame({'close': close_series})
values = self._factor.compute(close_df)
factor_values[code] = values
final_valid_codes.append(code)
return pd.DataFrame(factor_values)
factor_df = pd.DataFrame(factor_values)
# 过滤缺失率过高的标的
total_rows = len(factor_df)
for code in final_valid_codes:
if code in factor_df.columns:
null_pct = factor_df[code].isnull().sum() / total_rows
if null_pct > 0.5:
print(f" ⚠ 剔除 {code}: 缺失率 {null_pct:.1%} 过高")
factor_df = factor_df.drop(columns=[code])
# 更新有效代码列表
data['valid_codes'] = [c for c in final_valid_codes if c in factor_df.columns]
return factor_df
def generate_signals(self, factor_df: pd.DataFrame) -> pd.DataFrame:
"""生成信号"""
@@ -198,25 +228,35 @@ class RotationStrategy(StrategyBase):
# 4. 执行回测
print("\n执行回测...")
# 使用对齐后的指数收盘价数据获取日期基准
index_close = data.get('index_close')
# 获取ETF数据和代码映射
etf_data = data.get('etf_data')
etf_code_map = data.get('etf_code_map', {}) # {指数代码: ETF代码}
# 计算日收益率(使用对齐后的收盘价数据)
if index_close is not None and not index_close.empty:
returns_df = index_close.pct_change()
returns_df.columns = [f'日收益率_{col}' for col in returns_df.columns]
else:
# 回退到原始数据
# 计算日收益率(使用ETF价格数据,匹配原引擎逻辑)
if etf_data is not None and not etf_data.empty:
# 使用ETF价格计算收益,列名保持指数代码格式
returns_data = {}
for code in valid_codes:
if code in index_data:
df = index_data[code]
returns_data[f'日收益率_{code}'] = df['close'].pct_change()
for idx_code in valid_codes:
etf_code = etf_code_map.get(idx_code, idx_code)
if etf_code in etf_data.columns:
returns_data[f'日收益率_{idx_code}'] = etf_data[etf_code].pct_change()
returns_df = pd.DataFrame(returns_data)
if valid_codes:
first_code = valid_codes[0]
returns_df.index = index_data[first_code].index
else:
# 回退到指数收盘价数据
index_close = data.get('index_close')
if index_close is not None and not index_close.empty:
returns_df = index_close.pct_change()
returns_df.columns = [f'日收益率_{col}' for col in returns_df.columns]
else:
returns_data = {}
for code in valid_codes:
if code in index_data:
df = index_data[code]
returns_data[f'日收益率_{code}'] = df['close'].pct_change()
returns_df = pd.DataFrame(returns_data)
if valid_codes:
first_code = valid_codes[0]
returns_df.index = index_data[first_code].index
# 确保信号和收益率数据日期对齐
common_dates = signals.index.intersection(returns_df.index)

View File

@@ -164,10 +164,6 @@ class TopNSelector(SignalGenerator):
factor_cols: List[str]
) -> bool:
"""检查是否应该调仓(得分阈值检查)"""
if self.rebalance_threshold <= 0:
# 无阈值,直接调仓
return target != current_held
# 提取当前持仓和目标持仓的代码
old_codes = [c for c in current_held.split(',') if c]
new_codes = [c for c in target.split(',') if c]
@@ -176,13 +172,14 @@ class TopNSelector(SignalGenerator):
return True
if set(new_codes) == set(old_codes):
return False
return False # 组合完全相同,不调仓
# 计算新旧组合的总得分
old_total = sum(float(row.get(col, 0)) for col in factor_cols if col in old_codes)
new_total = sum(float(row.get(col, 0)) for col in factor_cols if col in new_codes)
# 新组合得分需超过当前组合一定比例才调仓
# 即使 threshold=0,也要确保 new_total >= old_total
if old_total > 0:
return (new_total / old_total - 1) >= self.rebalance_threshold