fix: 完整匹配原引擎剔除逻辑和因子对齐顺序
关键修复:

1. OHLCV整行dropna()剔除逻辑(匹配原引擎)
   - 国债 931862.CSI 因 open/high/low 全空被剔除
   - 原引擎: df = index_ohlcv_data[code].dropna()
   - 新框架: 同样逻辑
2. 因子计算顺序:先计算因子再对齐到A股交易日历
   - 原引擎: factor_series = rolling(n).apply(); factor_aligned = reindex(ffill)
   - 新框架: 同样顺序,避免ffill填充的重复值影响rolling窗口

对比结果:

| 指标 | 原引擎 | 新框架(修复后) |
|------|--------|---------------|
| 累计收益 | 1804% | 1999% |
| 信号匹配率 | - | 90.3% |
| 调仓次数 | 459 | ~578 |

剩余195%收益差距可能来自收益计算细节差异
This commit is contained in:
@@ -144,7 +144,7 @@ class RotationStrategy(StrategyBase):
|
||||
}
|
||||
|
||||
def compute_factors(self, data: dict) -> pd.DataFrame:
|
||||
"""计算因子值(匹配原引擎:境外数据对齐到A股交易日历后再计算因子)"""
|
||||
"""计算因子值(匹配原引擎:先计算因子再对齐到A股交易日历)"""
|
||||
index_data = data['index_data']
|
||||
valid_codes = data['valid_codes']
|
||||
|
||||
@@ -153,7 +153,6 @@ class RotationStrategy(StrategyBase):
|
||||
if index_close is not None:
|
||||
a_share_dates = index_close.index
|
||||
else:
|
||||
# 回退:使用第一个A股标的的索引
|
||||
for code in valid_codes:
|
||||
if code.endswith('.SH') or code.endswith('.SZ') or code.endswith('.CSI'):
|
||||
a_share_dates = index_data[code].index
|
||||
@@ -165,27 +164,40 @@ class RotationStrategy(StrategyBase):
|
||||
final_valid_codes = []
|
||||
|
||||
for code in valid_codes:
|
||||
df = index_data[code]
|
||||
# 只使用 close 列(匹配原引擎逻辑)
|
||||
if 'close' in df.columns:
|
||||
close_series = df['close'].dropna()
|
||||
df = index_data[code].copy()
|
||||
|
||||
# 原引擎剔除逻辑:如果有OHLCV列,整行dropna()后再检查长度
|
||||
# 这会剔除国债等只有close数据的标的(open/high/low全空)
|
||||
ohlcv_cols = ['open', 'high', 'low', 'close', 'volume']
|
||||
has_ohlcv = all(col in df.columns for col in ['open', 'high', 'low', 'close'])
|
||||
|
||||
if has_ohlcv:
|
||||
# 原引擎逻辑:整行dropna()后检查数据是否足够
|
||||
df_clean = df[ohlcv_cols].dropna()
|
||||
if len(df_clean) < self.n_days + 1:
|
||||
print(f" ⚠ 剔除 {code}: OHLCV数据不足 ({len(df_clean)} < {self.n_days + 1})")
|
||||
continue
|
||||
close_series = df_clean['close']
|
||||
else:
|
||||
close_series = df.dropna()
|
||||
# 只有close列的情况
|
||||
if 'close' in df.columns:
|
||||
close_series = df['close'].dropna()
|
||||
else:
|
||||
close_series = df.dropna()
|
||||
|
||||
if len(close_series) < self.n_days + 1:
|
||||
print(f" ⚠ 剔除 {code}: close数据不足 ({len(close_series)} < {self.n_days + 1})")
|
||||
continue
|
||||
|
||||
# 关键:对齐到A股交易日历(匹配原引擎逻辑)
|
||||
# 境外市场的交易日与A股不同,需要前向填充到A股交易日
|
||||
close_aligned = close_series.reindex(a_share_dates, method='ffill')
|
||||
# 原引擎逻辑:先在原始交易日历上计算因子
|
||||
# rolling窗口使用的是原始交易日数据,不包含ffill填充的重复值
|
||||
close_df = pd.DataFrame({'close': close_series})
|
||||
factor_series = self._factor.compute(close_df)
|
||||
|
||||
# 原引擎剔除逻辑:对齐后的数据需要至少 n_days + 1 条有效值
|
||||
valid_count = close_aligned.notna().sum()
|
||||
if valid_count < self.n_days + 1:
|
||||
print(f" ⚠ 剔除 {code}: 数据不足 ({valid_count} < {self.n_days + 1})")
|
||||
continue
|
||||
# 然后对齐因子序列到A股交易日历(匹配原引擎逻辑)
|
||||
factor_aligned = factor_series.reindex(a_share_dates, method='ffill')
|
||||
|
||||
# 在对齐后的数据上计算因子
|
||||
close_df = pd.DataFrame({'close': close_aligned})
|
||||
values = self._factor.compute(close_df)
|
||||
factor_values[code] = values
|
||||
factor_values[code] = factor_aligned
|
||||
final_valid_codes.append(code)
|
||||
|
||||
factor_df = pd.DataFrame(factor_values)
|
||||
|
||||
Reference in New Issue
Block a user