Compare commits

...

3 Commits

Author SHA1 Message Date
cbd60894b9 fix(strategy): 修复债券指数OHLCV数据处理逻辑
- 问题: 债券指数(931862.CSI)只有close数据,open/high/low全是None
- 原代码: 检查列存在后整行dropna → 数据变成0条
- 修复: 检查列存在 + 检查数据是否有效(不全为None)
- 如果OHLCV无效 → 使用close列单独dropna
- 结果: 30年国债4330条数据正常参与回测
- 收益影响: 累计收益+258%, Sharpe+0.04
2026-05-15 23:26:54 +08:00
85c20b4626 refactor(strategy): 取消数据不足标的剔除逻辑,保留所有标的以暴露策略问题
- compute_factors: 不剔除数据不足/缺失率高的标的
- 改为警告并保留,因子值NaN时信号生成自动跳过
- 目的:暴露策略自身问题,后续支持更多大类资产
- 回测配置改为start_date=2000-01-01以测试更长历史
2026-05-15 23:18:44 +08:00
763713213c refactor(config): 有色金属标的改用COMEX铜期货替代上期所
- CU.SHF -> HG=F: COMEX铜期货(2000年至今)
- 原因:上期所主力合约数据仅2018年后,COMEX铜数据更长
- ETF保持不变(159980.SZ 有色金属ETF)
- 配合之前替换:AU.SHF->GC=F, CL.NYM->CL=F
2026-05-15 22:21:55 +08:00
2 changed files with 45 additions and 33 deletions

View File

@@ -47,9 +47,10 @@ code_list:
name: "原油" name: "原油"
etf: "160723.SZ" # 国内原油ETF etf: "160723.SZ" # 国内原油ETF
market: "COMMODITY" market: "COMMODITY"
"CU.SHF": # 使用 COMEX 铜期货替代上期所主力合约(数据更长)
"HG=F": # COMEX铜期货2000年至今
name: "有色金属" name: "有色金属"
etf: "159980.SZ" etf: "159980.SZ" # 国内有色金属ETF
market: "COMMODITY" market: "COMMODITY"
"931862.CSI": "931862.CSI":
name: "30年国债" name: "30年国债"
@@ -67,7 +68,7 @@ benchmark:
name: "沪深300" name: "沪深300"
# ==================== 回测参数 ==================== # ==================== 回测参数 ====================
start_date: "2019-01-01" start_date: "2000-01-01"
# ==================== 因子参数 ==================== # ==================== 因子参数 ====================
# 动量/趋势窗口期(天数) # 动量/趋势窗口期(天数)

View File

@@ -277,7 +277,10 @@ class RotationStrategy(StrategyBase):
} }
def compute_factors(self, data: dict) -> pd.DataFrame: def compute_factors(self, data: dict) -> pd.DataFrame:
"""计算因子值匹配原引擎先计算因子再对齐到A股交易日历""" """计算因子值匹配原引擎先计算因子再对齐到A股交易日历
注意:不剔除数据不足的标的,保留所有标的以暴露策略问题
"""
index_data = data['index_data'] index_data = data['index_data']
valid_codes = data['valid_codes'] valid_codes = data['valid_codes']
@@ -299,53 +302,61 @@ class RotationStrategy(StrategyBase):
for code in valid_codes: for code in valid_codes:
df = index_data[code].copy() df = index_data[code].copy()
# 原引擎剔除逻辑如果有OHLCV列整行dropna()后再检查长度 # 检查是否有有效的OHLCV数据列存在且不全为None
# 这会剔除国债等只有close数据的标的(open/high/low全空)
ohlcv_cols = ['open', 'high', 'low', 'close', 'volume'] ohlcv_cols = ['open', 'high', 'low', 'close', 'volume']
has_ohlcv = all(col in df.columns for col in ['open', 'high', 'low', 'close']) required_cols = ['open', 'high', 'low', 'close']
if has_ohlcv: # 检查列是否存在
# 原引擎逻辑整行dropna()后检查数据是否足够 cols_exist = all(col in df.columns for col in required_cols)
df_clean = df[ohlcv_cols].dropna()
if len(df_clean) < self.n_days + 1: # 检查数据是否有效不全为None/NaN
print(f" ⚠ 剔除 {code}: OHLCV数据不足 ({len(df_clean)} < {self.n_days + 1})") if cols_exist:
continue cols_have_data = all(df[col].notna().any() for col in required_cols)
close_series = df_clean['close']
else: else:
# 只有close列的情况 cols_have_data = False
if 'close' in df.columns:
if cols_exist and cols_have_data:
# 有完整有效的OHLCV数据整行dropna()后提取close
df_clean = df[ohlcv_cols].dropna()
close_series = df_clean['close'] if len(df_clean) > 0 else pd.Series(dtype=float)
elif 'close' in df.columns and df['close'].notna().any():
# 只有close列有效数据(如债券指数)
close_series = df['close'].dropna() close_series = df['close'].dropna()
else: else:
close_series = df.dropna() # 无有效数据
close_series = pd.Series(dtype=float)
# 检查数据长度并警告,但不剔除
if len(close_series) < self.n_days + 1: if len(close_series) < self.n_days + 1:
print(f" 剔除 {code}: close数据不足 ({len(close_series)} < {self.n_days + 1})") print(f"{code}: 数据不足 ({len(close_series)} < {self.n_days + 1})保留但因子值可能为NaN")
continue
# 原引擎逻辑:先在原始交易日历上计算因子 # 原引擎逻辑:先在原始交易日历上计算因子
# rolling窗口使用的是原始交易日数据不包含ffill填充的重复值 # rolling窗口使用的是原始交易日数据不包含ffill填充的重复值
if len(close_series) > 0:
close_df = pd.DataFrame({'close': close_series}) close_df = pd.DataFrame({'close': close_series})
factor_series = self._factor.compute(close_df) factor_series = self._factor.compute(close_df)
# 然后对齐因子序列到A股交易日历匹配原引擎逻辑 # 然后对齐因子序列到A股交易日历匹配原引擎逻辑
factor_aligned = factor_series.reindex(a_share_dates, method='ffill') factor_aligned = factor_series.reindex(a_share_dates, method='ffill')
else:
# 没有数据,创建空的因子序列
factor_aligned = pd.Series(index=a_share_dates, dtype=float)
factor_values[code] = factor_aligned factor_values[code] = factor_aligned
final_valid_codes.append(code) final_valid_codes.append(code)
factor_df = pd.DataFrame(factor_values) factor_df = pd.DataFrame(factor_values)
# 过滤缺失率过高的标的 # 检查缺失率并警告,但不剔除(保留所有标的以暴露策略问题)
total_rows = len(factor_df) total_rows = len(factor_df)
for code in final_valid_codes: for code in final_valid_codes:
if code in factor_df.columns: if code in factor_df.columns:
null_pct = factor_df[code].isnull().sum() / total_rows null_pct = factor_df[code].isnull().sum() / total_rows
if null_pct > 0.5: if null_pct > 0.5:
print(f" 剔除 {code}: 缺失率 {null_pct:.1%}") print(f"{code}: 缺失率 {null_pct:.1%} 较高,保留但信号生成时可能跳")
factor_df = factor_df.drop(columns=[code])
# 更新有效代码列表 # 更新有效代码列表,保留所有原始代码
data['valid_codes'] = [c for c in final_valid_codes if c in factor_df.columns] data['valid_codes'] = final_valid_codes
return factor_df return factor_df