Compare commits
3 Commits
4f1207dc4d
...
cbd60894b9
| Author | SHA1 | Date | |
|---|---|---|---|
| cbd60894b9 | |||
| 85c20b4626 | |||
| 763713213c | |||
@@ -47,9 +47,10 @@ code_list:
|
|||||||
name: "原油"
|
name: "原油"
|
||||||
etf: "160723.SZ" # 国内原油ETF
|
etf: "160723.SZ" # 国内原油ETF
|
||||||
market: "COMMODITY"
|
market: "COMMODITY"
|
||||||
"CU.SHF":
|
# 使用 COMEX 铜期货替代上期所主力合约(数据更长)
|
||||||
|
"HG=F": # COMEX铜期货(2000年至今)
|
||||||
name: "有色金属"
|
name: "有色金属"
|
||||||
etf: "159980.SZ"
|
etf: "159980.SZ" # 国内有色金属ETF
|
||||||
market: "COMMODITY"
|
market: "COMMODITY"
|
||||||
"931862.CSI":
|
"931862.CSI":
|
||||||
name: "30年国债"
|
name: "30年国债"
|
||||||
@@ -67,7 +68,7 @@ benchmark:
|
|||||||
name: "沪深300"
|
name: "沪深300"
|
||||||
|
|
||||||
# ==================== 回测参数 ====================
|
# ==================== 回测参数 ====================
|
||||||
start_date: "2019-01-01"
|
start_date: "2000-01-01"
|
||||||
|
|
||||||
# ==================== 因子参数 ====================
|
# ==================== 因子参数 ====================
|
||||||
# 动量/趋势窗口期(天数)
|
# 动量/趋势窗口期(天数)
|
||||||
|
|||||||
@@ -277,7 +277,10 @@ class RotationStrategy(StrategyBase):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def compute_factors(self, data: dict) -> pd.DataFrame:
|
def compute_factors(self, data: dict) -> pd.DataFrame:
|
||||||
"""计算因子值(匹配原引擎:先计算因子再对齐到A股交易日历)"""
|
"""计算因子值(匹配原引擎:先计算因子再对齐到A股交易日历)
|
||||||
|
|
||||||
|
注意:不剔除数据不足的标的,保留所有标的以暴露策略问题
|
||||||
|
"""
|
||||||
index_data = data['index_data']
|
index_data = data['index_data']
|
||||||
valid_codes = data['valid_codes']
|
valid_codes = data['valid_codes']
|
||||||
|
|
||||||
@@ -299,53 +302,61 @@ class RotationStrategy(StrategyBase):
|
|||||||
for code in valid_codes:
|
for code in valid_codes:
|
||||||
df = index_data[code].copy()
|
df = index_data[code].copy()
|
||||||
|
|
||||||
# 原引擎剔除逻辑:如果有OHLCV列,整行dropna()后再检查长度
|
# 检查是否有有效的OHLCV数据(列存在且不全为None)
|
||||||
# 这会剔除国债等只有close数据的标的(open/high/low全空)
|
|
||||||
ohlcv_cols = ['open', 'high', 'low', 'close', 'volume']
|
ohlcv_cols = ['open', 'high', 'low', 'close', 'volume']
|
||||||
has_ohlcv = all(col in df.columns for col in ['open', 'high', 'low', 'close'])
|
required_cols = ['open', 'high', 'low', 'close']
|
||||||
|
|
||||||
if has_ohlcv:
|
# 检查列是否存在
|
||||||
# 原引擎逻辑:整行dropna()后检查数据是否足够
|
cols_exist = all(col in df.columns for col in required_cols)
|
||||||
df_clean = df[ohlcv_cols].dropna()
|
|
||||||
if len(df_clean) < self.n_days + 1:
|
# 检查数据是否有效(不全为None/NaN)
|
||||||
print(f" ⚠ 剔除 {code}: OHLCV数据不足 ({len(df_clean)} < {self.n_days + 1})")
|
if cols_exist:
|
||||||
continue
|
cols_have_data = all(df[col].notna().any() for col in required_cols)
|
||||||
close_series = df_clean['close']
|
|
||||||
else:
|
else:
|
||||||
# 只有close列的情况
|
cols_have_data = False
|
||||||
if 'close' in df.columns:
|
|
||||||
|
if cols_exist and cols_have_data:
|
||||||
|
# 有完整有效的OHLCV数据,整行dropna()后提取close
|
||||||
|
df_clean = df[ohlcv_cols].dropna()
|
||||||
|
close_series = df_clean['close'] if len(df_clean) > 0 else pd.Series(dtype=float)
|
||||||
|
elif 'close' in df.columns and df['close'].notna().any():
|
||||||
|
# 只有close列有效数据(如债券指数)
|
||||||
close_series = df['close'].dropna()
|
close_series = df['close'].dropna()
|
||||||
else:
|
else:
|
||||||
close_series = df.dropna()
|
# 无有效数据
|
||||||
|
close_series = pd.Series(dtype=float)
|
||||||
|
|
||||||
|
# 检查数据长度并警告,但不剔除
|
||||||
if len(close_series) < self.n_days + 1:
|
if len(close_series) < self.n_days + 1:
|
||||||
print(f" ⚠ 剔除 {code}: close数据不足 ({len(close_series)} < {self.n_days + 1})")
|
print(f" ⚠ {code}: 数据不足 ({len(close_series)} < {self.n_days + 1}),保留但因子值可能为NaN")
|
||||||
continue
|
|
||||||
|
|
||||||
# 原引擎逻辑:先在原始交易日历上计算因子
|
# 原引擎逻辑:先在原始交易日历上计算因子
|
||||||
# rolling窗口使用的是原始交易日数据,不包含ffill填充的重复值
|
# rolling窗口使用的是原始交易日数据,不包含ffill填充的重复值
|
||||||
|
if len(close_series) > 0:
|
||||||
close_df = pd.DataFrame({'close': close_series})
|
close_df = pd.DataFrame({'close': close_series})
|
||||||
factor_series = self._factor.compute(close_df)
|
factor_series = self._factor.compute(close_df)
|
||||||
|
|
||||||
# 然后对齐因子序列到A股交易日历(匹配原引擎逻辑)
|
# 然后对齐因子序列到A股交易日历(匹配原引擎逻辑)
|
||||||
factor_aligned = factor_series.reindex(a_share_dates, method='ffill')
|
factor_aligned = factor_series.reindex(a_share_dates, method='ffill')
|
||||||
|
else:
|
||||||
|
# 没有数据,创建空的因子序列
|
||||||
|
factor_aligned = pd.Series(index=a_share_dates, dtype=float)
|
||||||
|
|
||||||
factor_values[code] = factor_aligned
|
factor_values[code] = factor_aligned
|
||||||
final_valid_codes.append(code)
|
final_valid_codes.append(code)
|
||||||
|
|
||||||
factor_df = pd.DataFrame(factor_values)
|
factor_df = pd.DataFrame(factor_values)
|
||||||
|
|
||||||
# 过滤缺失率过高的标的
|
# 检查缺失率并警告,但不剔除(保留所有标的以暴露策略问题)
|
||||||
total_rows = len(factor_df)
|
total_rows = len(factor_df)
|
||||||
for code in final_valid_codes:
|
for code in final_valid_codes:
|
||||||
if code in factor_df.columns:
|
if code in factor_df.columns:
|
||||||
null_pct = factor_df[code].isnull().sum() / total_rows
|
null_pct = factor_df[code].isnull().sum() / total_rows
|
||||||
if null_pct > 0.5:
|
if null_pct > 0.5:
|
||||||
print(f" ⚠ 剔除 {code}: 缺失率 {null_pct:.1%} 过高")
|
print(f" ⚠ {code}: 缺失率 {null_pct:.1%} 较高,保留但信号生成时可能跳过")
|
||||||
factor_df = factor_df.drop(columns=[code])
|
|
||||||
|
|
||||||
# 更新有效代码列表
|
# 不更新有效代码列表,保留所有原始代码
|
||||||
data['valid_codes'] = [c for c in final_valid_codes if c in factor_df.columns]
|
data['valid_codes'] = final_valid_codes
|
||||||
|
|
||||||
return factor_df
|
return factor_df
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user