From 85c20b462676a1a9ddde26b10fb182c131135aa2 Mon Sep 17 00:00:00 2001 From: aszerW Date: Fri, 15 May 2026 23:18:44 +0800 Subject: [PATCH] =?UTF-8?q?refactor(strategy):=20=E5=8F=96=E6=B6=88?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E4=B8=8D=E8=B6=B3=E6=A0=87=E7=9A=84=E5=89=94?= =?UTF-8?q?=E9=99=A4=E9=80=BB=E8=BE=91=EF=BC=8C=E4=BF=9D=E7=95=99=E6=89=80?= =?UTF-8?q?=E6=9C=89=E6=A0=87=E7=9A=84=E4=BB=A5=E6=9A=B4=E9=9C=B2=E7=AD=96?= =?UTF-8?q?=E7=95=A5=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - compute_factors: 不剔除数据不足/缺失率高的标的 - 改为警告并保留,因子值NaN时信号生成自动跳过 - 目的:暴露策略自身问题,后续支持更多大类资产 - 回测配置改为start_date=2000-01-01以测试更长历史 --- strategies/rotation/config.yaml | 2 +- strategies/rotation/strategy.py | 46 +++++++++++++++++---------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/strategies/rotation/config.yaml b/strategies/rotation/config.yaml index f491a12..a7af3b6 100644 --- a/strategies/rotation/config.yaml +++ b/strategies/rotation/config.yaml @@ -68,7 +68,7 @@ benchmark: name: "沪深300" # ==================== 回测参数 ==================== -start_date: "2019-01-01" +start_date: "2000-01-01" # ==================== 因子参数 ==================== # 动量/趋势窗口期(天数) diff --git a/strategies/rotation/strategy.py b/strategies/rotation/strategy.py index ac5f97b..bf6ce72 100644 --- a/strategies/rotation/strategy.py +++ b/strategies/rotation/strategy.py @@ -277,7 +277,10 @@ class RotationStrategy(StrategyBase): } def compute_factors(self, data: dict) -> pd.DataFrame: - """计算因子值(匹配原引擎:先计算因子再对齐到A股交易日历)""" + """计算因子值(匹配原引擎:先计算因子再对齐到A股交易日历) + + 注意:不剔除数据不足的标的,保留所有标的以暴露策略问题 + """ index_data = data['index_data'] valid_codes = data['valid_codes'] @@ -299,53 +302,52 @@ class RotationStrategy(StrategyBase): for code in valid_codes: df = index_data[code].copy() - # 原引擎剔除逻辑:如果有OHLCV列,整行dropna()后再检查长度 - # 这会剔除国债等只有close数据的标的(open/high/low全空) + # 检查是否有OHLCV数据 ohlcv_cols = ['open', 'high', 'low', 'close', 'volume'] has_ohlcv = all(col in df.columns for col in ['open', 'high', 'low', 'close']) if has_ohlcv: - # 原引擎逻辑:整行dropna()后检查数据是否足够 + # 如果有完整OHLCV,整行dropna()后提取close df_clean = df[ohlcv_cols].dropna() - if len(df_clean) < self.n_days + 1: - print(f" ⚠ 剔除 {code}: OHLCV数据不足 ({len(df_clean)} < {self.n_days + 1})") - continue - close_series = df_clean['close'] + close_series = df_clean['close'] if len(df_clean) > 0 else pd.Series(dtype=float) else: # 只有close列的情况 if 'close' in df.columns: close_series = df['close'].dropna() else: close_series = df.dropna() - - if len(close_series) < self.n_days + 1: - print(f" ⚠ 剔除 {code}: close数据不足 ({len(close_series)} < {self.n_days + 1})") - continue + + # 检查数据长度并警告,但不剔除 + if len(close_series) < self.n_days + 1: + print(f" ⚠ {code}: 数据不足 ({len(close_series)} < {self.n_days + 1}),保留但因子值可能为NaN") # 原引擎逻辑:先在原始交易日历上计算因子 # rolling窗口使用的是原始交易日数据,不包含ffill填充的重复值 - close_df = pd.DataFrame({'close': close_series}) - factor_series = self._factor.compute(close_df) - - # 然后对齐因子序列到A股交易日历(匹配原引擎逻辑) - factor_aligned = factor_series.reindex(a_share_dates, method='ffill') + if len(close_series) > 0: + close_df = pd.DataFrame({'close': close_series}) + factor_series = self._factor.compute(close_df) + + # 然后对齐因子序列到A股交易日历(匹配原引擎逻辑) + factor_aligned = factor_series.reindex(a_share_dates, method='ffill') + else: + # 没有数据,创建空的因子序列 + factor_aligned = pd.Series(index=a_share_dates, dtype=float) factor_values[code] = factor_aligned final_valid_codes.append(code) factor_df = pd.DataFrame(factor_values) - # 过滤缺失率过高的标的 + # 检查缺失率并警告,但不剔除(保留所有标的以暴露策略问题) total_rows = len(factor_df) for code in final_valid_codes: if code in factor_df.columns: null_pct = factor_df[code].isnull().sum() / total_rows if null_pct > 0.5: - print(f" ⚠ 剔除 {code}: 缺失率 {null_pct:.1%} 过高") - factor_df = factor_df.drop(columns=[code]) + print(f" ⚠ {code}: 缺失率 {null_pct:.1%} 较高,保留但信号生成时可能跳过") - # 更新有效代码列表 - data['valid_codes'] = [c for c in final_valid_codes if c in factor_df.columns] + # 不更新有效代码列表,保留所有原始代码 + data['valid_codes'] = final_valid_codes return factor_df