""" 因子有效性检验模块:整合所有检验方案 """ import numpy as np import pandas as pd from typing import Dict, List, Optional from dataclasses import dataclass from statsmodels.regression.linear_model import OLS from validation import ( compute_ic, compute_rolling_ic, group_backtest, factor_span_regression ) @dataclass class ValidationConfig: """验证配置""" ic_window: int = 30 ic_method: str = "spearman" # "spearman" or "pearson" n_groups: int = 3 group_period: int = 180 min_ic: float = 0.01 min_tstat: float = 1.5 min_r2_change: float = 0.05 class FactorValidator: """因子有效性检验器""" def __init__(self, config: ValidationConfig): self.config = config def validate_ic( self, factor: pd.Series, forward_return: pd.Series ) -> Dict: """ IC检验 Returns: -------- dict: 包含mean_ic, ic_ir, ic_series等 """ rolling_ic = compute_rolling_ic( factor, forward_return, window=self.config.ic_window, method=self.config.ic_method ) mean_ic = rolling_ic.mean() ic_std = rolling_ic.std() ic_ir = mean_ic / (ic_std + 1e-8) # IC信息比率 return { "mean_ic": mean_ic, "ic_std": ic_std, "ic_ir": ic_ir, "ic_series": rolling_ic, "is_valid": abs(mean_ic) >= self.config.min_ic } def validate_group_backtest( self, factor: pd.Series, forward_return: pd.Series ) -> Dict: """ 分组回测检验 Returns: -------- dict: 包含mean_h_l_return, mean_h_l_tstat等 """ result = group_backtest( factor, forward_return, n_groups=self.config.n_groups, group_period=self.config.group_period ) is_valid = abs(result.get('mean_h_l_tstat', 0)) >= self.config.min_tstat return { **result, "is_valid": is_valid } def validate_regression( self, factor: pd.Series, forward_return: pd.Series, other_factors: Optional[pd.DataFrame] = None ) -> Dict: """ 因子跨度回归检验 Parameters: ----------- factor : Series 待检验因子 forward_return : Series 未来收益率 other_factors : DataFrame, optional 其他因子(用于控制变量) Returns: -------- dict: 包含beta, tstat, r2_change等 """ if other_factors is None: other_factors = pd.DataFrame() # 合并因子 factors_df = pd.concat([other_factors, factor.to_frame(name='target')], axis=1) result = factor_span_regression( factors_df, forward_return, target_factor='target' ) is_valid = ( abs(result.get('tstat', 0)) >= self.config.min_tstat and result.get('r2_change', 0) >= self.config.min_r2_change ) return { **result, "is_valid": is_valid } def validate_all( self, factor: pd.Series, forward_return: pd.Series, other_factors: Optional[pd.DataFrame] = None ) -> Dict: """ 综合检验:执行所有检验方法 Returns: -------- dict: 包含所有检验结果和综合判断 """ results = {} # IC检验 ic_result = self.validate_ic(factor, forward_return) results['ic'] = ic_result # 分组回测 group_result = self.validate_group_backtest(factor, forward_return) results['group_backtest'] = group_result # 回归检验 reg_result = self.validate_regression(factor, forward_return, other_factors) results['regression'] = reg_result # 综合判断 is_valid = ( ic_result['is_valid'] and group_result['is_valid'] and reg_result['is_valid'] ) results['is_valid'] = is_valid results['score'] = self._calculate_score(ic_result, group_result, reg_result) return results def _calculate_score( self, ic_result: Dict, group_result: Dict, reg_result: Dict ) -> float: """计算综合得分""" score = 0.0 # IC得分(权重0.3) ic_score = abs(ic_result.get('mean_ic', 0)) * 10 score += ic_score * 0.3 # 分组回测得分(权重0.4) tstat = abs(group_result.get('mean_h_l_tstat', 0)) tstat_score = min(tstat / 3.0, 1.0) # 归一化到[0, 1] score += tstat_score * 0.4 # 回归得分(权重0.3) r2_change = reg_result.get('r2_change', 0) r2_score = min(r2_change / 0.1, 1.0) # 归一化到[0, 1] score += r2_score * 0.3 return score def filter_factors( self, factors: pd.DataFrame, forward_return: pd.Series ) -> pd.DataFrame: """ 批量过滤因子:只保留有效因子 Returns: -------- DataFrame: 有效因子 """ valid_factors = [] for col in factors.columns: factor = factors[col] result = self.validate_all(factor, forward_return, factors.drop(columns=[col])) if result['is_valid']: valid_factors.append(col) return factors[valid_factors] if valid_factors else pd.DataFrame() def create_validator( ic_window: int = 30, min_ic: float = 0.01, min_tstat: float = 1.5 ) -> FactorValidator: """创建验证器(便捷函数)""" config = ValidationConfig( ic_window=ic_window, min_ic=min_ic, min_tstat=min_tstat ) return FactorValidator(config)