factor miner

This commit is contained in:
2025-11-09 14:00:58 +08:00
parent a66e42a8ae
commit dc3d41d6e5
5 changed files with 1072 additions and 0 deletions

237
factor_mining/validator.py Normal file
View File

@@ -0,0 +1,237 @@
"""
因子有效性检验模块:整合所有检验方案
"""
import numpy as np
import pandas as pd
from typing import Dict, List, Optional
from dataclasses import dataclass
from statsmodels.regression.linear_model import OLS
from validation import (
compute_ic,
compute_rolling_ic,
group_backtest,
factor_span_regression
)
@dataclass
class ValidationConfig:
    """Settings and pass/fail thresholds read by ``FactorValidator``.

    The field order below is also the positional-argument order of the
    generated constructor, so it must not be rearranged.
    """

    # Forwarded as ``window`` to the rolling-IC computation.
    ic_window: int = 30
    # Forwarded as ``method`` to the rolling-IC computation:
    # "spearman" or "pearson".
    ic_method: str = "spearman"
    # Forwarded as ``n_groups`` to the group backtest.
    n_groups: int = 3
    # Forwarded as ``group_period`` to the group backtest.
    group_period: int = 180
    # The IC check passes when |mean IC| reaches this value.
    min_ic: float = 0.01
    # Minimum |t-stat| required by both the group-backtest check and the
    # regression check.
    min_tstat: float = 1.5
    # Minimum ``r2_change`` the regression check requires.
    min_r2_change: float = 0.05
class FactorValidator:
    """Factor validity checker combining IC, group-backtest and regression tests.

    Each ``validate_*`` method returns a dict that contains the raw
    statistics plus an ``"is_valid"`` flag (a plain ``bool``) obtained by
    comparing those statistics against the thresholds in ``self.config``.
    """

    def __init__(self, config: Optional["ValidationConfig"] = None):
        """Store the configuration used by every check.

        Parameters:
        -----------
        config : ValidationConfig, optional
            Thresholds and window settings. When omitted, a default
            ``ValidationConfig()`` is created.
        """
        self.config = ValidationConfig() if config is None else config

    @staticmethod
    def _pick_target_name(existing_columns) -> str:
        """Return a column label for the tested factor that is not already taken.

        ``'target'`` is used whenever it is free; otherwise underscores are
        prepended until the label no longer collides with
        ``existing_columns``.
        """
        name = 'target'
        while name in existing_columns:
            name = f"_{name}"
        return name

    def validate_ic(
        self,
        factor: pd.Series,
        forward_return: pd.Series
    ) -> Dict:
        """
        IC test.

        Parameters:
        -----------
        factor : Series
            Factor under test.
        forward_return : Series
            Forward returns.

        Returns:
        --------
        dict: ``mean_ic``, ``ic_std``, ``ic_ir``, ``ic_series`` and
            ``is_valid`` (True when ``|mean_ic| >= config.min_ic``).
        """
        rolling_ic = compute_rolling_ic(
            factor,
            forward_return,
            window=self.config.ic_window,
            method=self.config.ic_method
        )
        mean_ic = rolling_ic.mean()
        ic_std = rolling_ic.std()
        # IC information ratio; the epsilon keeps a zero std from dividing by zero.
        ic_ir = mean_ic / (ic_std + 1e-8)
        return {
            "mean_ic": mean_ic,
            "ic_std": ic_std,
            "ic_ir": ic_ir,
            "ic_series": rolling_ic,
            # bool() so callers get a plain Python bool, not numpy.bool_.
            "is_valid": bool(abs(mean_ic) >= self.config.min_ic)
        }

    def validate_group_backtest(
        self,
        factor: pd.Series,
        forward_return: pd.Series
    ) -> Dict:
        """
        Group-backtest test.

        Returns:
        --------
        dict: everything returned by ``group_backtest`` plus ``is_valid``
            (True when ``|mean_h_l_tstat| >= config.min_tstat``; a missing
            ``mean_h_l_tstat`` is treated as 0).
        """
        result = group_backtest(
            factor,
            forward_return,
            n_groups=self.config.n_groups,
            group_period=self.config.group_period
        )
        is_valid = bool(
            abs(result.get('mean_h_l_tstat', 0)) >= self.config.min_tstat
        )
        return {
            **result,
            "is_valid": is_valid
        }

    def validate_regression(
        self,
        factor: pd.Series,
        forward_return: pd.Series,
        other_factors: Optional[pd.DataFrame] = None
    ) -> Dict:
        """
        Factor spanning regression test.

        Parameters:
        -----------
        factor : Series
            Factor under test.
        forward_return : Series
            Forward returns.
        other_factors : DataFrame, optional
            Other factors (used as control variables).

        Returns:
        --------
        dict: everything returned by ``factor_span_regression`` plus
            ``is_valid`` (True when ``|tstat| >= config.min_tstat`` and
            ``r2_change >= config.min_r2_change``; missing keys count as 0).
        """
        if other_factors is None:
            other_factors = pd.DataFrame()
        # A control column that is itself called 'target' would otherwise
        # produce two identically named columns after the concat.
        target_name = self._pick_target_name(other_factors.columns)
        # Controls first, tested factor last.
        factors_df = pd.concat(
            [other_factors, factor.to_frame(name=target_name)],
            axis=1
        )
        result = factor_span_regression(
            factors_df,
            forward_return,
            target_factor=target_name
        )
        is_valid = bool(
            abs(result.get('tstat', 0)) >= self.config.min_tstat and
            result.get('r2_change', 0) >= self.config.min_r2_change
        )
        return {
            **result,
            "is_valid": is_valid
        }

    def validate_all(
        self,
        factor: pd.Series,
        forward_return: pd.Series,
        other_factors: Optional[pd.DataFrame] = None
    ) -> Dict:
        """
        Combined test: run every validation method.

        Returns:
        --------
        dict: keys ``ic``, ``group_backtest`` and ``regression`` hold the
            individual result dicts; ``is_valid`` is True only when all
            three checks pass; ``score`` is the weighted composite score.
        """
        ic_result = self.validate_ic(factor, forward_return)
        group_result = self.validate_group_backtest(factor, forward_return)
        reg_result = self.validate_regression(factor, forward_return, other_factors)

        return {
            'ic': ic_result,
            'group_backtest': group_result,
            'regression': reg_result,
            # Overall verdict: every individual check must pass.
            'is_valid': bool(
                ic_result['is_valid'] and
                group_result['is_valid'] and
                reg_result['is_valid']
            ),
            'score': self._calculate_score(ic_result, group_result, reg_result)
        }

    def _calculate_score(
        self,
        ic_result: Dict,
        group_result: Dict,
        reg_result: Dict
    ) -> float:
        """Compute the composite score.

        Weights: IC 0.3, group-backtest t-stat 0.4, regression R² change 0.3.
        Missing keys contribute 0.
        """
        score = 0.0

        # IC term, weight 0.3.
        # NOTE(review): unlike the two terms below this one is not capped,
        # so |mean_ic| > 0.1 pushes the term above 1 - confirm that is intended.
        ic_score = abs(ic_result.get('mean_ic', 0)) * 10
        score += ic_score * 0.3

        # Group-backtest term, weight 0.4; |t| / 3 capped at 1 -> [0, 1].
        tstat = abs(group_result.get('mean_h_l_tstat', 0))
        tstat_score = min(tstat / 3.0, 1.0)
        score += tstat_score * 0.4

        # Regression term, weight 0.3; r2_change / 0.1 clamped to [0, 1] so
        # that a negative r2_change cannot pull the score down.
        r2_change = reg_result.get('r2_change', 0)
        r2_score = min(max(r2_change / 0.1, 0.0), 1.0)
        score += r2_score * 0.3

        return score

    def filter_factors(
        self,
        factors: pd.DataFrame,
        forward_return: pd.Series
    ) -> pd.DataFrame:
        """
        Batch filter: keep only the factors that pass every check.

        Each column is validated with all remaining columns supplied as
        control factors.

        Returns:
        --------
        DataFrame: the valid factor columns, or an empty ``pd.DataFrame()``
            when no column passes.
        """
        valid_factors = [
            col
            for col in factors.columns
            if self.validate_all(
                factors[col],
                forward_return,
                factors.drop(columns=[col])
            )['is_valid']
        ]
        return factors[valid_factors] if valid_factors else pd.DataFrame()
def create_validator(
    ic_window: int = 30,
    min_ic: float = 0.01,
    min_tstat: float = 1.5
) -> FactorValidator:
    """Create a validator (convenience function).

    Builds a ``ValidationConfig`` from the three arguments, leaving every
    other field at its dataclass default, and wraps it in a
    ``FactorValidator``.

    Parameters:
    -----------
    ic_window : int
        Value for ``ValidationConfig.ic_window``.
    min_ic : float
        Value for ``ValidationConfig.min_ic``.
    min_tstat : float
        Value for ``ValidationConfig.min_tstat``.

    Returns:
    --------
    FactorValidator: validator bound to the new configuration.
    """
    return FactorValidator(
        ValidationConfig(
            ic_window=ic_window,
            min_ic=min_ic,
            min_tstat=min_tstat
        )
    )