factor miner
This commit is contained in:
237
factor_mining/validator.py
Normal file
237
factor_mining/validator.py
Normal file
@@ -0,0 +1,237 @@
|
||||
"""
|
||||
因子有效性检验模块:整合所有检验方案
|
||||
"""
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from typing import Dict, List, Optional
|
||||
from dataclasses import dataclass
|
||||
from statsmodels.regression.linear_model import OLS
|
||||
|
||||
from validation import (
|
||||
compute_ic,
|
||||
compute_rolling_ic,
|
||||
group_backtest,
|
||||
factor_span_regression
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class ValidationConfig:
    """Window sizes and acceptance thresholds used by ``FactorValidator``.

    All fields have defaults, so ``ValidationConfig()`` yields a usable
    configuration; override individual fields by keyword.
    """

    # --- rolling IC check -------------------------------------------------
    # Forwarded as ``window`` to the rolling-IC helper.
    ic_window: int = 30
    # Forwarded as ``method`` to the rolling-IC helper.
    ic_method: str = "spearman"  # "spearman" or "pearson"

    # --- grouped back-test ------------------------------------------------
    # Forwarded as ``n_groups`` / ``group_period`` to the back-test helper.
    n_groups: int = 3
    group_period: int = 180

    # --- acceptance thresholds --------------------------------------------
    # Minimum |mean IC| for the IC check to pass.
    min_ic: float = 0.01
    # Minimum |t-stat| for the back-test and regression checks to pass.
    min_tstat: float = 1.5
    # Minimum ``r2_change`` for the regression check to pass.
    min_r2_change: float = 0.05
|
||||
|
||||
|
||||
class FactorValidator:
    """Run the factor-effectiveness checks and combine their verdicts.

    Three checks are available: rolling IC, grouped back-test and span
    regression. Each one delegates the numerical work to a helper imported
    from ``validation`` and returns a dict that carries an ``is_valid`` flag
    derived from the thresholds held in ``self.config``.
    """

    def __init__(self, config: "ValidationConfig"):
        """Store the window settings and thresholds used by every check."""
        self.config = config

    @staticmethod
    def _unit_score(value, full_marks: float, *, use_abs: bool = False) -> float:
        """Scale ``value / full_marks`` onto [0, 1].

        Missing (``None``) or NaN inputs score 0 so that one undefined
        statistic cannot turn the whole composite score into NaN.
        """
        if value is None or pd.isna(value):
            return 0.0
        magnitude = abs(value) if use_abs else value
        return float(min(max(magnitude / full_marks, 0.0), 1.0))

    def validate_ic(
        self,
        factor: pd.Series,
        forward_return: pd.Series
    ) -> Dict:
        """IC check.

        Calls ``compute_rolling_ic`` with ``config.ic_window`` and
        ``config.ic_method`` and summarises the resulting series.

        Returns
        -------
        dict
            ``mean_ic``, ``ic_std``, ``ic_ir`` (mean / std), ``ic_series``
            (the object returned by ``compute_rolling_ic``) and ``is_valid``
            (True when ``|mean_ic| >= config.min_ic``).
        """
        rolling_ic = compute_rolling_ic(
            factor,
            forward_return,
            window=self.config.ic_window,
            method=self.config.ic_method,
        )

        mean_ic = rolling_ic.mean()
        ic_std = rolling_ic.std()
        # IC information ratio; the epsilon keeps it finite when std is 0.
        ic_ir = mean_ic / (ic_std + 1e-8)

        return {
            "mean_ic": mean_ic,
            "ic_std": ic_std,
            "ic_ir": ic_ir,
            "ic_series": rolling_ic,
            # A NaN mean compares False, i.e. the factor is rejected.
            "is_valid": bool(abs(mean_ic) >= self.config.min_ic),
        }

    def validate_group_backtest(
        self,
        factor: pd.Series,
        forward_return: pd.Series
    ) -> Dict:
        """Grouped back-test check.

        Calls ``group_backtest`` with ``config.n_groups`` and
        ``config.group_period``.

        Returns
        -------
        dict
            Everything ``group_backtest`` returned (expected to include
            ``mean_h_l_return`` and ``mean_h_l_tstat``) plus ``is_valid``,
            True when ``|mean_h_l_tstat| >= config.min_tstat``. A missing
            t-stat is treated as 0.
        """
        result = group_backtest(
            factor,
            forward_return,
            n_groups=self.config.n_groups,
            group_period=self.config.group_period,
        )

        is_valid = bool(
            abs(result.get('mean_h_l_tstat', 0)) >= self.config.min_tstat
        )

        return {**result, "is_valid": is_valid}

    def validate_regression(
        self,
        factor: pd.Series,
        forward_return: pd.Series,
        other_factors: Optional[pd.DataFrame] = None
    ) -> Dict:
        """Factor span-regression check.

        Parameters
        ----------
        factor : Series
            Factor under test.
        forward_return : Series
            Forward returns.
        other_factors : DataFrame, optional
            Other factors used as control variables.

        Returns
        -------
        dict
            Everything ``factor_span_regression`` returned (expected to
            include ``beta``, ``tstat`` and ``r2_change``) plus ``is_valid``,
            True when ``|tstat| >= config.min_tstat`` and
            ``r2_change >= config.min_r2_change``. Missing values count as 0.
        """
        # The tested factor is normally labelled 'target'; if a control
        # column already uses that label, prefix underscores until the name
        # is unique so the combined frame never has duplicate column labels.
        target_name = 'target'
        if other_factors is not None:
            while target_name in other_factors.columns:
                target_name = '_' + target_name

        target_frame = factor.to_frame(name=target_name)
        if other_factors is None:
            # No controls: use the factor alone instead of concatenating with
            # an index-less empty frame.
            factors_df = target_frame
        else:
            # Controls first, tested factor last.
            factors_df = pd.concat([other_factors, target_frame], axis=1)

        result = factor_span_regression(
            factors_df,
            forward_return,
            target_factor=target_name,
        )

        is_valid = bool(
            abs(result.get('tstat', 0)) >= self.config.min_tstat
            and result.get('r2_change', 0) >= self.config.min_r2_change
        )

        return {**result, "is_valid": is_valid}

    def validate_all(
        self,
        factor: pd.Series,
        forward_return: pd.Series,
        other_factors: Optional[pd.DataFrame] = None
    ) -> Dict:
        """Run every check and combine the verdicts.

        Returns
        -------
        dict
            ``ic``, ``group_backtest`` and ``regression`` hold the individual
            result dicts; ``is_valid`` is True only when all three checks
            pass; ``score`` is the weighted composite from
            ``_calculate_score``.
        """
        ic_result = self.validate_ic(factor, forward_return)
        group_result = self.validate_group_backtest(factor, forward_return)
        reg_result = self.validate_regression(
            factor, forward_return, other_factors
        )

        return {
            'ic': ic_result,
            'group_backtest': group_result,
            'regression': reg_result,
            # Overall verdict: every individual check must pass.
            'is_valid': bool(
                ic_result['is_valid']
                and group_result['is_valid']
                and reg_result['is_valid']
            ),
            'score': self._calculate_score(
                ic_result, group_result, reg_result
            ),
        }

    def _calculate_score(
        self,
        ic_result: Dict,
        group_result: Dict,
        reg_result: Dict
    ) -> float:
        """Compute the composite score in [0, 1].

        Each component is scaled to [0, 1] before weighting, so the weights
        (IC 0.3, back-test 0.4, regression 0.3) are the true maximum
        contribution of each check:

        * IC: ``|mean_ic|`` reaches full marks at 0.1.
        * Back-test: ``|mean_h_l_tstat|`` reaches full marks at 3.0.
        * Regression: ``r2_change`` reaches full marks at 0.1; negative
          values score 0.

        Missing or NaN statistics contribute 0.
        """
        ic_score = self._unit_score(
            ic_result.get('mean_ic', 0), 0.1, use_abs=True
        )
        tstat_score = self._unit_score(
            group_result.get('mean_h_l_tstat', 0), 3.0, use_abs=True
        )
        r2_score = self._unit_score(reg_result.get('r2_change', 0), 0.1)

        return ic_score * 0.3 + tstat_score * 0.4 + r2_score * 0.3

    def filter_factors(
        self,
        factors: pd.DataFrame,
        forward_return: pd.Series
    ) -> pd.DataFrame:
        """Batch filter: keep only the factors that pass every check.

        Each column is validated with all remaining columns supplied as
        control factors.

        Returns
        -------
        DataFrame
            The valid columns of ``factors`` in their original order, or an
            empty ``DataFrame()`` when none pass.
        """
        valid_factors = [
            col
            for col in factors.columns
            if self.validate_all(
                factors[col], forward_return, factors.drop(columns=[col])
            )['is_valid']
        ]

        return factors[valid_factors] if valid_factors else pd.DataFrame()
|
||||
|
||||
|
||||
def create_validator(
    ic_window: int = 30,
    min_ic: float = 0.01,
    min_tstat: float = 1.5
) -> FactorValidator:
    """Build a ``FactorValidator`` in one call (convenience function).

    Only the three most commonly tuned settings are exposed; every other
    ``ValidationConfig`` field keeps its default.
    """
    return FactorValidator(
        ValidationConfig(
            ic_window=ic_window,
            min_ic=min_ic,
            min_tstat=min_tstat,
        )
    )
|
||||
|
||||
Reference in New Issue
Block a user