238 lines
6.1 KiB
Python
238 lines
6.1 KiB
Python
"""
|
||
因子有效性检验模块:整合所有检验方案
|
||
"""
|
||
import numpy as np
|
||
import pandas as pd
|
||
from typing import Dict, List, Optional
|
||
from dataclasses import dataclass
|
||
from statsmodels.regression.linear_model import OLS
|
||
|
||
from validation import (
|
||
compute_ic,
|
||
compute_rolling_ic,
|
||
group_backtest,
|
||
factor_span_regression
|
||
)
|
||
|
||
|
||
@dataclass
class ValidationConfig:
    """Thresholds and parameters consumed by ``FactorValidator``.

    Field order is part of the positional constructor signature and must
    not be changed.
    """

    # Passed as ``window`` to ``compute_rolling_ic``.
    ic_window: int = 30
    # Passed as ``method`` to ``compute_rolling_ic``: "spearman" or "pearson".
    ic_method: str = "spearman"
    # Passed as ``n_groups`` to ``group_backtest``.
    n_groups: int = 3
    # Passed as ``group_period`` to ``group_backtest``.
    # NOTE(review): the unit (days? bars?) is not visible here -- confirm.
    group_period: int = 180
    # Minimum |mean rolling IC| for the IC check to pass.
    min_ic: float = 0.01
    # Minimum |t-stat| for the group-backtest and regression checks to pass.
    min_tstat: float = 1.5
    # Minimum ``r2_change`` for the regression check to pass.
    min_r2_change: float = 0.05
|
||
|
||
|
||
class FactorValidator:
    """Run IC, group-backtest and regression checks on a factor.

    Each ``validate_*`` method delegates the computation to a helper from
    the ``validation`` module and adds an ``is_valid`` flag derived from
    the thresholds in the supplied ``ValidationConfig``.
    """

    def __init__(self, config: "ValidationConfig"):
        """Store the configuration that supplies parameters and thresholds."""
        self.config = config

    def validate_ic(
        self,
        factor: pd.Series,
        forward_return: pd.Series
    ) -> Dict:
        """Information-coefficient (IC) check.

        Parameters
        ----------
        factor : Series
            Factor values to test.
        forward_return : Series
            Forward returns the factor is compared against.

        Returns
        -------
        dict
            ``mean_ic``, ``ic_std``, ``ic_ir`` (mean / std), ``ic_series``
            (the output of ``compute_rolling_ic``) and ``is_valid``
            (``|mean_ic| >= config.min_ic``).
        """
        rolling_ic = compute_rolling_ic(
            factor,
            forward_return,
            window=self.config.ic_window,
            method=self.config.ic_method
        )

        mean_ic = rolling_ic.mean()
        ic_std = rolling_ic.std()
        # IC information ratio; the epsilon avoids division by zero when
        # the rolling IC is constant.
        ic_ir = mean_ic / (ic_std + 1e-8)

        return {
            "mean_ic": mean_ic,
            "ic_std": ic_std,
            "ic_ir": ic_ir,
            "ic_series": rolling_ic,
            "is_valid": abs(mean_ic) >= self.config.min_ic
        }

    def validate_group_backtest(
        self,
        factor: pd.Series,
        forward_return: pd.Series
    ) -> Dict:
        """Group (quantile) backtest check.

        Returns
        -------
        dict
            Everything returned by ``group_backtest`` plus ``is_valid``,
            which is true when ``|mean_h_l_tstat| >= config.min_tstat``.
            A missing ``mean_h_l_tstat`` key is treated as 0.
        """
        result = group_backtest(
            factor,
            forward_return,
            n_groups=self.config.n_groups,
            group_period=self.config.group_period
        )

        is_valid = abs(result.get('mean_h_l_tstat', 0)) >= self.config.min_tstat

        return {
            **result,
            "is_valid": is_valid
        }

    def validate_regression(
        self,
        factor: pd.Series,
        forward_return: pd.Series,
        other_factors: Optional[pd.DataFrame] = None
    ) -> Dict:
        """Factor-spanning regression check.

        Parameters
        ----------
        factor : Series
            Factor under test.
        forward_return : Series
            Forward returns.
        other_factors : DataFrame, optional
            Other factors used as control variables.

        Returns
        -------
        dict
            Everything returned by ``factor_span_regression`` plus
            ``is_valid``, which requires both
            ``|tstat| >= config.min_tstat`` and
            ``r2_change >= config.min_r2_change``. Missing keys count as 0.
        """
        target_name = 'target'

        if other_factors is None:
            # No controls: use the factor alone instead of concatenating
            # with an index-less empty frame.
            factors_df = factor.to_frame(name=target_name)
        else:
            # Pick a column name that cannot collide with a control factor;
            # a duplicate 'target' column would make the target ambiguous.
            suffix = 0
            while target_name in other_factors.columns:
                suffix += 1
                target_name = f'target_{suffix}'
            factors_df = pd.concat(
                [other_factors, factor.to_frame(name=target_name)],
                axis=1
            )

        result = factor_span_regression(
            factors_df,
            forward_return,
            target_factor=target_name
        )

        is_valid = (
            abs(result.get('tstat', 0)) >= self.config.min_tstat and
            result.get('r2_change', 0) >= self.config.min_r2_change
        )

        return {
            **result,
            "is_valid": is_valid
        }

    def validate_all(
        self,
        factor: pd.Series,
        forward_return: pd.Series,
        other_factors: Optional[pd.DataFrame] = None
    ) -> Dict:
        """Run every check and combine the outcomes.

        Returns
        -------
        dict
            ``ic``, ``group_backtest`` and ``regression`` hold the
            individual results; ``is_valid`` is true only when all three
            checks pass; ``score`` is the weighted composite from
            ``_calculate_score``.
        """
        ic_result = self.validate_ic(factor, forward_return)
        group_result = self.validate_group_backtest(factor, forward_return)
        reg_result = self.validate_regression(factor, forward_return, other_factors)

        results = {
            'ic': ic_result,
            'group_backtest': group_result,
            'regression': reg_result,
        }

        # bool() normalises numpy booleans to a plain Python bool.
        results['is_valid'] = bool(
            ic_result['is_valid'] and
            group_result['is_valid'] and
            reg_result['is_valid']
        )
        results['score'] = self._calculate_score(ic_result, group_result, reg_result)

        return results

    @staticmethod
    def _clamp_unit(value) -> float:
        """Clamp *value* to [0, 1]; NaN is mapped to 0."""
        value = float(value)
        if np.isnan(value):
            return 0.0
        return min(max(value, 0.0), 1.0)

    def _calculate_score(
        self,
        ic_result: Dict,
        group_result: Dict,
        reg_result: Dict
    ) -> float:
        """Compute the weighted composite score in [0, 1].

        Each component is clamped to [0, 1] before weighting, so no single
        check can contribute more than its weight and a NaN or negative
        input contributes 0. Missing keys count as 0.

        Weights: IC 0.3, group backtest 0.4, regression 0.3.
        """
        score = 0.0

        # IC component: |mean IC| of 0.1 or more earns full marks.
        ic_score = self._clamp_unit(abs(ic_result.get('mean_ic', 0)) * 10)
        score += ic_score * 0.3

        # Group-backtest component: |t-stat| of 3 or more earns full marks.
        tstat = abs(group_result.get('mean_h_l_tstat', 0))
        score += self._clamp_unit(tstat / 3.0) * 0.4

        # Regression component: an R^2 change of 0.1 or more earns full marks.
        r2_change = reg_result.get('r2_change', 0)
        score += self._clamp_unit(r2_change / 0.1) * 0.3

        return score

    def filter_factors(
        self,
        factors: pd.DataFrame,
        forward_return: pd.Series
    ) -> pd.DataFrame:
        """Keep only the factor columns that pass every check.

        Each column is validated with all remaining columns supplied as
        control factors.

        Returns
        -------
        DataFrame
            The valid columns of ``factors``, or an empty ``DataFrame``
            when none pass.
        """
        valid_factors = []

        for col in factors.columns:
            controls = factors.drop(columns=[col])
            result = self.validate_all(factors[col], forward_return, controls)
            if result['is_valid']:
                valid_factors.append(col)

        return factors[valid_factors] if valid_factors else pd.DataFrame()
|
||
|
||
|
||
def create_validator(
    ic_window: int = 30,
    min_ic: float = 0.01,
    min_tstat: float = 1.5
) -> FactorValidator:
    """Build a ``FactorValidator`` from the most commonly tuned settings.

    Only ``ic_window``, ``min_ic`` and ``min_tstat`` can be overridden
    here; every other ``ValidationConfig`` field keeps its default.
    """
    return FactorValidator(
        ValidationConfig(
            ic_window=ic_window,
            min_ic=min_ic,
            min_tstat=min_tstat
        )
    )
|
||
|