""" 因子组合模块:多因子模型 """ import numpy as np import pandas as pd from typing import Dict, List, Optional from statsmodels.regression.linear_model import OLS def risk_parity_weights(factors: pd.DataFrame, window: int = 30) -> pd.Series: """ 风险平价加权:使每个因子的风险贡献相等 Parameters: ----------- factors : DataFrame 因子数据框 window : int 计算波动率的滚动窗口 """ # 计算各因子的滚动波动率 volatilities = factors.rolling(window).std().iloc[-1] # 风险平价权重:w_i = 1/σ_i / Σ(1/σ_j) inv_vol = 1.0 / (volatilities + 1e-8) weights = inv_vol / inv_vol.sum() return weights def regression_weights( factors: pd.DataFrame, forward_return: pd.Series, window: Optional[int] = None ) -> pd.Series: """ 回归系数加权:用回归系数作为权重 Parameters: ----------- factors : DataFrame 因子数据框 forward_return : Series 未来收益率 window : int, optional 滚动窗口,None表示使用全样本 """ # 对齐数据 data = pd.concat([factors, forward_return], axis=1).dropna() if window is not None and len(data) > window: data = data.iloc[-window:] if len(data) < 30: # 如果数据不足,返回等权重 return pd.Series(1.0 / len(factors.columns), index=factors.columns) y = data.iloc[:, -1].values X = data.iloc[:, :-1].values try: model = OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 6}) betas = pd.Series(model.params, index=factors.columns) # 归一化权重(取绝对值后归一化) weights = np.abs(betas) / (np.abs(betas).sum() + 1e-8) return weights except Exception as e: print(f"回归权重计算出错: {e}") # 返回等权重 return pd.Series(1.0 / len(factors.columns), index=factors.columns) def equal_weights(factors: pd.DataFrame) -> pd.Series: """等权重""" n = len(factors.columns) return pd.Series(1.0 / n, index=factors.columns) class MultiFactorModel: """多因子模型""" def __init__(self, weight_method: str = 'risk_parity'): """ Parameters: ----------- weight_method : str 权重方法:'risk_parity', 'regression', 'equal' """ self.weight_method = weight_method self.weights: Optional[pd.Series] = None self.factor_names: List[str] = [] def fit( self, factors: pd.DataFrame, forward_return: Optional[pd.Series] = None, window: Optional[int] = None ): """ 拟合多因子模型 Parameters: ----------- factors : DataFrame 因子数据框 forward_return : Series, optional 未来收益率(回归权重需要) window : int, optional 滚动窗口 """ self.factor_names = list(factors.columns) if self.weight_method == 'risk_parity': self.weights = risk_parity_weights(factors, window=window or 30) elif self.weight_method == 'regression': if forward_return is None: raise ValueError("回归权重需要提供forward_return") self.weights = regression_weights(factors, forward_return, window=window) elif self.weight_method == 'equal': self.weights = equal_weights(factors) else: raise ValueError(f"未知的权重方法: {self.weight_method}") def predict(self, factors: pd.DataFrame) -> pd.Series: """ 计算多因子综合得分 Parameters: ----------- factors : DataFrame 因子数据框 Returns: -------- Series: 综合得分 """ if self.weights is None: raise ValueError("模型尚未拟合,请先调用fit()") # 确保因子顺序一致 factors_aligned = factors[self.factor_names] # 计算加权得分 score = (factors_aligned * self.weights).sum(axis=1) return score def get_weights(self) -> pd.Series: """获取因子权重""" return self.weights.copy() if self.weights is not None else pd.Series()