# Listing metadata: 153 lines, 4.4 KiB, Python
"""
|
||
因子组合模块:多因子模型
|
||
"""
|
||
import logging
from typing import Dict, List, Optional

import numpy as np
import pandas as pd
from statsmodels.regression.linear_model import OLS
|
||
|
||
|
||
def risk_parity_weights(factors: pd.DataFrame, window: int = 30) -> pd.Series:
    """
    Inverse-volatility ("risk parity") weights: w_i = (1/sigma_i) / sum_j(1/sigma_j).

    Each factor's volatility is the last value of its rolling standard
    deviation over ``window`` rows.

    Parameters:
    -----------
    factors : DataFrame
        Factor data, one column per factor.
    window : int
        Rolling window used to compute volatility.

    Returns:
    --------
    Series
        Weights indexed by factor name. When at least one factor has a
        defined volatility the weights sum to 1; factors whose volatility
        is undefined (e.g. a NaN inside the last window) get weight 0.
        When no factor has a defined volatility (empty frame, or fewer
        than ``window`` rows), equal weights are returned instead of NaN.
    """
    columns = factors.columns
    # max(..., 1) keeps the fallback well-defined for a frame with no columns.
    equal = pd.Series(1.0 / max(len(columns), 1), index=columns, dtype=float)

    # An empty frame has no last row to read the rolling std from.
    if len(factors) == 0:
        return equal

    volatilities = factors.rolling(window).std().iloc[-1]

    # The small epsilon guards against division by zero for constant factors.
    inv_vol = 1.0 / (volatilities + 1e-8)
    # Undefined volatility -> zero weight, so the remaining weights stay finite.
    inv_vol = inv_vol.where(np.isfinite(inv_vol), 0.0)

    total = inv_vol.sum()
    if not total > 0:
        # No factor had enough data in the window: fall back to equal weights.
        return equal

    return inv_vol / total
|
||
|
||
|
||
def regression_weights(
    factors: pd.DataFrame,
    forward_return: pd.Series,
    window: Optional[int] = None,
    *,
    min_obs: int = 30,
    maxlags: int = 6,
) -> pd.Series:
    """
    Regression-coefficient weights: normalized absolute OLS betas.

    ``forward_return`` is regressed on the factor columns. The factor
    matrix is passed to OLS as-is (no constant column is added here).
    The absolute coefficients are divided by their sum to form weights.

    Parameters:
    -----------
    factors : DataFrame
        Factor data, one column per factor.
    forward_return : Series
        Forward returns, aligned to ``factors`` by index.
    window : int, optional
        Use only the last ``window`` complete rows; None uses the full
        sample. A window smaller than ``min_obs`` always yields equal
        weights.
    min_obs : int
        Minimum number of complete rows required to run the regression.
    maxlags : int
        ``maxlags`` passed to the HAC covariance estimator.

    Returns:
    --------
    Series
        Weights indexed by factor name. Equal weights are returned when
        there are too few observations, when the regression fails, or
        when it produces non-finite coefficients.
    """
    columns = factors.columns
    # max(..., 1) keeps the fallback well-defined for a frame with no columns.
    equal = pd.Series(1.0 / max(len(columns), 1), index=columns, dtype=float)

    # Align on index and keep only rows where every factor and the return exist.
    data = pd.concat([factors, forward_return], axis=1).dropna()

    if window is not None and len(data) > window:
        # Positive-offset slicing so that window=0 selects nothing rather
        # than the whole sample (which is what iloc[-0:] would do).
        data = data.iloc[len(data) - window:]

    if len(data) < min_obs:
        return equal

    # The return is the last column of the concatenated frame.
    y = data.iloc[:, -1].values
    X = data.iloc[:, :-1].values

    try:
        model = OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': maxlags})
        betas = pd.Series(model.params, index=columns)
        if not np.isfinite(betas).all():
            # Route degenerate fits through the same logged fallback.
            raise ValueError("non-finite regression coefficients")
    except Exception as e:
        # Best-effort: any failure in the fit degrades to equal weights.
        logging.getLogger(__name__).warning("回归权重计算出错: %s", e)
        return equal

    # Normalize by absolute value; epsilon avoids 0/0 when all betas are zero.
    magnitudes = betas.abs()
    return magnitudes / (magnitudes.sum() + 1e-8)
|
||
|
||
|
||
def equal_weights(factors: pd.DataFrame) -> pd.Series:
    """
    Equal weights: every factor column gets 1 / (number of columns).

    Parameters:
    -----------
    factors : DataFrame
        Factor data, one column per factor. Only the column labels are used.

    Returns:
    --------
    Series
        Float weights indexed by factor name; an empty Series when the
        frame has no columns.
    """
    columns = factors.columns
    # max(..., 1) avoids ZeroDivisionError for a frame with no columns; the
    # empty index then yields an empty Series.
    return pd.Series(1.0 / max(len(columns), 1), index=columns, dtype=float)
|
||
|
||
|
||
class MultiFactorModel:
    """
    Multi-factor model: learns one weight per factor and combines factor
    columns into a single weighted score.
    """

    def __init__(self, weight_method: str = 'risk_parity'):
        """
        Parameters:
        -----------
        weight_method : str
            Weighting scheme: 'risk_parity', 'regression' or 'equal'.
            The value is validated when ``fit`` is called.
        """
        self.weight_method = weight_method
        # Fitted weights indexed by factor name; None until fit() succeeds.
        self.weights: Optional[pd.Series] = None
        # Column names seen at fit time, used to select columns in predict().
        self.factor_names: List[str] = []

    def fit(
        self,
        factors: pd.DataFrame,
        forward_return: Optional[pd.Series] = None,
        window: Optional[int] = None
    ) -> "MultiFactorModel":
        """
        Fit the model by computing factor weights with the configured method.

        Parameters:
        -----------
        factors : DataFrame
            Factor data, one column per factor.
        forward_return : Series, optional
            Forward returns (required by the 'regression' method).
        window : int, optional
            Rolling window. For 'risk_parity' a missing (or zero) window
            defaults to 30; for 'regression' None means the full sample.

        Returns:
        --------
        MultiFactorModel
            ``self``, so calls can be chained.

        Raises:
        -------
        ValueError
            If the method is 'regression' and ``forward_return`` is None,
            or if ``weight_method`` is not a known method.
        """
        if self.weight_method == 'risk_parity':
            weights = risk_parity_weights(factors, window=window or 30)
        elif self.weight_method == 'regression':
            if forward_return is None:
                raise ValueError("回归权重需要提供forward_return")
            weights = regression_weights(factors, forward_return, window=window)
        elif self.weight_method == 'equal':
            weights = equal_weights(factors)
        else:
            raise ValueError(f"未知的权重方法: {self.weight_method}")

        # Update state only after the weights were computed, so a failed fit
        # never leaves factor_names and weights out of sync.
        self.factor_names = list(factors.columns)
        self.weights = weights
        return self

    def predict(self, factors: pd.DataFrame) -> pd.Series:
        """
        Compute the composite multi-factor score.

        Parameters:
        -----------
        factors : DataFrame
            Factor data; must contain every column seen at fit time.
            Extra columns are ignored.

        Returns:
        --------
        Series
            Row-wise weighted sum of the factor columns.

        Raises:
        -------
        ValueError
            If the model has not been fitted.
        """
        if self.weights is None:
            raise ValueError("模型尚未拟合,请先调用fit()")

        # Select the fitted factors in their fit-time order.
        factors_aligned = factors[self.factor_names]

        # Weights are matched to columns by label, then summed per row.
        score = (factors_aligned * self.weights).sum(axis=1)

        return score

    def get_weights(self) -> pd.Series:
        """Return a copy of the fitted weights (empty float Series if unfitted)."""
        if self.weights is None:
            # Explicit dtype keeps the unfitted result numeric, matching the
            # fitted weights, instead of relying on the empty-Series default.
            return pd.Series(dtype=float)
        return self.weights.copy()
|
||
|