Files
factorhack/combination.py
2025-11-08 13:39:02 +08:00

153 lines
4.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Factor combination module: multi-factor model.
"""
import numpy as np
import pandas as pd
from typing import Dict, List, Optional
from statsmodels.regression.linear_model import OLS
def risk_parity_weights(factors: pd.DataFrame, window: int = 30) -> pd.Series:
    """
    Risk-parity weighting: weight each factor by its inverse volatility.

    The weight of factor i is ``(1 / sigma_i) / sum_j (1 / sigma_j)``, where
    ``sigma_i`` is the last value of the rolling standard deviation of that
    factor's column.

    Parameters:
    -----------
    factors : DataFrame
        Factor data, one column per factor.
    window : int
        Rolling window used to compute the volatility.

    Returns:
    --------
    Series
        Weights indexed by factor name. When no volatility can be computed
        (no rows, or fewer than ``window`` observations) equal weights are
        returned instead of an all-NaN vector. A frame without columns
        yields an empty Series.
    """
    columns = factors.columns
    if len(columns) == 0:
        return pd.Series(dtype=float)

    # `.iloc[-1]` would raise IndexError on a frame with no rows.
    if len(factors) > 0:
        # Latest rolling volatility of every factor.
        volatilities = factors.rolling(window).std().iloc[-1]
        # The small epsilon guards against division by zero for constant factors.
        inv_vol = 1.0 / (volatilities + 1e-8)
        # Series.sum() skips NaN, so the total is 0.0 exactly when every
        # volatility is undefined (e.g. fewer than `window` rows).
        total = inv_vol.sum()
        if total > 0:
            # A factor whose own volatility is NaN keeps a NaN weight here.
            return inv_vol / total

    # Not enough data for any volatility estimate: fall back to equal weights.
    return pd.Series(1.0 / len(columns), index=columns)
def regression_weights(
    factors: pd.DataFrame,
    forward_return: pd.Series,
    window: Optional[int] = None,
    min_obs: int = 30,
) -> pd.Series:
    """
    Regression-coefficient weighting: use normalized |beta| as weights.

    The factors are regressed on ``forward_return`` with OLS (the design
    matrix is the raw factor columns; no constant is added here). The
    absolute coefficients are then scaled by their sum.

    Parameters:
    -----------
    factors : DataFrame
        Factor data, one column per factor.
    forward_return : Series
        Forward returns, aligned to ``factors`` by index.
    window : int, optional
        Number of most recent complete rows to use; None means the full
        sample.
    min_obs : int
        Minimum number of complete rows required to run the regression.
        With fewer rows equal weights are returned. Note that a ``window``
        smaller than ``min_obs`` therefore always yields equal weights
        unless ``min_obs`` is lowered as well.

    Returns:
    --------
    Series
        Non-negative weights indexed by factor name. Equal weights are
        returned when data is insufficient or the regression fails; an
        empty Series is returned when ``factors`` has no columns.
    """
    columns = factors.columns
    if len(columns) == 0:
        # Nothing to weight; avoids dividing by zero in the fallback below.
        return pd.Series(dtype=float)

    equal = pd.Series(1.0 / len(columns), index=columns)

    # Align on the index and keep only rows where every value is present.
    data = pd.concat([factors, forward_return], axis=1).dropna()
    if window is not None and len(data) > window:
        data = data.iloc[-window:]
    if len(data) < min_obs:
        # Too few observations: fall back to equal weights.
        return equal

    # The forward return is the last concatenated column.
    y = data.iloc[:, -1].values
    X = data.iloc[:, :-1].values
    try:
        model = OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 6})
        betas = pd.Series(model.params, index=columns)
        # Normalize the absolute coefficients; the epsilon keeps the
        # denominator non-zero when every coefficient is zero.
        abs_betas = np.abs(betas)
        return abs_betas / (abs_betas.sum() + 1e-8)
    except Exception as e:
        # Deliberate best-effort: report the failure and use equal weights.
        print(f"回归权重计算出错: {e}")
        return equal
def equal_weights(factors: pd.DataFrame) -> pd.Series:
    """Equal weighting: every factor column receives the weight 1 / N."""
    names = factors.columns
    share = 1.0 / len(names)
    return pd.Series(share, index=names)
class MultiFactorModel:
    """Multi-factor model: combines factor columns into one weighted score."""

    def __init__(self, weight_method: str = 'risk_parity'):
        """
        Parameters:
        -----------
        weight_method : str
            Weighting scheme: 'risk_parity', 'regression' or 'equal'.
            The value is validated when ``fit`` is called.
        """
        self.weight_method = weight_method
        # Both stay unset/empty until fit() succeeds.
        self.weights: Optional[pd.Series] = None
        self.factor_names: List[str] = []

    def fit(
        self,
        factors: pd.DataFrame,
        forward_return: Optional[pd.Series] = None,
        window: Optional[int] = None
    ) -> "MultiFactorModel":
        """
        Fit the model by computing the factor weights.

        Parameters:
        -----------
        factors : DataFrame
            Factor data, one column per factor.
        forward_return : Series, optional
            Forward returns (required for the 'regression' method).
        window : int, optional
            Rolling window. For 'risk_parity' a missing (or zero) value
            defaults to 30; for 'regression' None is passed through as-is.

        Returns:
        --------
        MultiFactorModel
            The fitted model itself, so calls can be chained.

        Raises:
        -------
        ValueError
            If the method is 'regression' and ``forward_return`` is None,
            or if ``weight_method`` is not one of the known methods.
        """
        if self.weight_method == 'risk_parity':
            weights = risk_parity_weights(factors, window=window or 30)
        elif self.weight_method == 'regression':
            if forward_return is None:
                raise ValueError("回归权重需要提供forward_return")
            weights = regression_weights(factors, forward_return, window=window)
        elif self.weight_method == 'equal':
            weights = equal_weights(factors)
        else:
            raise ValueError(f"未知的权重方法: {self.weight_method}")

        # Commit state only after the weights were computed, so a failed fit
        # never leaves new factor names paired with stale weights.
        self.factor_names = list(factors.columns)
        self.weights = weights
        return self

    def predict(self, factors: pd.DataFrame) -> pd.Series:
        """
        Compute the combined multi-factor score.

        Parameters:
        -----------
        factors : DataFrame
            Factor data; must contain every column seen by ``fit``.
            Extra columns are ignored.

        Returns:
        --------
        Series: the weighted sum of the factor columns for each row.

        Raises:
        -------
        ValueError
            If the model has not been fitted yet.
        """
        if self.weights is None:
            raise ValueError("模型尚未拟合请先调用fit()")
        # Select the fitted factors in their fitted order.
        factors_aligned = factors[self.factor_names]
        # Weights are matched to columns by label, then summed across each row.
        score = (factors_aligned * self.weights).sum(axis=1)
        return score

    def get_weights(self) -> pd.Series:
        """Return a copy of the factor weights (an empty Series before fit)."""
        if self.weights is None:
            # An explicit dtype keeps the empty result numeric.
            return pd.Series(dtype=float)
        return self.weights.copy()