第一版流程

This commit is contained in:
2025-11-08 13:39:02 +08:00
parent dcfe2d84d5
commit a66e42a8ae
11 changed files with 1648 additions and 0 deletions

152
combination.py Normal file
View File

@@ -0,0 +1,152 @@
"""
因子组合模块:多因子模型
"""
import numpy as np
import pandas as pd
from typing import Dict, List, Optional
from statsmodels.regression.linear_model import OLS
def risk_parity_weights(factors: pd.DataFrame, window: int = 30) -> pd.Series:
    """
    Inverse-volatility ("risk parity") weights for a set of factors.

    Each factor is weighted by the reciprocal of its sample standard
    deviation over the most recent ``window`` rows, normalised to sum to one:
    ``w_i = (1 / sigma_i) / sum_j (1 / sigma_j)``.

    Parameters:
    -----------
    factors : DataFrame
        One column per factor, one row per observation.
    window : int
        Number of trailing rows used to estimate volatility. When fewer
        rows are available, all available rows are used.

    Returns:
    --------
    Series
        Weights indexed by ``factors.columns``. A factor whose volatility
        cannot be estimated (fewer than two non-NaN values in the window)
        gets weight 0. If no factor's volatility can be estimated, equal
        weights are returned. An input with no columns yields an empty
        Series.

    Raises:
    -------
    ValueError
        If ``window`` is negative.
    """
    if window < 0:
        raise ValueError("window must be an integer 0 or greater")

    n_factors = len(factors.columns)
    if n_factors == 0:
        return pd.Series(index=factors.columns, dtype=float)

    equal = pd.Series(1.0 / n_factors, index=factors.columns)

    # Only the trailing window matters, so slice it directly instead of
    # computing a rolling statistic for every row and discarding all but one.
    recent = factors.iloc[-window:] if window > 0 else factors.iloc[:0]
    if len(recent) < 2:
        # A sample standard deviation needs at least two observations.
        return equal

    volatilities = recent.std()

    # The small epsilon keeps a constant (zero-volatility) factor from
    # causing a division by zero.
    inv_vol = 1.0 / (volatilities + 1e-8)
    # NaN volatility -> no usable estimate -> the factor carries no weight.
    inv_vol = inv_vol.fillna(0.0)

    total = inv_vol.sum()
    if total <= 0:
        return equal
    return inv_vol / total
def regression_weights(
    factors: pd.DataFrame,
    forward_return: pd.Series,
    window: Optional[int] = None,
    min_obs: int = 30,
) -> pd.Series:
    """
    Weights proportional to the absolute OLS coefficient of each factor.

    ``forward_return`` is regressed on the factor columns, and each factor's
    weight is ``|beta_i| / sum_j |beta_j|``. The design matrix is passed to
    OLS exactly as given; no constant column is added here. HAC covariance
    options are passed to the fit, but only the fitted coefficients are
    used.

    Parameters:
    -----------
    factors : DataFrame
        One column per factor.
    forward_return : Series
        Forward return, aligned to ``factors`` by index.
    window : int, optional
        If given, only the last ``window`` complete rows are used; ``None``
        uses the full sample.
    min_obs : int
        Minimum number of complete rows required to run the regression.
        Note that a ``window`` smaller than ``min_obs`` always produces the
        equal-weight fallback.

    Returns:
    --------
    Series
        Non-negative weights indexed by ``factors.columns``. Equal weights
        are returned when there are too few observations, when the
        regression fails, or when the coefficients are non-finite or all
        zero. An input with no columns yields an empty Series.
    """
    n_factors = len(factors.columns)
    if n_factors == 0:
        # Nothing to weight; also avoids dividing by zero below.
        return pd.Series(index=factors.columns, dtype=float)

    equal = pd.Series(1.0 / n_factors, index=factors.columns)

    # Align factors and target on the index and keep only complete rows.
    data = pd.concat([factors, forward_return], axis=1).dropna()
    if window is not None and len(data) > window:
        data = data.iloc[-window:]
    if len(data) < min_obs:
        return equal

    # The target was concatenated last, so it is the final column.
    y = data.iloc[:, -1].values
    X = data.iloc[:, :-1].values
    try:
        model = OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 6})
        betas = pd.Series(model.params, index=factors.columns)
    except Exception as e:
        # Deliberate best-effort: report the failure and degrade to equal
        # weights rather than aborting the caller.
        print(f"回归权重计算出错: {e}")
        return equal

    abs_betas = betas.abs().astype(float)
    total = abs_betas.sum()
    # Series.sum() skips NaN, so check the individual coefficients too.
    if not np.isfinite(abs_betas.to_numpy()).all() or total <= 0:
        return equal

    # The sign of each coefficient is discarded; only magnitudes are kept.
    return abs_betas / (total + 1e-8)
def equal_weights(factors: pd.DataFrame) -> pd.Series:
    """
    Assign every factor the same weight, ``1 / n``.

    Parameters:
    -----------
    factors : DataFrame
        One column per factor; only the column labels are used.

    Returns:
    --------
    Series
        Weights indexed by ``factors.columns``. An input with no columns
        yields an empty float Series instead of dividing by zero.
    """
    n = len(factors.columns)
    if n == 0:
        return pd.Series(index=factors.columns, dtype=float)
    return pd.Series(1.0 / n, index=factors.columns)
class MultiFactorModel:
    """
    Multi-factor model: combines factor columns into one composite score.

    ``fit`` derives one weight per factor using the configured weighting
    method; ``predict`` returns the weighted sum of the factor columns.
    """

    def __init__(self, weight_method: str = 'risk_parity'):
        """
        Parameters:
        -----------
        weight_method : str
            Weighting method: 'risk_parity', 'regression' or 'equal'.
            The value is validated when ``fit`` is called, not here.
        """
        self.weight_method = weight_method
        # Both attributes are populated by fit(); weights stays None until then.
        self.weights: Optional[pd.Series] = None
        self.factor_names: List[str] = []

    def fit(
        self,
        factors: pd.DataFrame,
        forward_return: Optional[pd.Series] = None,
        window: Optional[int] = None
    ):
        """
        Compute and store the factor weights.

        Parameters:
        -----------
        factors : DataFrame
            One column per factor.
        forward_return : Series, optional
            Forward return; required by the 'regression' method.
        window : int, optional
            Window forwarded to the weighting function. For 'risk_parity'
            a missing (or zero) window defaults to 30; 'equal' ignores it.

        Raises:
        -------
        ValueError
            If the method is 'regression' and ``forward_return`` is None,
            or if ``weight_method`` is not a known method. In either case
            the previously fitted state is left untouched.
        """
        if self.weight_method == 'risk_parity':
            weights = risk_parity_weights(factors, window=window or 30)
        elif self.weight_method == 'regression':
            if forward_return is None:
                raise ValueError("回归权重需要提供forward_return")
            weights = regression_weights(factors, forward_return, window=window)
        elif self.weight_method == 'equal':
            weights = equal_weights(factors)
        else:
            raise ValueError(f"未知的权重方法: {self.weight_method}")

        # Commit state only after the weights were computed, so a failed
        # fit cannot leave new factor names paired with stale weights.
        self.weights = weights
        self.factor_names = list(factors.columns)

    def predict(self, factors: pd.DataFrame) -> pd.Series:
        """
        Compute the composite multi-factor score.

        Parameters:
        -----------
        factors : DataFrame
            Must contain every column seen during ``fit``; extra columns
            are ignored.

        Returns:
        --------
        Series: weighted sum of the fitted factor columns, one value per row.

        Raises:
        -------
        ValueError
            If the model has not been fitted.
        """
        if self.weights is None:
            raise ValueError("模型尚未拟合请先调用fit()")
        # Select the fitted factors, in the order seen during fit.
        factors_aligned = factors[self.factor_names]
        # Weights align to columns by label; the row-wise sum is the score.
        score = (factors_aligned * self.weights).sum(axis=1)
        return score

    def get_weights(self) -> pd.Series:
        """
        Return a copy of the fitted factor weights.

        An empty float Series is returned when the model is not fitted.
        """
        if self.weights is None:
            # Explicit dtype avoids the object-dtype default of pd.Series().
            return pd.Series(dtype=float)
        return self.weights.copy()