feat(factors): 实现因子层抽象
核心组件:
- FactorBase: 因子抽象基类(compute方法 + 数据验证)
- FactorRegistry: 因子注册器(注册/获取/按类别筛选)
- FactorCombiner: 因子组合器(加权组合4种方法)

已实现因子:
- MomentumFactor: 加权动量因子(含崩盘过滤)
- TrendFactor: 趋势因子(MA交叉/MACD)
- ReversalFactor: 反转因子(RSI/KDJ)
- VolatilityFactor: 波动率因子(ATR/标准差)

测试覆盖:18个测试全部通过
This commit is contained in:
282
framework/factors/__init__.py
Normal file
282
framework/factors/__init__.py
Normal file
@@ -0,0 +1,282 @@
|
||||
"""
|
||||
因子层抽象设计
|
||||
|
||||
核心组件:
|
||||
- FactorBase: 因子抽象基类
|
||||
- FactorRegistry: 因子注册器
|
||||
- FactorCombiner: 因子组合器
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List, Optional, Any
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class FactorMeta:
    """Descriptive metadata attached to a factor.

    Attributes:
        name: Identifier of the factor.
        category: Factor family, e.g. 'momentum', 'trend', 'reversal',
            'volatility' or 'fundamental'.
        params: Keyword parameters the factor was configured with.
        description: Free-form description; defaults to an empty string.
    """

    name: str
    category: str  # 'momentum', 'trend', 'reversal', 'volatility', 'fundamental'
    params: Dict[str, Any]
    description: str = ""
|
||||
|
||||
|
||||
class FactorBase(ABC):
    """Abstract base class for all factors.

    Every factor derives from this class and implements ``compute``.
    The base class keeps the keyword parameters it was constructed with,
    builds a ``FactorMeta`` record from them and provides a default
    length-based data check.
    """

    # Class attributes (may be overridden by configuration / subclasses).
    name: str = "base"
    category: str = "unknown"

    def __init__(self, **params):
        """Store the factor parameters and build the metadata record.

        Args:
            **params: Factor parameters (e.g. ``n_days=25``, ``period=14``).
        """
        # The instance's own class docstring doubles as the description;
        # a missing or empty docstring becomes "".
        doc = self.__doc__
        self._params = params
        self._meta = FactorMeta(
            name=self.name,
            category=self.category,
            params=params,
            description=doc if doc else "",
        )

    @abstractmethod
    def compute(self, data: pd.DataFrame) -> pd.Series:
        """Compute the factor values.

        Args:
            data: DataFrame containing OHLCV data.

        Returns:
            The factor values as a Series.
        """

    @property
    def params(self) -> Dict[str, Any]:
        """Return the parameters this factor was constructed with."""
        return self._params

    @property
    def meta(self) -> FactorMeta:
        """Return the factor's metadata record."""
        return self._meta

    def validate_data(self, data: pd.DataFrame) -> bool:
        """Check whether ``data`` is long enough to compute the factor.

        The default rule only compares the number of rows with the
        ``min_periods`` parameter (20 when that parameter was not given).

        Args:
            data: Input DataFrame.

        Returns:
            Whether ``len(data)`` reaches the minimum number of periods.
        """
        required_rows = self._params.get('min_periods', 20)
        has_enough_rows = len(data) >= required_rows
        return has_enough_rows

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f"{cls_name}(name={self.name}, params={self._params})"
|
||||
|
||||
|
||||
class FactorRegistry:
    """Process-wide registry mapping factor names to factor classes.

    Supports:
    - registering factor classes (also usable as a class decorator),
    - creating factor instances by name,
    - listing the registered names, optionally filtered by category,
    - grouping the registered names by category.

    All state lives on the class itself, so every caller shares one registry.
    """

    # name -> factor class; shared by all users of the registry.
    _factors: Dict[str, type] = {}

    @staticmethod
    def _category_of(factor_class: type) -> str:
        """Return the category reported by a default-constructed instance."""
        return factor_class().category

    @classmethod
    def register(cls, factor_class: type) -> type:
        """Register a factor class under the name its instances report.

        Registering a second class with the same name replaces the first.
        The class is returned unchanged so the method can also be applied
        as a decorator (``@FactorRegistry.register``).

        Args:
            factor_class: Factor class; must be a subclass of FactorBase.

        Returns:
            ``factor_class`` itself.

        Raises:
            TypeError: If ``factor_class`` is not a FactorBase subclass.
        """
        is_factor = isinstance(factor_class, type) and issubclass(factor_class, FactorBase)
        if not is_factor:
            raise TypeError("factor_class must be a subclass of FactorBase")

        # A temporary default-constructed instance supplies the name.
        name = factor_class().name
        cls._factors[name] = factor_class
        print(f"✓ 因子已注册: {name} ({factor_class.__name__})")
        return factor_class

    @classmethod
    def get(cls, name: str, **params) -> FactorBase:
        """Create an instance of the factor registered under ``name``.

        Args:
            name: Registered factor name.
            **params: Parameters forwarded to the factor constructor.

        Returns:
            A new factor instance.

        Raises:
            KeyError: If no factor is registered under ``name``.
        """
        try:
            factor_class = cls._factors[name]
        except KeyError:
            raise KeyError(
                f"Factor '{name}' not registered. Available: {cls.list()}"
            ) from None
        return factor_class(**params)

    @classmethod
    def list(cls, category: Optional[str] = None) -> List[str]:
        """List the registered factor names.

        Args:
            category: When given (and non-empty), only factors whose
                category equals it are returned.

        Returns:
            Factor names in registration order.
        """
        if not category:
            # Unpacking avoids any visual clash with this method's own name.
            return [*cls._factors]
        return [
            name
            for name, factor_class in cls._factors.items()
            if cls._category_of(factor_class) == category
        ]

    @classmethod
    def list_by_category(cls) -> Dict[str, List[str]]:
        """Group the registered factor names by category.

        Returns:
            Mapping of category to the list of factor names in it.
        """
        grouped: Dict[str, List[str]] = {}
        for name, factor_class in cls._factors.items():
            grouped.setdefault(cls._category_of(factor_class), []).append(name)
        return grouped

    @classmethod
    def clear(cls) -> None:
        """Empty the registry (intended for tests)."""
        cls._factors.clear()
|
||||
|
||||
|
||||
class FactorCombiner:
    """Combine the outputs of several factors into one series.

    Used for multi-factor strategies, weight adjustment and merging factor
    results. Weights are always stored normalised so that they sum to 1.

    Notes:
        - Only ``'weighted_sum'`` uses the weights; ``'average'``, ``'max'``
          and ``'min'`` operate on the raw factor columns.
        - A factor whose ``validate_data`` check fails is skipped; the
          weights of the remaining factors are not redistributed.
    """

    # Row-wise DataFrame reducers for the methods that ignore the weights.
    _ROW_REDUCERS = {'average': 'mean', 'max': 'max', 'min': 'min'}

    def __init__(
        self,
        factors: List[FactorBase],
        weights: Optional[List[float]] = None,
        method: str = 'weighted_sum'
    ):
        """Create a combiner.

        Args:
            factors: Factor instances to combine; must not be empty.
            weights: One weight per factor. ``None`` or an empty sequence
                means equal weights.
            method: Combination method: 'weighted_sum', 'average', 'max'
                or 'min'.

        Raises:
            ValueError: If ``factors`` is empty, the number of weights does
                not match the number of factors, or the weights sum to zero.
        """
        n_factors = len(factors)
        if n_factors == 0:
            raise ValueError("factors must not be empty")

        self._factors = factors
        self._method = method

        # An explicit None/length test keeps array-like weights usable,
        # unlike a bare truthiness check.
        if weights is None or len(weights) == 0:
            weights = [1.0 / n_factors] * n_factors

        if len(weights) != n_factors:
            raise ValueError(f"weights length ({len(weights)}) != factors length ({n_factors})")

        self._weights = self._normalize(weights)

    @staticmethod
    def _normalize(weights: List[float]) -> List[float]:
        """Scale ``weights`` to sum to 1, rejecting a zero total."""
        total = sum(weights)
        if total == 0:
            raise ValueError("weights must not sum to zero")
        return [w / total for w in weights]

    def compute(self, data: pd.DataFrame) -> pd.DataFrame:
        """Compute every factor and the combined value.

        Args:
            data: Input data passed to each factor.

        Returns:
            DataFrame indexed like ``data`` with, for each factor that passed
            validation, a ``<name>`` and a ``<name>_weighted`` column, plus a
            ``combined`` column.

        Raises:
            ValueError: If the configured method is unknown.
        """
        # Reject an unknown method before doing any factor computation.
        if self._method != 'weighted_sum' and self._method not in self._ROW_REDUCERS:
            raise ValueError(f"Unknown method: {self._method}")

        result = pd.DataFrame(index=data.index)

        for i, factor in enumerate(self._factors):
            if not factor.validate_data(data):
                print(f"⚠ 因子 {factor.name} 数据验证失败,跳过")
                continue

            factor_values = factor.compute(data)
            result[factor.name] = factor_values
            result[f"{factor.name}_weighted"] = factor_values * self._weights[i]

        # Only factors that were actually computed take part in the combination.
        computed = [f.name for f in self._factors if f.name in result.columns]

        if self._method == 'weighted_sum':
            weighted_cols = [f"{name}_weighted" for name in computed]
            result['combined'] = result[weighted_cols].sum(axis=1)
        else:
            reducer = getattr(result[computed], self._ROW_REDUCERS[self._method])
            result['combined'] = reducer(axis=1)

        return result

    @property
    def factors(self) -> List[FactorBase]:
        """Return the list of factors."""
        return self._factors

    @property
    def weights(self) -> List[float]:
        """Return the normalised weights."""
        return self._weights

    def set_weights(self, weights: List[float]) -> None:
        """Replace the weights; they are normalised to sum to 1.

        Args:
            weights: One weight per factor.

        Raises:
            ValueError: If the number of weights does not match the number
                of factors, or the weights sum to zero.
        """
        if len(weights) != len(self._factors):
            raise ValueError("weights length must equal factors length")
        self._weights = self._normalize(weights)

    def __repr__(self) -> str:
        factor_names = [f.name for f in self._factors]
        return f"FactorCombiner(factors={factor_names}, weights={self._weights})"
|
||||
Reference in New Issue
Block a user