Files
etf/framework/factors/__init__.py
aszerW 796a695eef feat(factors): 实现因子层抽象
核心组件:
- FactorBase: 因子抽象基类(compute方法 + 数据验证)
- FactorRegistry: 因子注册器(注册/获取/按类别筛选)
- FactorCombiner: 因子组合器(加权组合4种方法)

已实现因子:
- MomentumFactor: 加权动量因子(含崩盘过滤)
- TrendFactor: 趋势因子(MA交叉/MACD)
- ReversalFactor: 反转因子(RSI/KDJ)
- VolatilityFactor: 波动率因子(ATR/标准差)

测试覆盖:18个测试全部通过
2026-05-11 22:17:53 +08:00

282 lines
8.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
因子层抽象设计
核心组件:
- FactorBase: 因子抽象基类
- FactorRegistry: 因子注册器
- FactorCombiner: 因子组合器
"""
import pandas as pd
import numpy as np
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
@dataclass
class FactorMeta:
    """Descriptive record attached to a factor instance.

    Holds the factor's identifying name, the category it belongs to, the
    parameters it was configured with, and an optional free-text
    description (empty by default).
    """

    # Identifier of the factor.
    name: str
    # Category label, e.g. 'momentum', 'trend', 'reversal', 'volatility',
    # 'fundamental'.
    category: str
    # Parameters the factor was configured with.
    params: Dict[str, Any]
    # Human-readable description; defaults to an empty string.
    description: str = ""
class FactorBase(ABC):
    """Abstract parent class for all factors.

    Concrete factors inherit from this class and implement ``compute``.
    The base class keeps the keyword parameters passed at construction,
    publishes them together with a ``FactorMeta`` record, and supplies a
    default minimum-length data check.
    """

    # Class attributes (may be overridden by configuration).
    name: str = "base"
    category: str = "unknown"

    def __init__(self, **params):
        """Store the factor parameters and build the metadata record.

        Args:
            **params: Factor parameters, e.g. ``n_days=25``, ``period=14``.
        """
        self._params = params
        # The class docstring doubles as the description; a missing
        # docstring becomes an empty string.
        doc_text = self.__doc__ or ""
        self._meta = FactorMeta(
            name=self.name,
            category=self.category,
            params=params,
            description=doc_text,
        )

    @abstractmethod
    def compute(self, data: pd.DataFrame) -> pd.Series:
        """Compute the factor values.

        Args:
            data: DataFrame containing OHLCV data.

        Returns:
            Series of factor values.
        """

    @property
    def params(self) -> Dict[str, Any]:
        """Parameters this factor was constructed with."""
        return self._params

    @property
    def meta(self) -> FactorMeta:
        """Metadata record built at construction time."""
        return self._meta

    def validate_data(self, data: pd.DataFrame) -> bool:
        """Check whether ``data`` is long enough to compute the factor.

        The default rule requires at least ``min_periods`` rows, where
        ``min_periods`` is read from the factor parameters and falls back
        to 20 when absent.

        Args:
            data: Input DataFrame.

        Returns:
            True when ``len(data)`` reaches the required minimum.
        """
        required_rows = self._params.get('min_periods', 20)
        return required_rows <= len(data)

    def __repr__(self) -> str:
        return "{}(name={}, params={})".format(
            type(self).__name__, self.name, self._params
        )
class FactorRegistry:
    """Registry mapping factor names to factor classes.

    All methods are classmethods operating on one shared mapping, so the
    registry is used through the class itself. It supports:
    - registering a factor class,
    - building a factor instance by name,
    - listing registered names, optionally filtered by category,
    - grouping registered names by category.

    The ``name`` and ``category`` of a factor are read from the class
    attributes of the registered class; no factor is instantiated for
    registration or listing, so factor classes whose constructors require
    arguments can be registered and listed.
    """

    # Shared mapping: factor name -> factor class.
    _factors: Dict[str, type] = {}

    @classmethod
    def register(cls, factor_class: type) -> None:
        """Register a factor class under its ``name`` class attribute.

        Registering a second class with the same name replaces the first.
        A confirmation line is printed to stdout.

        Args:
            factor_class: A concrete subclass of ``FactorBase``.

        Raises:
            TypeError: If ``factor_class`` is not a class, is not a
                subclass of ``FactorBase``, or still has unimplemented
                abstract methods.
        """
        if not isinstance(factor_class, type) or not issubclass(factor_class, FactorBase):
            raise TypeError("factor_class must be a subclass of FactorBase")
        # An abstract class could never be instantiated by ``get``; reject
        # it up front (a non-empty __abstractmethods__ marks it abstract).
        if getattr(factor_class, "__abstractmethods__", None):
            raise TypeError(
                f"Can't register abstract factor class {factor_class.__name__}"
            )
        # Read the class attribute directly rather than building a
        # throw-away instance, which fails when the constructor needs
        # arguments.
        name = factor_class.name
        cls._factors[name] = factor_class
        print(f"✓ 因子已注册: {name} ({factor_class.__name__})")

    @classmethod
    def get(cls, name: str, **params) -> FactorBase:
        """Build an instance of the factor registered as ``name``.

        Args:
            name: Registered factor name.
            **params: Keyword arguments forwarded to the factor constructor.

        Returns:
            A new factor instance.

        Raises:
            KeyError: If no factor is registered under ``name``; the
                message lists the available names.
        """
        try:
            factor_class = cls._factors[name]
        except KeyError:
            raise KeyError(
                f"Factor '{name}' not registered. Available: {cls.list()}"
            ) from None
        return factor_class(**params)

    @classmethod
    def list(cls, category: Optional[str] = None) -> List[str]:
        """List registered factor names.

        Args:
            category: When given (and non-empty), only factors whose
                ``category`` equals this value are returned. ``None`` or
                an empty string returns every registered name.

        Returns:
            Factor names in registration order.
        """
        if category:
            return [
                name
                for name, factor_class in cls._factors.items()
                if factor_class.category == category
            ]
        # ``list`` here is the builtin: class-body names are not visible
        # inside method bodies.
        return list(cls._factors)

    @classmethod
    def list_by_category(cls) -> Dict[str, List[str]]:
        """Group registered factor names by category.

        Returns:
            Mapping of category -> list of factor names in that category.
        """
        grouped: Dict[str, List[str]] = {}
        for name, factor_class in cls._factors.items():
            grouped.setdefault(factor_class.category, []).append(name)
        return grouped

    @classmethod
    def clear(cls) -> None:
        """Empty the registry (intended for tests)."""
        cls._factors.clear()
class FactorCombiner:
    """Compute several factors on the same data and merge them into one score.

    Each factor contributes two columns to the output, ``<name>`` and
    ``<name>_weighted``; a final ``combined`` column is produced by one of
    four methods: ``'weighted_sum'``, ``'average'``, ``'max'``, ``'min'``.

    Weights are normalized to sum to 1 when set. Note that when a factor
    is skipped at compute time (its ``validate_data`` returned False), the
    remaining weights are used as they are and are not re-normalized.
    """

    # Row-wise reducers for the methods that operate on the raw (unweighted)
    # factor columns. 'weighted_sum' is handled separately because it reads
    # the weighted columns instead.
    _ROW_REDUCERS = {
        'average': pd.DataFrame.mean,
        'max': pd.DataFrame.max,
        'min': pd.DataFrame.min,
    }

    def __init__(
        self,
        factors: List[FactorBase],
        weights: Optional[List[float]] = None,
        method: str = 'weighted_sum'
    ):
        """Create a combiner.

        Args:
            factors: Factor instances to combine; must be non-empty and
                have distinct ``name`` values, because output columns are
                keyed by name.
            weights: One weight per factor; ``None`` or an empty list
                means equal weights. Weights are normalized to sum to 1.
            method: Combination method: ``'weighted_sum'``, ``'average'``,
                ``'max'`` or ``'min'``. Checked when ``compute`` is called.

        Raises:
            ValueError: If ``factors`` is empty, factor names are not
                unique, the number of weights differs from the number of
                factors, or the weights sum to zero.
        """
        if not factors:
            raise ValueError("factors must contain at least one factor")

        # Two factors with the same name would overwrite each other's
        # columns and be double-counted in 'combined'; fail loudly instead.
        seen = set()
        duplicates = []
        for factor in factors:
            if factor.name in seen and factor.name not in duplicates:
                duplicates.append(factor.name)
            seen.add(factor.name)
        if duplicates:
            raise ValueError(f"duplicate factor names: {duplicates}")

        if weights is None or len(weights) == 0:
            weights = [1.0 / len(factors)] * len(factors)
        if len(weights) != len(factors):
            raise ValueError(f"weights length ({len(weights)}) != factors length ({len(factors)})")

        self._factors = factors
        self._method = method
        self._weights = self._normalize(weights)

    @staticmethod
    def _normalize(weights: List[float]) -> List[float]:
        """Scale ``weights`` so they sum to 1; reject a zero total."""
        total = sum(weights)
        if total == 0:
            raise ValueError("weights must not sum to zero")
        return [w / total for w in weights]

    def compute(self, data: pd.DataFrame) -> pd.DataFrame:
        """Compute every factor and the combined value.

        Factors whose ``validate_data(data)`` returns False are skipped
        with a printed warning and contribute no columns.

        Args:
            data: Input data passed to each factor.

        Returns:
            DataFrame indexed like ``data`` with ``<name>`` and
            ``<name>_weighted`` columns for each computed factor, plus a
            ``combined`` column.

        Raises:
            ValueError: If the configured method is not recognized.
        """
        # Reject an unknown method before doing any factor computation.
        if self._method != 'weighted_sum' and self._method not in self._ROW_REDUCERS:
            raise ValueError(f"Unknown method: {self._method}")

        result = pd.DataFrame(index=data.index)
        computed_names = []
        for factor, weight in zip(self._factors, self._weights):
            if not factor.validate_data(data):
                print(f"⚠ 因子 {factor.name} 数据验证失败,跳过")
                continue
            factor_values = factor.compute(data)
            result[factor.name] = factor_values
            result[f"{factor.name}_weighted"] = factor_values * weight
            computed_names.append(factor.name)

        if self._method == 'weighted_sum':
            weighted_cols = [f"{name}_weighted" for name in computed_names]
            result['combined'] = result[weighted_cols].sum(axis=1)
        else:
            reducer = self._ROW_REDUCERS[self._method]
            result['combined'] = reducer(result[computed_names], axis=1)
        return result

    @property
    def factors(self) -> List[FactorBase]:
        """The factor instances, in the order given at construction."""
        return self._factors

    @property
    def weights(self) -> List[float]:
        """The normalized weights, aligned with ``factors``."""
        return self._weights

    def set_weights(self, weights: List[float]) -> None:
        """Replace the weights; they are normalized to sum to 1.

        Args:
            weights: One weight per factor.

        Raises:
            ValueError: If the number of weights differs from the number
                of factors, or the weights sum to zero. The existing
                weights are left untouched in that case.
        """
        if len(weights) != len(self._factors):
            raise ValueError("weights length must equal factors length")
        self._weights = self._normalize(weights)

    def __repr__(self) -> str:
        factor_names = [f.name for f in self._factors]
        return f"FactorCombiner(factors={factor_names}, weights={self._weights})"