"""
Factor-layer abstractions.

Core components:
- FactorBase: abstract base class for factors
- FactorRegistry: registry of factor classes
- FactorCombiner: weighted combination of several factors
"""
import inspect

import pandas as pd
import numpy as np
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Any, ClassVar, Type
from dataclasses import dataclass


@dataclass
class FactorMeta:
    """Metadata describing a factor."""
    name: str
    category: str  # 'momentum', 'trend', 'reversal', 'volatility', 'fundamental'
    params: Dict[str, Any]
    description: str = ""


class FactorBase(ABC):
    """
    Abstract base class for factors.

    Every factor must inherit from this class and implement ``compute``.
    Provides parameter storage, a basic data-length validation and metadata.
    """

    # Class attributes; subclasses override them to identify the factor.
    name: str = "base"
    category: str = "unknown"

    def __init__(self, **params):
        """
        Initialise the factor.

        Args:
            **params: Factor parameters (e.g. n_days=25, period=14).
        """
        self._params = params
        self._meta = FactorMeta(
            name=self.name,
            category=self.category,
            params=params,
            description=self.__doc__ or ""
        )

    @abstractmethod
    def compute(self, data: pd.DataFrame) -> pd.Series:
        """
        Compute the factor values.

        Args:
            data: DataFrame containing OHLCV data.

        Returns:
            Series of factor values.
        """
        pass

    @property
    def params(self) -> Dict[str, Any]:
        """Return the factor parameters."""
        return self._params

    @property
    def meta(self) -> FactorMeta:
        """Return the factor metadata."""
        return self._meta

    def validate_data(self, data: pd.DataFrame) -> bool:
        """
        Check whether ``data`` is long enough for this factor.

        Args:
            data: Input DataFrame.

        Returns:
            True when ``len(data)`` is at least the ``min_periods`` parameter
            (20 when that parameter was not supplied).
        """
        min_periods = self._params.get('min_periods', 20)
        return len(data) >= min_periods

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(name={self.name}, params={self._params})"


class FactorRegistry:
    """
    Registry of factor classes.

    Supports:
    - registering a factor class
    - creating factor instances by name
    - listing available factors
    - filtering / grouping factors by category
    """

    # Shared, process-wide mapping: factor name -> factor class.
    _factors: ClassVar[Dict[str, Type[FactorBase]]] = {}

    @classmethod
    def register(cls, factor_class: type) -> None:
        """
        Register a factor class under its ``name`` class attribute.

        Args:
            factor_class: Concrete subclass of FactorBase.

        Raises:
            TypeError: If ``factor_class`` is not a FactorBase subclass, or
                is still abstract (``compute`` not implemented).
        """
        if not isinstance(factor_class, type) or not issubclass(factor_class, FactorBase):
            raise TypeError("factor_class must be a subclass of FactorBase")
        if inspect.isabstract(factor_class):
            raise TypeError(
                f"factor_class must be a concrete subclass of FactorBase; "
                f"{factor_class.__name__} is abstract"
            )

        # Read the class attribute directly instead of building a temporary
        # instance: instantiation fails for factors whose __init__ has
        # required parameters.
        name = factor_class.name
        cls._factors[name] = factor_class
        print(f"✓ 因子已注册: {name} ({factor_class.__name__})")

    @classmethod
    def get(cls, name: str, **params) -> FactorBase:
        """
        Create an instance of a registered factor.

        Args:
            name: Registered factor name.
            **params: Parameters forwarded to the factor constructor.

        Returns:
            A new factor instance.

        Raises:
            KeyError: If no factor is registered under ``name``.
        """
        if name not in cls._factors:
            raise KeyError(f"Factor '{name}' not registered. Available: {cls.list()}")

        factor_class = cls._factors[name]
        return factor_class(**params)

    @classmethod
    def list(cls, category: Optional[str] = None) -> List[str]:
        """
        List the names of registered factors.

        Args:
            category: Optional category filter; a falsy value lists all.

        Returns:
            Factor names in registration order.
        """
        if category:
            return [
                name
                for name, factor_class in cls._factors.items()
                if factor_class.category == category
            ]
        # Inside a method body, ``list`` still resolves to the builtin.
        return list(cls._factors.keys())

    @classmethod
    def list_by_category(cls) -> Dict[str, List[str]]:
        """
        Group registered factor names by category.

        Returns:
            Mapping of category -> list of factor names.
        """
        result: Dict[str, List[str]] = {}
        for name, factor_class in cls._factors.items():
            result.setdefault(factor_class.category, []).append(name)
        return result

    @classmethod
    def clear(cls) -> None:
        """Empty the registry (intended for tests)."""
        cls._factors.clear()


class FactorCombiner:
    """
    Combine several factors into one.

    Used for:
    - multi-factor strategies
    - adjusting factor weights
    - merging factor results

    NOTE: factor names are used as output column names, so two factors
    sharing the same ``name`` overwrite each other's columns.
    """

    # Combination methods accepted by ``compute``.
    _METHODS = ('weighted_sum', 'average', 'max', 'min')

    def __init__(
        self,
        factors: List[FactorBase],
        weights: Optional[List[float]] = None,
        method: str = 'weighted_sum'
    ):
        """
        Initialise the combiner.

        Args:
            factors: Non-empty list of factor instances.
            weights: One weight per factor; None or empty means equal
                weights. Weights are normalised to sum to 1.
            method: Combination method
                ('weighted_sum', 'average', 'max', 'min').

        Raises:
            ValueError: If ``factors`` is empty, the number of weights does
                not match the number of factors, or the weights sum to zero.
        """
        if not factors:
            raise ValueError("factors must not be empty")

        self._factors = factors
        self._method = method

        raw_weights = weights or [1.0 / len(factors)] * len(factors)
        if len(raw_weights) != len(factors):
            raise ValueError(f"weights length ({len(raw_weights)}) != factors length ({len(factors)})")

        self._weights = self._normalize_weights(raw_weights)

    @staticmethod
    def _normalize_weights(weights: List[float]) -> List[float]:
        """Scale ``weights`` to sum to 1; reject a zero total."""
        total = sum(weights)
        if total == 0:
            raise ValueError("weights must not sum to zero")
        return [w / total for w in weights]

    def compute(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Compute every factor and the combined value.

        Factors whose ``validate_data`` returns False are skipped with a
        printed warning.

        Args:
            data: Input data.

        Returns:
            DataFrame indexed like ``data`` with, for each computed factor,
            a ``<name>`` and a ``<name>_weighted`` column, plus a
            ``combined`` column.

        Raises:
            ValueError: If the configured method is unknown.
        """
        # Fail before doing any factor computation.
        if self._method not in self._METHODS:
            raise ValueError(f"Unknown method: {self._method}")

        result = pd.DataFrame(index=data.index)

        for factor, weight in zip(self._factors, self._weights):
            if not factor.validate_data(data):
                print(f"⚠ 因子 {factor.name} 数据验证失败,跳过")
                continue

            factor_values = factor.compute(data)
            result[factor.name] = factor_values
            result[f"{factor.name}_weighted"] = factor_values * weight

        # Only factors that were actually computed take part in the combination.
        factor_cols = [f.name for f in self._factors if f.name in result.columns]

        if self._method == 'weighted_sum':
            weighted_cols = [f"{col}_weighted" for col in factor_cols]
            combined = result[weighted_cols].sum(axis=1)
        elif self._method == 'average':
            combined = result[factor_cols].mean(axis=1)
        elif self._method == 'max':
            combined = result[factor_cols].max(axis=1)
        else:  # 'min'
            combined = result[factor_cols].min(axis=1)

        result['combined'] = combined
        return result

    @property
    def factors(self) -> List[FactorBase]:
        """Return the factor list."""
        return self._factors

    @property
    def weights(self) -> List[float]:
        """Return the normalised weights."""
        return self._weights

    def set_weights(self, weights: List[float]) -> None:
        """
        Replace the weights (normalised to sum to 1).

        Args:
            weights: One weight per factor.

        Raises:
            ValueError: If the length does not match the number of factors
                or the weights sum to zero. The current weights are left
                untouched in that case.
        """
        if len(weights) != len(self._factors):
            raise ValueError("weights length must equal factors length")
        self._weights = self._normalize_weights(weights)

    def __repr__(self) -> str:
        factor_names = [f.name for f in self._factors]
        return f"FactorCombiner(factors={factor_names}, weights={self._weights})"