"""
Factor-layer abstract interfaces (generic).

Only the abstract base class, the registry and the combiner live here;
concrete factor implementations are in strategies/shared/factors/.
"""

import logging
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Any, Type

import pandas as pd
import numpy as np

logger = logging.getLogger(__name__)


class FactorBase(ABC):
    """
    Abstract base class for factors.

    Every concrete factor must implement ``compute``. Subclasses are
    expected to override the class attributes ``name`` and ``category``.
    """

    name: str = "base"
    category: str = "unknown"

    def __init__(self, **params):
        """Store the factor parameters passed as keyword arguments."""
        self._params = params

    @abstractmethod
    def compute(self, data: pd.DataFrame) -> pd.Series:
        """
        Compute the factor values.

        Args:
            data: OHLCV data; must contain a 'close' column.

        Returns:
            Series of factor values.
        """
        pass

    def validate_data(self, data: pd.DataFrame) -> bool:
        """
        Check whether ``data`` is sufficient for computing the factor.

        Returns:
            False when the 'close' column is missing or when ``data`` has
            fewer rows than the ``min_periods`` parameter (20 when the
            parameter was not supplied); True otherwise.
        """
        if 'close' not in data.columns:
            return False
        min_periods = self._params.get('min_periods', 20)
        return len(data) >= min_periods

    def __repr__(self) -> str:
        params_str = ', '.join(f"{k}={v}" for k, v in self._params.items())
        return f"{self.__class__.__name__}({params_str})"


class FactorRegistry:
    """
    Factor registry (generic).

    Maps factor names to factor classes and builds instances on demand.
    """

    # Shared, class-level mapping: factor name -> factor class.
    _factors: Dict[str, Type[FactorBase]] = {}

    @classmethod
    def register(cls, factor_class: Type[FactorBase]) -> None:
        """
        Register a factor class under its ``name`` class attribute.

        The name is read from the class itself rather than from a throwaway
        instance, so factors whose constructor requires arguments can be
        registered and no constructor side effects are triggered.
        An existing registration with the same name is overwritten and a
        warning is logged.
        """
        name = factor_class.name
        if name in cls._factors:
            logger.warning("因子已注册,覆盖: %s", name)
        cls._factors[name] = factor_class

    @classmethod
    def get(cls, name: str, **params) -> FactorBase:
        """
        Build an instance of the factor registered as ``name``.

        Args:
            name: Registered factor name.
            **params: Keyword arguments forwarded to the factor constructor.

        Raises:
            ValueError: If no factor is registered under ``name``.
        """
        try:
            factor_class = cls._factors[name]
        except KeyError:
            raise ValueError(f"因子未注册: {name}") from None
        return factor_class(**params)

    @classmethod
    def list_factors(cls) -> List[str]:
        """Return the names of all registered factors."""
        return list(cls._factors)

    @classmethod
    def clear(cls) -> None:
        """Remove every registration."""
        # Clear in place: rebinding the attribute from a subclass would
        # create a shadow registry that diverges from the shared one.
        cls._factors.clear()

    @classmethod
    def get_category(cls, name: str) -> str:
        """
        Return the ``category`` of the factor registered as ``name``.

        Returns "unknown" when the name is not registered. The category is
        read from the class attribute, so no instance is constructed.
        """
        factor_class = cls._factors.get(name)
        if factor_class is None:
            return "unknown"
        return factor_class.category


class FactorCombiner:
    """
    Factor combiner (generic).

    Computes several factors on the same data and merges them into a single
    'combined' column using one of ``SUPPORTED_METHODS``.
    """

    SUPPORTED_METHODS = ['weighted_sum', 'rank_average', 'zscore_sum', 'equal_weight']

    def __init__(
        self,
        factors: List[FactorBase],
        weights: Optional[List[float]] = None,
        method: str = 'weighted_sum'
    ):
        """
        Initialise the combiner.

        Args:
            factors: Factor instances to combine; must not be empty.
            weights: Optional per-factor weights, same length as
                ``factors``. Defaults to equal weights summing to 1.
                Only the 'weighted_sum' method reads them.
            method: Combination method
                (weighted_sum/rank_average/zscore_sum/equal_weight).

        Raises:
            ValueError: If ``factors`` is empty, ``method`` is unsupported,
                or ``weights`` does not match ``factors`` in length.
        """
        if not factors:
            raise ValueError("factors list cannot be empty")
        if method not in self.SUPPORTED_METHODS:
            raise ValueError(f"Unsupported method: {method}")

        # Copy both sequences so later mutation of the caller's lists
        # cannot silently change this combiner.
        self._factors = list(factors)
        if weights is None:
            self._weights = [1.0 / len(factors)] * len(factors)
        else:
            if len(weights) != len(factors):
                raise ValueError("weights length must match factors length")
            self._weights = list(weights)
        self._method = method

    def compute(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Compute every factor and combine them.

        Args:
            data: Input data handed unchanged to each factor's ``compute``.

        Returns:
            DataFrame indexed like ``data`` with one column per factor
            (named after the factor) plus a 'combined' column.
        """
        result = pd.DataFrame(index=data.index)

        # Compute each factor into its own column.
        for factor in self._factors:
            result[factor.name] = factor.compute(data)

        # Combine the factor columns.
        values = result[self.get_factor_names()]
        if self._method == 'weighted_sum':
            # Vectorised weighted sum. ``sum`` skips NaN, so a missing
            # factor value contributes nothing and an all-NaN row yields 0.
            weights = np.asarray(self._weights, dtype=float)
            result['combined'] = values.mul(weights, axis=1).sum(axis=1)
        elif self._method == 'equal_weight':
            result['combined'] = values.mean(axis=1)
        elif self._method == 'rank_average':
            # Rank the factors against each other within every row.
            result['combined'] = values.rank(axis=1).mean(axis=1)
        elif self._method == 'zscore_sum':
            # Standardise each factor column over the whole sample. A
            # column with zero or undefined std becomes NaN and is then
            # skipped by the NaN-ignoring row sum.
            zscores = values.apply(lambda col: (col - col.mean()) / col.std())
            result['combined'] = zscores.sum(axis=1)

        return result

    def get_factor_names(self) -> List[str]:
        """Return the factor names in combination order."""
        return [f.name for f in self._factors]

    def __repr__(self) -> str:
        factor_names = [f.name for f in self._factors]
        return f"FactorCombiner(factors={factor_names}, weights={self._weights}, method={self._method})"


# Exported abstract interfaces
__all__ = ['FactorBase', 'FactorRegistry', 'FactorCombiner']