""" 算子系统:基础数学算子和技术指标算子的注册与管理 支持算子的注册、查询、反射调用 """ import numpy as np import pandas as pd from typing import Dict, Callable, List, Optional, Any from abc import ABC, abstractmethod import inspect class Operator(ABC): """算子基类""" def __init__(self, name: str, func: Callable, description: str = ""): """ Parameters: ----------- name : str 算子名称(唯一标识) func : Callable 算子函数 description : str 算子描述 """ self.name = name self.func = func self.description = description self._signature = inspect.signature(func) def __call__(self, *args, **kwargs): """调用算子函数""" return self.func(*args, **kwargs) def get_signature(self): """获取函数签名""" return self._signature def __repr__(self): return f"Operator(name='{self.name}', description='{self.description}')" class OperatorRegistry: """算子注册表""" def __init__(self): self._operators: Dict[str, Operator] = {} def register(self, operator: Operator): """注册算子""" if operator.name in self._operators: raise ValueError(f"算子 '{operator.name}' 已存在") self._operators[operator.name] = operator def register_function(self, name: str, func: Callable, description: str = ""): """直接注册函数为算子""" operator = Operator(name, func, description) self.register(operator) def get(self, name: str) -> Optional[Operator]: """获取算子""" return self._operators.get(name) def has(self, name: str) -> bool: """检查算子是否存在""" return name in self._operators def list_all(self) -> List[str]: """列出所有算子名称""" return list(self._operators.keys()) def get_all(self) -> Dict[str, Operator]: """获取所有算子""" return self._operators.copy() # 全局算子注册表 _registry = OperatorRegistry() def register_operator(name: str, description: str = ""): """装饰器:注册算子""" def decorator(func: Callable): _registry.register_function(name, func, description) return func return decorator def get_operator(name: str) -> Optional[Operator]: """获取算子""" return _registry.get(name) def get_registry() -> OperatorRegistry: """获取全局注册表""" return _registry # ==================== 基础数学算子 ==================== @register_operator("add", "加法: x + y") def _add(x: np.ndarray, y: np.ndarray) -> np.ndarray: return x + y @register_operator("sub", "减法: x - y") def _sub(x: np.ndarray, y: np.ndarray) -> np.ndarray: return x - y @register_operator("mul", "乘法: x * y") def _mul(x: np.ndarray, y: np.ndarray) -> np.ndarray: return x * y @register_operator("div", "除法: x / y (安全除法)") def _div(x: np.ndarray, y: np.ndarray) -> np.ndarray: denom = np.where(np.abs(y) < 1e-12, np.nan, y) return x / denom @register_operator("neg", "取负: -x") def _neg(x: np.ndarray) -> np.ndarray: return np.negative(x) @register_operator("abs", "绝对值: |x|") def _abs(x: np.ndarray) -> np.ndarray: return np.abs(x) @register_operator("log", "对数: log(|x|)") def _log(x: np.ndarray) -> np.ndarray: return np.log(np.clip(np.abs(x), 1e-12, None)) @register_operator("sqrt", "平方根: sqrt(x)") def _sqrt(x: np.ndarray) -> np.ndarray: return np.sqrt(np.clip(x, 0.0, None)) @register_operator("pow", "幂运算: x^y (限制范围)") def _pow(x: np.ndarray, y: np.ndarray) -> np.ndarray: y_clip = np.clip(y, -3.0, 3.0) with np.errstate(over="ignore", invalid="ignore"): out = np.power(np.clip(x, -1e6, 1e6), y_clip) out[~np.isfinite(out)] = np.nan return out # ==================== 时间序列算子 ==================== def _rolling_mean(x: np.ndarray, window: int) -> np.ndarray: s = pd.Series(x) return s.rolling(window, min_periods=max(2, window // 2)).mean().to_numpy() def _rolling_std(x: np.ndarray, window: int) -> np.ndarray: s = pd.Series(x) return s.rolling(window, min_periods=max(2, window // 2)).std().to_numpy() def _ts_delta(x: np.ndarray, period: int) -> np.ndarray: s = pd.Series(x) return s.diff(period).to_numpy() def _ts_rank(x: np.ndarray, window: int) -> np.ndarray: s = pd.Series(x) return ( s.rolling(window, min_periods=max(2, window // 2)) .apply(lambda a: pd.Series(a).rank(pct=True).iloc[-1], raw=False) .to_numpy() ) def _delay(x: np.ndarray, period: int) -> np.ndarray: s = pd.Series(x) return s.shift(period).to_numpy() # 注册时间序列算子(带不同窗口) for w in (3, 6, 12, 24, 48, 96): _registry.register_function( f"sma{w}", lambda x, w=w: _rolling_mean(x, w), f"简单移动平均: SMA(x, {w})" ) _registry.register_function( f"std{w}", lambda x, w=w: _rolling_std(x, w), f"滚动标准差: STD(x, {w})" ) _registry.register_function( f"rank{w}", lambda x, w=w: _ts_rank(x, w), f"滚动排名: RANK(x, {w})" ) _registry.register_function( f"delta{w}", lambda x, w=w: _ts_delta(x, w), f"差分: DELTA(x, {w})" ) _registry.register_function( f"delay{w}", lambda x, w=w: _delay(x, w), f"延迟: DELAY(x, {w})" ) # ==================== 因子公式解析与计算 ==================== class FactorFormula: """因子公式:支持序列化和反序列化""" def __init__(self, expression: str, feature_names: List[str]): """ Parameters: ----------- expression : str 因子表达式(使用算子名称) feature_names : List[str] 特征名称列表 """ self.expression = expression self.feature_names = feature_names def compute(self, features: Dict[str, np.ndarray]) -> np.ndarray: """ 计算因子值 Parameters: ----------- features : Dict[str, np.ndarray] 特征字典,key为特征名称 Returns: -------- np.ndarray: 因子值 """ # 构建计算环境 env = {} # 添加特征 for name in self.feature_names: if name not in features: raise KeyError(f"特征 '{name}' 不存在") env[name] = features[name] # 添加算子 for op_name in _registry.list_all(): op = _registry.get(op_name) if op: env[op_name] = op.func # 添加numpy和pandas(用于某些表达式) env["np"] = np env["pd"] = pd # 执行表达式 try: # 限制可用的内置函数 safe_builtins = { "abs": abs, "min": min, "max": max, "sum": sum, "len": len, } result = eval(self.expression, {"__builtins__": safe_builtins}, env) # 确保结果是numpy数组 if not isinstance(result, np.ndarray): if isinstance(result, (int, float)): # 标量转换为数组(广播) result = np.full(len(features[self.feature_names[0]]), result) else: result = np.array(result) # 确保长度一致 expected_len = len(features[self.feature_names[0]]) if len(result) != expected_len: raise ValueError( f"表达式结果长度 {len(result)} 与特征长度 {expected_len} 不匹配" ) return result except Exception as e: raise RuntimeError(f"计算因子表达式失败: {e}\n表达式: {self.expression}") def to_dict(self) -> Dict: """序列化为字典""" return {"expression": self.expression, "feature_names": self.feature_names} @classmethod def from_dict(cls, data: Dict) -> "FactorFormula": """从字典反序列化""" return cls(data["expression"], data["feature_names"]) def __repr__(self): return f"FactorFormula(expression='{self.expression}', features={self.feature_names})"