298 lines
8.3 KiB
Python
298 lines
8.3 KiB
Python
"""
|
||
算子系统:基础数学算子和技术指标算子的注册与管理
|
||
支持算子的注册、查询、反射调用
|
||
"""
|
||
|
||
import numpy as np
|
||
import pandas as pd
|
||
from typing import Dict, Callable, List, Optional, Any
|
||
from abc import ABC, abstractmethod
|
||
import inspect
|
||
|
||
|
||
class Operator(ABC):
|
||
"""算子基类"""
|
||
|
||
def __init__(self, name: str, func: Callable, description: str = ""):
|
||
"""
|
||
Parameters:
|
||
-----------
|
||
name : str
|
||
算子名称(唯一标识)
|
||
func : Callable
|
||
算子函数
|
||
description : str
|
||
算子描述
|
||
"""
|
||
self.name = name
|
||
self.func = func
|
||
self.description = description
|
||
self._signature = inspect.signature(func)
|
||
|
||
def __call__(self, *args, **kwargs):
|
||
"""调用算子函数"""
|
||
return self.func(*args, **kwargs)
|
||
|
||
def get_signature(self):
|
||
"""获取函数签名"""
|
||
return self._signature
|
||
|
||
def __repr__(self):
|
||
return f"Operator(name='{self.name}', description='{self.description}')"
|
||
|
||
|
||
class OperatorRegistry:
|
||
"""算子注册表"""
|
||
|
||
def __init__(self):
|
||
self._operators: Dict[str, Operator] = {}
|
||
|
||
def register(self, operator: Operator):
|
||
"""注册算子"""
|
||
if operator.name in self._operators:
|
||
raise ValueError(f"算子 '{operator.name}' 已存在")
|
||
self._operators[operator.name] = operator
|
||
|
||
def register_function(self, name: str, func: Callable, description: str = ""):
|
||
"""直接注册函数为算子"""
|
||
operator = Operator(name, func, description)
|
||
self.register(operator)
|
||
|
||
def get(self, name: str) -> Optional[Operator]:
|
||
"""获取算子"""
|
||
return self._operators.get(name)
|
||
|
||
def has(self, name: str) -> bool:
|
||
"""检查算子是否存在"""
|
||
return name in self._operators
|
||
|
||
def list_all(self) -> List[str]:
|
||
"""列出所有算子名称"""
|
||
return list(self._operators.keys())
|
||
|
||
def get_all(self) -> Dict[str, Operator]:
|
||
"""获取所有算子"""
|
||
return self._operators.copy()
|
||
|
||
|
||
# 全局算子注册表
|
||
_registry = OperatorRegistry()
|
||
|
||
|
||
def register_operator(name: str, description: str = ""):
|
||
"""装饰器:注册算子"""
|
||
|
||
def decorator(func: Callable):
|
||
_registry.register_function(name, func, description)
|
||
return func
|
||
|
||
return decorator
|
||
|
||
|
||
def get_operator(name: str) -> Optional[Operator]:
|
||
"""获取算子"""
|
||
return _registry.get(name)
|
||
|
||
|
||
def get_registry() -> OperatorRegistry:
|
||
"""获取全局注册表"""
|
||
return _registry
|
||
|
||
|
||
# ==================== 基础数学算子 ====================
|
||
|
||
|
||
@register_operator("add", "加法: x + y")
|
||
def _add(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
||
return x + y
|
||
|
||
|
||
@register_operator("sub", "减法: x - y")
|
||
def _sub(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
||
return x - y
|
||
|
||
|
||
@register_operator("mul", "乘法: x * y")
|
||
def _mul(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
||
return x * y
|
||
|
||
|
||
@register_operator("div", "除法: x / y (安全除法)")
|
||
def _div(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
||
denom = np.where(np.abs(y) < 1e-12, np.nan, y)
|
||
return x / denom
|
||
|
||
|
||
@register_operator("neg", "取负: -x")
|
||
def _neg(x: np.ndarray) -> np.ndarray:
|
||
return np.negative(x)
|
||
|
||
|
||
@register_operator("abs", "绝对值: |x|")
|
||
def _abs(x: np.ndarray) -> np.ndarray:
|
||
return np.abs(x)
|
||
|
||
|
||
@register_operator("log", "对数: log(|x|)")
|
||
def _log(x: np.ndarray) -> np.ndarray:
|
||
return np.log(np.clip(np.abs(x), 1e-12, None))
|
||
|
||
|
||
@register_operator("sqrt", "平方根: sqrt(x)")
|
||
def _sqrt(x: np.ndarray) -> np.ndarray:
|
||
return np.sqrt(np.clip(x, 0.0, None))
|
||
|
||
|
||
@register_operator("pow", "幂运算: x^y (限制范围)")
|
||
def _pow(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
||
y_clip = np.clip(y, -3.0, 3.0)
|
||
with np.errstate(over="ignore", invalid="ignore"):
|
||
out = np.power(np.clip(x, -1e6, 1e6), y_clip)
|
||
out[~np.isfinite(out)] = np.nan
|
||
return out
|
||
|
||
|
||
# ==================== 时间序列算子 ====================
|
||
|
||
|
||
def _rolling_mean(x: np.ndarray, window: int) -> np.ndarray:
|
||
s = pd.Series(x)
|
||
return s.rolling(window, min_periods=max(2, window // 2)).mean().to_numpy()
|
||
|
||
|
||
def _rolling_std(x: np.ndarray, window: int) -> np.ndarray:
|
||
s = pd.Series(x)
|
||
return s.rolling(window, min_periods=max(2, window // 2)).std().to_numpy()
|
||
|
||
|
||
def _ts_delta(x: np.ndarray, period: int) -> np.ndarray:
|
||
s = pd.Series(x)
|
||
return s.diff(period).to_numpy()
|
||
|
||
|
||
def _ts_rank(x: np.ndarray, window: int) -> np.ndarray:
|
||
s = pd.Series(x)
|
||
return (
|
||
s.rolling(window, min_periods=max(2, window // 2))
|
||
.apply(lambda a: pd.Series(a).rank(pct=True).iloc[-1], raw=False)
|
||
.to_numpy()
|
||
)
|
||
|
||
|
||
def _delay(x: np.ndarray, period: int) -> np.ndarray:
|
||
s = pd.Series(x)
|
||
return s.shift(period).to_numpy()
|
||
|
||
|
||
# 注册时间序列算子(带不同窗口)
|
||
for w in (3, 6, 12, 24, 48, 96):
|
||
_registry.register_function(
|
||
f"sma{w}", lambda x, w=w: _rolling_mean(x, w), f"简单移动平均: SMA(x, {w})"
|
||
)
|
||
_registry.register_function(
|
||
f"std{w}", lambda x, w=w: _rolling_std(x, w), f"滚动标准差: STD(x, {w})"
|
||
)
|
||
_registry.register_function(
|
||
f"rank{w}", lambda x, w=w: _ts_rank(x, w), f"滚动排名: RANK(x, {w})"
|
||
)
|
||
_registry.register_function(
|
||
f"delta{w}", lambda x, w=w: _ts_delta(x, w), f"差分: DELTA(x, {w})"
|
||
)
|
||
_registry.register_function(
|
||
f"delay{w}", lambda x, w=w: _delay(x, w), f"延迟: DELAY(x, {w})"
|
||
)
|
||
|
||
|
||
# ==================== 因子公式解析与计算 ====================
|
||
|
||
|
||
class FactorFormula:
|
||
"""因子公式:支持序列化和反序列化"""
|
||
|
||
def __init__(self, expression: str, feature_names: List[str]):
|
||
"""
|
||
Parameters:
|
||
-----------
|
||
expression : str
|
||
因子表达式(使用算子名称)
|
||
feature_names : List[str]
|
||
特征名称列表
|
||
"""
|
||
self.expression = expression
|
||
self.feature_names = feature_names
|
||
|
||
def compute(self, features: Dict[str, np.ndarray]) -> np.ndarray:
|
||
"""
|
||
计算因子值
|
||
|
||
Parameters:
|
||
-----------
|
||
features : Dict[str, np.ndarray]
|
||
特征字典,key为特征名称
|
||
|
||
Returns:
|
||
--------
|
||
np.ndarray: 因子值
|
||
"""
|
||
# 构建计算环境
|
||
env = {}
|
||
|
||
# 添加特征
|
||
for name in self.feature_names:
|
||
if name not in features:
|
||
raise KeyError(f"特征 '{name}' 不存在")
|
||
env[name] = features[name]
|
||
|
||
# 添加算子
|
||
for op_name in _registry.list_all():
|
||
op = _registry.get(op_name)
|
||
if op:
|
||
env[op_name] = op.func
|
||
|
||
# 添加numpy和pandas(用于某些表达式)
|
||
env["np"] = np
|
||
env["pd"] = pd
|
||
|
||
# 执行表达式
|
||
try:
|
||
# 限制可用的内置函数
|
||
safe_builtins = {
|
||
"abs": abs,
|
||
"min": min,
|
||
"max": max,
|
||
"sum": sum,
|
||
"len": len,
|
||
}
|
||
result = eval(self.expression, {"__builtins__": safe_builtins}, env)
|
||
|
||
# 确保结果是numpy数组
|
||
if not isinstance(result, np.ndarray):
|
||
if isinstance(result, (int, float)):
|
||
# 标量转换为数组(广播)
|
||
result = np.full(len(features[self.feature_names[0]]), result)
|
||
else:
|
||
result = np.array(result)
|
||
|
||
# 确保长度一致
|
||
expected_len = len(features[self.feature_names[0]])
|
||
if len(result) != expected_len:
|
||
raise ValueError(
|
||
f"表达式结果长度 {len(result)} 与特征长度 {expected_len} 不匹配"
|
||
)
|
||
|
||
return result
|
||
except Exception as e:
|
||
raise RuntimeError(f"计算因子表达式失败: {e}\n表达式: {self.expression}")
|
||
|
||
def to_dict(self) -> Dict:
|
||
"""序列化为字典"""
|
||
return {"expression": self.expression, "feature_names": self.feature_names}
|
||
|
||
@classmethod
|
||
def from_dict(cls, data: Dict) -> "FactorFormula":
|
||
"""从字典反序列化"""
|
||
return cls(data["expression"], data["feature_names"])
|
||
|
||
def __repr__(self):
|
||
return f"FactorFormula(expression='{self.expression}', features={self.feature_names})"
|