添加talib算子
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""
|
||||
DEAP遗传编程挖掘器实现
|
||||
"""
|
||||
|
||||
import random
|
||||
import operator
|
||||
from typing import List, Tuple, Optional
|
||||
@@ -17,6 +18,7 @@ from data import compute_forward_returns
|
||||
@dataclass
|
||||
class GPConfig(MiningConfig):
|
||||
"""GP挖掘配置"""
|
||||
|
||||
population_size: int = 200
|
||||
generations: int = 30
|
||||
tournament_size: int = 5
|
||||
@@ -30,144 +32,142 @@ class GPConfig(MiningConfig):
|
||||
|
||||
class GPMiner(FactorMiner):
|
||||
"""DEAP遗传编程挖掘器"""
|
||||
|
||||
|
||||
def __init__(self, config: GPConfig):
    """Store the GP configuration and reset the per-run state.

    Args:
        config: GP mining configuration; it is also forwarded to the
            base-class initializer.
    """
    super().__init__(config)
    self.config: GPConfig = config

    # The attributes below stay ``None`` until ``mine`` assigns them.
    self.features: Optional[List[pd.Series]] = None
    self.pset: Optional[gp.PrimitiveSetTyped] = None
    self.toolbox: Optional[base.Toolbox] = None
|
||||
|
||||
|
||||
def get_name(self) -> str:
    """Return the identifier of this miner, the constant string ``"gp"``."""
    miner_name = "gp"
    return miner_name
|
||||
|
||||
|
||||
def _build_pset(self, feature_names: List[str]) -> gp.PrimitiveSetTyped:
    """Build the typed DEAP primitive set used to grow factor expressions.

    Every feature becomes one ``np.ndarray`` argument, renamed from DEAP's
    default ``ARG<i>`` to the feature name. Every operator in the registry
    whose signature has exactly one or two parameters is added as an
    ``np.ndarray -> np.ndarray`` primitive; operators with any other
    parameter count are not added.

    Args:
        feature_names: Names of the input features, in argument order.

    Returns:
        The populated primitive set.
    """
    registry = get_registry()

    # Build the set exactly once; the argument types mirror the feature list.
    pset = gp.PrimitiveSetTyped(
        "MAIN", [np.ndarray for _ in feature_names], np.ndarray
    )

    # Give the positional arguments readable names.
    for i, name in enumerate(feature_names):
        pset.renameArguments(**{f"ARG{i}": name})

    # Register each operator once, with arity taken from its signature.
    for op_name in registry.list_all():
        op = registry.get(op_name)
        if not op:
            continue
        arity = len(op.get_signature().parameters)
        if arity in (1, 2):
            pset.addPrimitive(
                op.func, [np.ndarray] * arity, np.ndarray, name=op_name
            )

    # NOTE: the ephemeral random constant ("const", uniform in [-2, 2]) is
    # intentionally not registered; this commit disabled it.
    return pset
|
||||
|
||||
def _evaluate_individual(self, individual, target: pd.Series) -> Tuple[float]:
    """Score one GP individual by its mean rolling IC minus a size penalty.

    The individual is compiled with the toolbox and called on the stored
    features reindexed to ``target.index``. Any failure or unusable output
    yields the sentinel fitness ``-1e6``.

    Args:
        individual: The GP expression tree to evaluate.
        target: Series the factor is compared against; its index defines
            the evaluation sample.

    Returns:
        A one-element tuple with the fitness value.
    """
    func = self.toolbox.compile(expr=individual)

    # Align every feature to the target index before evaluating.
    idx = target.index
    inputs = [f.reindex(idx).to_numpy() for f in self.features]

    # Best effort: an expression that raises is simply a bad individual.
    try:
        raw = func(*inputs)
    except Exception:
        return (-1e6,)

    # Only a 1-D array with one value per sample is usable. The ndim check
    # keeps 0-d arrays (no ``shape[0]``) and 2-D arrays (rejected by
    # ``pd.Series``) from raising outside the try block above.
    if not isinstance(raw, np.ndarray):
        return (-1e6,)
    if raw.ndim != 1 or raw.shape[0] != len(idx):
        return (-1e6,)

    # Convert to a Series and clean infinities / gaps.
    # NOTE(review): ``bfill`` fills leading gaps with later values; confirm
    # this is acceptable for the evaluation (possible look-ahead).
    factor = pd.Series(raw, index=idx)
    factor = factor.replace([np.inf, -np.inf], np.nan)
    factor = factor.ffill().bfill()

    # Require some samples beyond a single IC window.
    window = self.config.ic_window
    if len(factor) < window + 10:
        return (-1e6,)

    from validation import compute_rolling_ic

    ic_series = compute_rolling_ic(
        factor, target, window=window, method=self.config.ic_method
    )
    mean_ic = ic_series.mean()
    if not np.isfinite(mean_ic):
        return (-1e6,)

    # Penalize larger trees (``len`` of the individual).
    complexity = len(individual)
    fitness = mean_ic - self.config.complexity_penalty * complexity
    if not np.isfinite(fitness):
        fitness = -1e6

    return (fitness,)
|
||||
|
||||
|
||||
def _individual_to_formula(
    self, individual, feature_names: List[str]
) -> FactorFormula:
    """Convert a GP individual into a ``FactorFormula``.

    ``str(individual)`` renders the tree as nested function calls, e.g.
    ``add(ARG0, ARG1)``. Any remaining ``ARG<i>`` placeholder is replaced
    by the i-th feature name.

    Args:
        individual: The GP expression tree.
        feature_names: Feature names in argument order.

    Returns:
        A ``FactorFormula`` built from the expression string and the
        feature names.
    """
    import re

    expr_str = str(individual)

    def _feature_for(match):
        # Leave placeholders without a matching feature untouched.
        position = int(match.group(1))
        if position < len(feature_names):
            return feature_names[position]
        return match.group(0)

    # Substitute whole tokens only. A plain ``str.replace("ARG1", ...)``
    # would also rewrite the prefix of ``ARG10``, ``ARG11`` and so on.
    expr_str = re.sub(r"\bARG(\d+)\b", _feature_for, expr_str)

    return FactorFormula(expr_str, feature_names)
|
||||
|
||||
|
||||
def mine(
|
||||
self,
|
||||
data: pd.DataFrame,
|
||||
feature_cols: List[str],
|
||||
price_col: str = "close"
|
||||
self, data: pd.DataFrame, feature_cols: List[str], price_col: str = "close"
|
||||
) -> List[FactorFormula]:
|
||||
"""执行GP挖掘"""
|
||||
if self.config.seed is not None:
|
||||
random.seed(self.config.seed)
|
||||
np.random.seed(self.config.seed)
|
||||
|
||||
|
||||
# 准备数据
|
||||
price = data[price_col].astype(float)
|
||||
forward_ret = compute_forward_returns(price, self.config.ret_horizon)
|
||||
target = forward_ret
|
||||
|
||||
|
||||
self.features = [data[c].astype(float) for c in feature_cols]
|
||||
|
||||
|
||||
# 构建原始集合
|
||||
self.pset = self._build_pset(feature_cols)
|
||||
|
||||
|
||||
# 创建DEAP类型
|
||||
if not hasattr(creator, "FitnessMax"):
|
||||
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
|
||||
if not hasattr(creator, "Individual"):
|
||||
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)
|
||||
|
||||
|
||||
# 构建工具箱
|
||||
self.toolbox = base.Toolbox()
|
||||
self.toolbox.register(
|
||||
@@ -175,38 +175,46 @@ class GPMiner(FactorMiner):
|
||||
gp.genHalfAndHalf,
|
||||
pset=self.pset,
|
||||
min_=1,
|
||||
max_=self.config.max_depth_init
|
||||
max_=self.config.max_depth_init,
|
||||
)
|
||||
self.toolbox.register("individual", tools.initIterate, creator.Individual, self.toolbox.expr)
|
||||
self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
|
||||
self.toolbox.register("compile", gp.compile, pset=self.pset)
|
||||
|
||||
self.toolbox.register(
|
||||
"evaluate",
|
||||
self._evaluate_individual,
|
||||
target=target
|
||||
"individual", tools.initIterate, creator.Individual, self.toolbox.expr
|
||||
)
|
||||
|
||||
self.toolbox.register(
|
||||
"population", tools.initRepeat, list, self.toolbox.individual
|
||||
)
|
||||
self.toolbox.register("compile", gp.compile, pset=self.pset)
|
||||
|
||||
self.toolbox.register("evaluate", self._evaluate_individual, target=target)
|
||||
|
||||
# 遗传算子
|
||||
self.toolbox.register("select", tools.selTournament, tournsize=self.config.tournament_size)
|
||||
self.toolbox.register(
|
||||
"select", tools.selTournament, tournsize=self.config.tournament_size
|
||||
)
|
||||
self.toolbox.register("mate", gp.cxOnePoint)
|
||||
self.toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
|
||||
self.toolbox.register("mutate", gp.mutUniform, expr=self.toolbox.expr_mut, pset=self.pset)
|
||||
|
||||
self.toolbox.register(
|
||||
"mutate", gp.mutUniform, expr=self.toolbox.expr_mut, pset=self.pset
|
||||
)
|
||||
|
||||
# 控制树深度
|
||||
self.toolbox.decorate(
|
||||
"mate",
|
||||
gp.staticLimit(key=operator.attrgetter("height"), max_value=self.config.max_depth)
|
||||
gp.staticLimit(
|
||||
key=operator.attrgetter("height"), max_value=self.config.max_depth
|
||||
),
|
||||
)
|
||||
self.toolbox.decorate(
|
||||
"mutate",
|
||||
gp.staticLimit(key=operator.attrgetter("height"), max_value=self.config.max_depth)
|
||||
gp.staticLimit(
|
||||
key=operator.attrgetter("height"), max_value=self.config.max_depth
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# 运行进化
|
||||
pop = self.toolbox.population(n=self.config.population_size)
|
||||
hof = tools.HallOfFame(maxsize=max(5, self.config.elitism))
|
||||
|
||||
hof = tools.HallOfFame(maxsize=max(5000, self.config.elitism))
|
||||
|
||||
stats_fit = tools.Statistics(lambda ind: ind.fitness.values[0])
|
||||
stats_size = tools.Statistics(len)
|
||||
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
|
||||
@@ -214,7 +222,7 @@ class GPMiner(FactorMiner):
|
||||
mstats.register("std", np.nanstd)
|
||||
mstats.register("min", np.nanmin)
|
||||
mstats.register("max", np.nanmax)
|
||||
|
||||
|
||||
pop, logbook = algorithms.eaSimple(
|
||||
pop,
|
||||
self.toolbox,
|
||||
@@ -225,12 +233,11 @@ class GPMiner(FactorMiner):
|
||||
halloffame=hof,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
|
||||
# 转换为因子公式
|
||||
formulas = []
|
||||
for individual in hof:
|
||||
formula = self._individual_to_formula(individual, feature_cols)
|
||||
formulas.append(formula)
|
||||
|
||||
return formulas
|
||||
|
||||
return formulas
|
||||
|
||||
@@ -9,6 +9,8 @@ from typing import Dict, Callable, List, Optional, Any
|
||||
from abc import ABC, abstractmethod
|
||||
import inspect
|
||||
|
||||
import talib
|
||||
|
||||
|
||||
class Operator(ABC):
|
||||
"""算子基类"""
|
||||
@@ -99,6 +101,9 @@ def get_registry() -> OperatorRegistry:
|
||||
return _registry
|
||||
|
||||
|
||||
# 定义period参数的值范围
|
||||
PERIOD_RANGE = range(10, 100) # 10到99
|
||||
|
||||
# ==================== 基础数学算子 ====================
|
||||
|
||||
|
||||
@@ -153,8 +158,6 @@ def _pow(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
||||
|
||||
|
||||
# ==================== 时间序列算子 ====================
|
||||
|
||||
|
||||
def _rolling_mean(x: np.ndarray, window: int) -> np.ndarray:
    """Return the trailing simple moving average of ``x``.

    A position is NaN until its window holds at least
    ``max(2, window // 2)`` observations.
    """
    min_obs = max(2, window // 2)
    averaged = pd.Series(x).rolling(window, min_periods=min_obs).mean()
    return averaged.to_numpy()
|
||||
@@ -184,8 +187,20 @@ def _delay(x: np.ndarray, period: int) -> np.ndarray:
|
||||
return s.shift(period).to_numpy()
|
||||
|
||||
|
||||
def _pct_change(x: np.ndarray, period: int = 1) -> np.ndarray:
    """Return the percentage change of ``x`` over ``period`` steps.

    Missing values are not filled before the change is computed
    (``fill_method=None``).
    """
    series = pd.Series(x)
    change = series.pct_change(periods=period, fill_method=None)
    return change.to_numpy()
|
||||
|
||||
|
||||
# 注册单参数百分比变化算子
|
||||
@register_operator("pct", "百分比变化: PCT(x, 1)")
def _pct(x: np.ndarray) -> np.ndarray:
    """Return the one-step percentage change of ``x``."""
    return _pct_change(x, period=1)
|
||||
|
||||
|
||||
# 注册时间序列算子(带不同窗口)
|
||||
for w in (3, 6, 12, 24, 48, 96):
|
||||
for w in PERIOD_RANGE:
|
||||
_registry.register_function(
|
||||
f"sma{w}", lambda x, w=w: _rolling_mean(x, w), f"简单移动平均: SMA(x, {w})"
|
||||
)
|
||||
@@ -203,6 +218,347 @@ for w in (3, 6, 12, 24, 48, 96):
|
||||
)
|
||||
|
||||
|
||||
# ==================== 技术指标算子(含自定义与ta-lib)====================
|
||||
|
||||
|
||||
def _try_float(x):
    """Return ``float(x)`` when the conversion succeeds, otherwise ``x`` itself."""
    try:
        converted = float(x)
    except Exception:
        # Not convertible: hand the value back unchanged.
        return x
    else:
        return converted
|
||||
|
||||
|
||||
def _convert_input(v):
    """Return the underlying array of a ``pd.Series``; pass anything else through."""
    return v.values if isinstance(v, pd.Series) else v
|
||||
|
||||
|
||||
# Register the TA-Lib technical indicators.
# Collect the candidate entry points: every attribute of the ``talib`` module
# whose name is all upper-case and whose value is callable.
talib_func_list = [
    name
    for name in dir(talib)
    if name.isupper() and callable(getattr(talib, name))
]
|
||||
|
||||
# Substrings that mark a parameter as period-like. A function with such a
# parameter is registered once per value of PERIOD_RANGE. Listed in order of
# preference.
PERIOD_PARAM_NAMES = ["timeperiod", "period", "optintimeperiod"]

# Functions with several period-like parameters. The main parameter to vary
# is named explicitly here instead of being auto-detected.
# Layout: function name -> (main period parameter, secondary parameters).
# The secondary list is documentation only. An empty main parameter means
# "register a single default version".
MULTI_PERIOD_FUNCTIONS = {
    "MACD": ("fastperiod", ["slowperiod", "signalperiod"]),
    "MACDEXT": ("fastperiod", ["slowperiod", "signalperiod"]),
    "MACDFIX": ("signalperiod", []),
    "STOCH": ("fastk_period", ["slowk_period", "slowd_period"]),
    "STOCHF": ("fastk_period", ["fastd_period"]),
    "STOCHRSI": ("timeperiod", ["fastk_period", "fastd_period"]),
    "BBANDS": ("timeperiod", ["nbdevup", "nbdevdn"]),
    "APO": ("fastperiod", ["slowperiod"]),
    "PPO": ("fastperiod", ["slowperiod"]),
    "ULTOSC": ("timeperiod1", ["timeperiod2", "timeperiod3"]),
    "BOP": ("", []),
}
|
||||
|
||||
|
||||
def build_talib_wrapper(func, func_name, fixed_params=None):
    """Wrap a TA-Lib style function, optionally pinning some keyword arguments.

    The wrapper converts ``pd.Series`` inputs to arrays, merges
    ``fixed_params`` with call-time keyword arguments (call-time values
    win), and returns the result as ``np.ndarray``. A tuple result is
    returned as a tuple of arrays, with ``None`` items kept as ``None``.

    The wrapper accepts ``*args, **kwargs`` at call time, but for
    introspection it publishes a ``__signature__`` containing only the
    wrapped function's required positional parameters that are not pinned.
    Code that infers operator arity from ``inspect.signature`` then sees the
    real number of data inputs rather than the two catch-all parameters.
    NOTE(review): assumes the operator registry reads arity through
    ``inspect.signature``; confirm against ``Operator.get_signature``.

    Args:
        func: The function to wrap.
        func_name: Its name; the wrapper is named ``talib_<lowercased name>``.
        fixed_params: Keyword arguments applied on every call.

    Returns:
        The wrapper callable.
    """
    fixed_params = fixed_params or {}

    def _talib_wrap(*args, **kwargs):
        # Call-time keyword arguments override the pinned ones.
        merged_kwargs = {**fixed_params, **kwargs}
        # Normalize every input, positional and keyword alike.
        args = tuple(_convert_input(arg) for arg in args)
        merged_kwargs = {
            key: _convert_input(value) for key, value in merged_kwargs.items()
        }
        result = func(*args, **merged_kwargs)
        # Multi-output results keep their tuple structure.
        if isinstance(result, tuple):
            return tuple(
                np.asarray(item) if item is not None else None for item in result
            )
        return np.asarray(result)

    _talib_wrap.__name__ = f"talib_{func_name.lower()}"

    # Publish the effective arity. Skip silently when the wrapped callable
    # cannot be introspected; the wrapper then behaves exactly as before.
    try:
        wrapped_sig = inspect.signature(func)
    except (TypeError, ValueError):
        wrapped_sig = None
    if wrapped_sig is not None:
        positional_kinds = (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
        data_inputs = [
            param
            for name, param in wrapped_sig.parameters.items()
            if name not in fixed_params
            and param.kind in positional_kinds
            and param.default is inspect.Parameter.empty
        ]
        _talib_wrap.__signature__ = wrapped_sig.replace(parameters=data_inputs)

    return _talib_wrap
|
||||
|
||||
|
||||
def _select_period_param(func_name, params):
    """Return the parameter to vary over PERIOD_RANGE, or ``None`` for a single default version."""
    if func_name in MULTI_PERIOD_FUNCTIONS:
        # Explicit configuration wins over auto-detection.
        configured, _ = MULTI_PERIOD_FUNCTIONS[func_name]
        if configured and configured in params:
            return configured
        return None

    # Auto-detect: any parameter whose lowercased name contains a keyword.
    candidates = [
        param_name
        for param_name in params
        if any(keyword in param_name.lower() for keyword in PERIOD_PARAM_NAMES)
    ]
    if not candidates:
        return None

    # Prefer ``timeperiod``, then the other keywords; otherwise the first hit.
    for preferred in ["timeperiod", "period", "optintimeperiod"]:
        for param_name in candidates:
            if preferred in param_name.lower():
                return param_name
    return candidates[0]


for func_name in talib_func_list:
    func = getattr(talib, func_name)
    sig = inspect.signature(func)
    params = sig.parameters
    main_period_param = _select_period_param(func_name, params)

    if main_period_param is None:
        # No period parameter to vary: register one default version.
        _registry.register_function(
            f"talib_{func_name.lower()}",
            build_talib_wrapper(func, func_name),
            f"ta-lib: {func_name}",
        )
        continue

    # One operator per period value, with the period pinned in the wrapper.
    for period_value in PERIOD_RANGE:
        _registry.register_function(
            f"talib_{func_name.lower()}_{period_value}",
            build_talib_wrapper(func, func_name, {main_period_param: period_value}),
            f"ta-lib: {func_name}({main_period_param}={period_value})",
        )
|
||||
|
||||
# ==================== 自定义常见技术指标 ====================
|
||||
|
||||
|
||||
def _ewm_forward(x: np.ndarray, alpha: float) -> np.ndarray:
    """Return the exponentially weighted moving average of ``x``, computed forward.

    The recursion is ``y[0] = x[0]`` and
    ``y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]``.

    Args:
        x: Input values.
        alpha: Weight given to the newest observation.

    Returns:
        A float array with the same length as ``x``; empty input gives an
        empty array.
    """
    # Work in float so that integer input is not truncated on assignment.
    values = np.asarray(x, dtype=float)
    result = np.zeros_like(values)
    if len(values) == 0:
        return result
    result[0] = values[0]
    for i in range(1, len(values)):
        result[i] = values[i] * alpha + (1 - alpha) * result[i - 1]
    return result
|
||||
|
||||
|
||||
def _rsv(x: np.ndarray, window: int) -> np.ndarray:
    """Return ``(value - rolling min) / (rolling max - rolling min)``.

    The rolling window uses ``closed="both"`` and needs at least
    ``max(2, window // 2)`` observations. A range smaller than ``1e-12``
    in absolute value gives NaN instead of dividing by zero.
    """
    series = pd.Series(x)
    windowed = series.rolling(
        window, min_periods=max(2, window // 2), closed="both"
    )
    low = windowed.min()
    span = windowed.max() - low
    # Guard against division by (near) zero.
    safe_span = np.where(np.abs(span) < 1e-12, np.nan, span)
    return ((series - low) / safe_span).to_numpy()
|
||||
|
||||
|
||||
def _bband(x: np.ndarray, window: int) -> np.ndarray:
    """Return ``(value - rolling mean) / rolling std``.

    The rolling window uses ``closed="both"`` and needs at least
    ``max(2, window // 2)`` observations. A standard deviation smaller
    than ``1e-12`` in absolute value gives NaN instead of dividing by zero.
    """
    series = pd.Series(x)
    windowed = series.rolling(
        window, min_periods=max(2, window // 2), closed="both"
    )
    center = windowed.mean()
    spread = windowed.std()
    # Guard against division by (near) zero.
    safe_spread = np.where(np.abs(spread) < 1e-12, np.nan, spread)
    return ((series - center) / safe_spread).to_numpy()
|
||||
|
||||
|
||||
def _rsi(x: np.ndarray, window: int, threshold: float = 0.00001) -> np.ndarray:
    """Return the rolling share of upward moves among all moves of ``x``.

    Within each window of first differences, moves above ``threshold``
    count as gains and moves below ``-threshold`` as losses. The result is
    ``gains / (gains + losses)``, or NaN when that total is below ``1e-12``.
    The window uses ``closed="both"`` and needs at least
    ``max(2, window // 2)`` observations.
    """
    changes = pd.Series(x).diff()

    def _up_share(chunk):
        gains = chunk[chunk > threshold].sum()
        losses = abs(chunk[chunk < -threshold].sum())
        moved = gains + losses
        if moved < 1e-12:
            return np.nan
        return gains / moved

    shares = changes.rolling(
        window, min_periods=max(2, window // 2), closed="both"
    ).apply(_up_share, raw=False)
    return shares.to_numpy()
|
||||
|
||||
|
||||
def _rolling_skew(x: np.ndarray, window: int) -> np.ndarray:
    """Return the rolling skewness of ``x``.

    The window uses ``closed="both"`` and ``min_periods=max(2, window // 2)``.
    """
    min_obs = max(2, window // 2)
    windowed = pd.Series(x).rolling(window, min_periods=min_obs, closed="both")
    return windowed.skew().to_numpy()
|
||||
|
||||
|
||||
def _rolling_kurtosis(x: np.ndarray, window: int) -> np.ndarray:
    """Return the rolling kurtosis of ``x`` as computed by pandas ``kurt``.

    The window uses ``closed="both"`` and ``min_periods=max(2, window // 2)``.
    """
    min_obs = max(2, window // 2)
    windowed = pd.Series(x).rolling(window, min_periods=min_obs, closed="both")
    return windowed.kurt().to_numpy()
|
||||
|
||||
|
||||
def _rolling_linear(x: np.ndarray, window: int) -> np.ndarray:
    """Return the rolling slope of a degree-1 least-squares fit of ``x``.

    Within each window the NaN values are dropped and the remaining values
    are fitted against ``0, 1, 2, ...``. Windows with fewer than two valid
    values, or where the fit fails, give NaN. The window uses
    ``closed="both"`` and ``min_periods=max(2, window // 2)``.
    """
    s = pd.Series(x)

    def _linear_slope(series):
        valid = series.dropna()
        if len(valid) < 2:
            return np.nan
        try:
            coeffs = np.polyfit(np.arange(len(valid)), valid.values, 1)
        except Exception:
            # Best effort: a failed fit is a missing value. Unlike a bare
            # ``except``, this lets KeyboardInterrupt/SystemExit through.
            return np.nan
        return coeffs[0]

    result = s.rolling(
        window, min_periods=max(2, window // 2), closed="both"
    ).apply(_linear_slope, raw=False)
    return result.to_numpy()
|
||||
|
||||
|
||||
def _rolling_autocorr(x: np.ndarray, window: int, lag: int = 1) -> np.ndarray:
    """Return the rolling autocorrelation of ``x`` at the given ``lag``.

    Windows with fewer than two non-NaN values give NaN. The window uses
    ``closed="both"`` and ``min_periods=max(2, window // 2)``.
    """

    def _autocorr(chunk):
        if len(chunk.dropna()) >= 2:
            return chunk.autocorr(lag=lag)
        return np.nan

    windowed = pd.Series(x).rolling(
        window, min_periods=max(2, window // 2), closed="both"
    )
    return windowed.apply(_autocorr, raw=False).to_numpy()
|
||||
|
||||
|
||||
def _rolling_max(x: np.ndarray, window: int) -> np.ndarray:
    """Return the rolling maximum of ``x``.

    The window uses ``closed="both"`` and ``min_periods=max(2, window // 2)``.
    """
    min_obs = max(2, window // 2)
    windowed = pd.Series(x).rolling(window, min_periods=min_obs, closed="both")
    return windowed.max().to_numpy()
|
||||
|
||||
|
||||
def _rolling_min(x: np.ndarray, window: int) -> np.ndarray:
    """Return the rolling minimum of ``x``.

    The window uses ``closed="both"`` and ``min_periods=max(2, window // 2)``.
    """
    min_obs = max(2, window // 2)
    windowed = pd.Series(x).rolling(window, min_periods=min_obs, closed="both")
    return windowed.min().to_numpy()
|
||||
|
||||
|
||||
def _huanbi(x: np.ndarray, window: int) -> np.ndarray:
    """Return the ratio of each value to the first value of its rolling window.

    A window shorter than two elements, or one whose first value is below
    ``1e-12`` in absolute value, gives NaN. The window uses
    ``closed="both"`` and ``min_periods=max(2, window // 2)``.
    """

    def _end_over_start(chunk):
        if len(chunk) < 2:
            return np.nan
        first = chunk.iloc[0]
        # Guard against division by (near) zero.
        if abs(first) < 1e-12:
            return np.nan
        return chunk.iloc[-1] / first

    windowed = pd.Series(x).rolling(
        window, min_periods=max(2, window // 2), closed="both"
    )
    return windowed.apply(_end_over_start, raw=False).to_numpy()
|
||||
|
||||
|
||||
# Register the windowed technical-indicator operators, one per window length.


def _fixed_window(fn, w):
    """Return a strictly one-argument operator that calls ``fn(x, w)``.

    A ``lambda x, w=w: ...`` binds the window equally well at call time,
    but its signature still lists ``w`` as a second parameter. Code that
    infers operator arity from the signature would then treat a unary
    operator as binary. A closure keeps the signature at exactly ``(x)``.
    NOTE(review): assumes the registry derives arity via
    ``inspect.signature``; confirm against ``Operator.get_signature``.
    """

    def _op(x):
        return fn(x, w)

    return _op


# (name prefix, implementation, description prefix), in registration order.
_WINDOWED_OPERATORS = (
    ("pct", _pct_change, "百分比变化: PCT"),
    ("rsv", _rsv, "相对强弱值: RSV"),
    ("bband", _bband, "布林带指标: BBAND"),
    ("rsi", _rsi, "相对强弱指标: RSI"),
    ("skew", _rolling_skew, "滚动偏度: SKEW"),
    ("kurt", _rolling_kurtosis, "滚动峰度: KURT"),
    ("linear", _rolling_linear, "滚动线性斜率: LINEAR"),
    ("autocorr", _rolling_autocorr, "滚动自相关: AUTOCORR"),
    ("max", _rolling_max, "滚动最大值: MAX"),
    ("min", _rolling_min, "滚动最小值: MIN"),
    ("huanbi", _huanbi, "环比: HUANBI"),
)

for w in PERIOD_RANGE:
    # EWM binds a smoothing factor derived from the window, not the window.
    alpha = 2.0 / (w + 1)
    _registry.register_function(
        f"ewm{w}",
        _fixed_window(_ewm_forward, alpha),
        f"指数加权移动平均: EWM(x, {w})",
    )

    # All remaining operators bind the window length itself.
    for _prefix, _impl, _label in _WINDOWED_OPERATORS:
        _registry.register_function(
            f"{_prefix}{w}",
            _fixed_window(_impl, w),
            f"{_label}(x, {w})",
        )
|
||||
|
||||
|
||||
# ==================== 因子公式解析与计算 ====================
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user