59 lines
1.9 KiB
Python
59 lines
1.9 KiB
Python
import warnings
|
||
import numpy as np
|
||
import pandas as pd
|
||
import json
|
||
|
||
# Silence NumPy RuntimeWarnings: the data contains NaN values, so these
# warnings are expected and carry no useful signal here.
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
np.seterr(all="ignore")  # also ignore NumPy floating-point error reports
|
||
|
||
from data import load_data
|
||
from factor_mining.gp_miner import GPMiner, GPConfig
|
||
|
||
# NOTE(review): machine-specific absolute path kept as the default so the
# script behaves as before; pass another path to main() to run elsewhere.
DEFAULT_DATA_PATH = "/Users/aszer/Documents/vscode/factorhack/ETH_USDT-1h.feather"
DEFAULT_OUTPUT_PATH = "gp_miner_result.txt"

# How each OHLCV column is combined when several bars are merged into one.
OHLCV_AGGREGATION = {
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
    "volume": "sum",
}


def resample_ohlcv(df, rule="4h"):
    """Merge OHLCV bars into coarser bars of width ``rule``.

    Args:
        df: Frame that supports ``resample`` (assumes a datetime-like index)
            and has ``open``, ``high``, ``low``, ``close`` and ``volume``
            columns.
        rule: pandas offset alias for the target bar width.

    Returns:
        The aggregated frame with every row that contains a NaN dropped.
    """
    return df.resample(rule).agg(OHLCV_AGGREGATION).dropna()


def extract_ic(score):
    """Return the IC value carried by a miner score.

    A tuple score yields its first element; any other value is returned
    unchanged.
    """
    return score[0] if isinstance(score, tuple) else score


def main(data_path=DEFAULT_DATA_PATH, output_path=DEFAULT_OUTPUT_PATH):
    """Run the GP factor miner on resampled bars and save the results.

    Loads the frame at ``data_path``, resamples it to 4-hour bars, keeps rows
    dated before 2023-01-01, mines factors over the OHLCV columns, prints each
    formula with its IC and writes one JSON object per line to
    ``output_path``.

    Args:
        data_path: Location handed to ``load_data``.
        output_path: Text file that receives the JSON lines (overwritten).
    """
    df = load_data(data_path)
    # Resample to 4-hour bars (assumes a datetime index and the usual OHLCV
    # columns), then keep only rows dated before 2023-01-01.
    df = resample_ohlcv(df, "4h")
    df = df[df.index < "2023-01-01"]

    print("数据加载成功,前5行:")
    print(df.head())
    print(f"\n数据形状: {df.shape}")
    print(f"数据列: {df.columns.tolist()}")

    gp_config = GPConfig(
        ret_horizon=48,
        ic_window=120,
        ic_method="spearman",
        seed=None,
        population_size=200,
        generations=30,
        tournament_size=5,
        crossover_prob=0.9,
        mutation_prob=0.05,
        elitism=5,
        max_depth_init=1,
        max_depth=30,
        complexity_penalty=0.001,
    )
    miner = GPMiner(config=gp_config)
    res = miner.mine(df, ["open", "high", "low", "close", "volume"])

    # The JSON is written with ensure_ascii=False, so non-ASCII text is emitted
    # verbatim; pin UTF-8 instead of relying on the platform default encoding.
    with open(output_path, "w", encoding="utf-8") as out_file:
        for formula, ic_tuple in res:
            # The score may be a tuple; its first element is the IC value.
            ic = extract_ic(ic_tuple)
            print(f"{formula.expression}, IC: {ic:.4f}")
            # Serialize the factor formula as one JSON object per line.
            out_file.write(json.dumps(formula.to_dict(), ensure_ascii=False) + "\n")


if __name__ == "__main__":
    main()