Files
factorhack/test.py
2025-11-18 00:14:34 +08:00

59 lines
1.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import warnings
import numpy as np
import pandas as pd
import json
# Suppress numpy RuntimeWarnings: the data contains NaN values, so these warnings are expected.
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
np.seterr(all="ignore") # Ignore numpy floating-point error warnings.
from data import load_data
from factor_mining.gp_miner import GPMiner, GPConfig
if __name__ == "__main__":
    # Load the 1h ETH/USDT candles from a local feather file.
    # NOTE(review): absolute, machine-specific path -- presumably this should
    # come from a CLI argument or config so the script runs elsewhere; confirm.
    df = load_data("/Users/aszer/Documents/vscode/factorhack/ETH_USDT-1h.feather")

    # Resample the candles to 4-hour bars (assumes a datetime index and the
    # usual OHLCV columns), then drop any bar with a missing field.
    df = (
        df.resample("4h")
        .agg(
            {
                "open": "first",
                "high": "max",
                "low": "min",
                "close": "last",
                "volume": "sum",
            }
        )
        .dropna()
    )
    # Keep only bars strictly before 2023-01-01.
    df = df[df.index < "2023-01-01"]

    print("数据加载成功前5行")
    print(df.head())
    print(f"\n数据形状: {df.shape}")
    print(f"数据列: {df.columns.tolist()}")

    # Genetic-programming search settings; the meaning of each field is
    # defined by GPConfig (not visible in this file).
    # NOTE(review): seed=None presumably makes runs non-reproducible -- confirm
    # against GPConfig and pin a seed if repeatable results are needed.
    gp_config = GPConfig(
        ret_horizon=48,
        ic_window=120,
        ic_method="spearman",
        seed=None,
        population_size=200,
        generations=30,
        tournament_size=5,
        crossover_prob=0.9,
        mutation_prob=0.05,
        elitism=5,
        max_depth_init=1,
        max_depth=30,
        complexity_penalty=0.001,
    )
    miner = GPMiner(config=gp_config)
    res = miner.mine(df, ["open", "high", "low", "close", "volume"])

    # Write one JSON object per line. The encoding is pinned to UTF-8 because
    # ensure_ascii=False emits raw non-ASCII characters, which the platform's
    # default locale encoding may be unable to encode.
    with open("gp_miner_result.txt", "w", encoding="utf-8") as out_file:
        for formula, ic_tuple in res:
            # The score may arrive as a tuple; its first element is the IC value.
            ic = ic_tuple[0] if isinstance(ic_tuple, tuple) else ic_tuple
            print(f"{formula.expression}, IC: {ic:.4f}")
            # Serialize the factor formula as a dict and append it to the file.
            out_file.write(json.dumps(formula.to_dict(), ensure_ascii=False))
            out_file.write("\n")