"""Mine alpha factors with the genetic-programming miner on resampled candles.

Running this file as a script loads a candle file, resamples it to a coarser
bar size, keeps the rows before a cutoff date, runs ``GPMiner`` over the OHLCV
columns and writes every resulting formula as one JSON object per line.
"""

import json
import warnings

import numpy as np
import pandas as pd

# Suppress numpy RuntimeWarnings: the data contains NaN values, so they are
# expected here.
warnings.filterwarnings("ignore", category=RuntimeWarning, module="numpy")
np.seterr(all="ignore")  # Ignore numpy floating-point error warnings.

# Input candle file and output location used when main() gets no arguments.
DATA_PATH = "/Users/aszer/Documents/vscode/factorhack/ETH_USDT-1h.feather"
OUTPUT_PATH = "gp_miner_result.txt"

# Bar size the candles are resampled to before mining.
RESAMPLE_RULE = "4h"

# Only rows whose index is strictly earlier than this date are mined.
DATA_CUTOFF = "2023-01-01"

# How each candle column is combined when several bars collapse into one.
OHLCV_AGGREGATION = {
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
    "volume": "sum",
}

# Columns handed to the miner as candidate inputs.
FEATURE_COLUMNS = ["open", "high", "low", "close", "volume"]


def resample_ohlcv(df: pd.DataFrame, rule: str = RESAMPLE_RULE) -> pd.DataFrame:
    """Resample candle data to ``rule`` and drop incomplete bars.

    Assumes ``df`` has a datetime-like index and the usual ``open``, ``high``,
    ``low``, ``close`` and ``volume`` columns.

    Args:
        df: Candle data to resample.
        rule: Resampling rule passed to ``DataFrame.resample``.

    Returns:
        A new frame with one row per ``rule`` period; periods in which any
        aggregated column is NaN (for example, gaps with no source bars) are
        removed.
    """
    return df.resample(rule).agg(OHLCV_AGGREGATION).dropna()


def extract_ic(ic_result):
    """Return the IC value from a miner score.

    Args:
        ic_result: Either a tuple whose first element is the IC value, or the
            IC value itself.

    Returns:
        ``ic_result[0]`` for a tuple, otherwise ``ic_result`` unchanged.
    """
    return ic_result[0] if isinstance(ic_result, tuple) else ic_result


def write_results(results, output_path: str = OUTPUT_PATH) -> None:
    """Print every mined formula and store it as one JSON object per line.

    Args:
        results: Iterable of ``(formula, ic_result)`` pairs. Each ``formula``
            must expose ``expression`` and ``to_dict()``; ``ic_result`` is
            interpreted by :func:`extract_ic`.
        output_path: File to (over)write with the serialized formulas.
    """
    # The JSON is written with ensure_ascii=False, so non-ASCII characters are
    # emitted verbatim; pin the encoding instead of relying on the platform
    # default, which may be unable to encode them.
    with open(output_path, "w", encoding="utf-8") as out_file:
        for formula, ic_result in results:
            ic = extract_ic(ic_result)
            print(f"{formula.expression}, IC: {ic:.4f}")
            # Convert the factor formula to a dict and write it to the file.
            out_file.write(json.dumps(formula.to_dict(), ensure_ascii=False) + "\n")


def main(data_path: str = DATA_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Load candles, run the GP factor miner and save the mined formulas.

    Args:
        data_path: Candle file handed to ``load_data``.
        output_path: Destination file for the mined formulas.
    """
    # Project-local imports stay inside main() so they run after the numpy
    # warning filters above are installed, and so the helpers in this module
    # can be imported without the mining packages being available.
    from data import load_data
    from factor_mining.gp_miner import GPConfig, GPMiner

    df = load_data(data_path)
    # Resample the candle data to 4-hour bars (assumes a datetime index and
    # the common OHLCV columns).
    df = resample_ohlcv(df, RESAMPLE_RULE)
    # Keep only the rows strictly before the cutoff date.
    df = df[df.index < DATA_CUTOFF]

    print("数据加载成功,前5行:")
    print(df.head())
    print(f"\n数据形状: {df.shape}")
    print(f"数据列: {df.columns.tolist()}")

    gp_config = GPConfig(
        ret_horizon=48,
        ic_window=120,
        ic_method="spearman",
        seed=None,
        population_size=200,
        generations=30,
        tournament_size=5,
        crossover_prob=0.9,
        mutation_prob=0.05,
        elitism=5,
        max_depth_init=1,
        max_depth=30,
        complexity_penalty=0.001,
    )
    miner = GPMiner(config=gp_config)
    res = miner.mine(df, FEATURE_COLUMNS)

    write_results(res, output_path)


if __name__ == "__main__":
    main()