288 lines
9.4 KiB
Python
288 lines
9.4 KiB
Python
"""
|
||
主流程:时间序列因子挖掘、检验、回测、信号生成
|
||
"""
|
||
import pandas as pd
|
||
from typing import Dict, List, Optional
|
||
import warnings
|
||
warnings.filterwarnings('ignore')
|
||
|
||
from data import load_data, compute_technical_indicators, preprocess_data, compute_forward_returns
|
||
from factors import FactorMiner, create_default_factors
|
||
from validation import validate_factor, factor_span_regression
|
||
from combination import MultiFactorModel
|
||
from backtest import BacktestEngine
|
||
from signal import generate_signals
|
||
|
||
|
||
class FactorPipeline:
|
||
"""因子挖掘流程"""
|
||
|
||
def __init__(
|
||
self,
|
||
ret_horizon: int = 1,
|
||
ic_window: int = 30,
|
||
commission: float = 0.001,
|
||
slippage: float = 0.0005
|
||
):
|
||
"""
|
||
Parameters:
|
||
-----------
|
||
ret_horizon : int
|
||
未来收益率周期
|
||
ic_window : int
|
||
IC计算窗口
|
||
commission : float
|
||
手续费率
|
||
slippage : float
|
||
滑点
|
||
"""
|
||
self.ret_horizon = ret_horizon
|
||
self.ic_window = ic_window
|
||
self.commission = commission
|
||
self.slippage = slippage
|
||
|
||
self.data: Optional[pd.DataFrame] = None
|
||
self.factors: Optional[pd.DataFrame] = None
|
||
self.forward_return: Optional[pd.Series] = None
|
||
self.factor_miner: Optional[FactorMiner] = None
|
||
self.validation_results: Dict = {}
|
||
self.model: Optional[MultiFactorModel] = None
|
||
self.score: Optional[pd.Series] = None
|
||
self.backtest_results: Optional[Dict] = None
|
||
|
||
def load_and_preprocess(self, file_path: str) -> 'FactorPipeline':
|
||
"""步骤1:加载和预处理数据"""
|
||
print("=" * 50)
|
||
print("步骤1:加载和预处理数据")
|
||
print("=" * 50)
|
||
|
||
# 加载数据
|
||
self.data = load_data(file_path)
|
||
print(f"加载数据: {len(self.data)} 条记录")
|
||
|
||
# 计算技术指标
|
||
self.data = compute_technical_indicators(self.data)
|
||
print("计算技术指标完成")
|
||
|
||
# 预处理
|
||
self.data = preprocess_data(self.data)
|
||
print("数据预处理完成")
|
||
|
||
# 计算未来收益率
|
||
self.forward_return = compute_forward_returns(
|
||
self.data['close'],
|
||
horizon=self.ret_horizon
|
||
)
|
||
print(f"计算未来收益率完成(周期={self.ret_horizon})")
|
||
|
||
return self
|
||
|
||
def mine_factors(self, custom_miner: Optional[FactorMiner] = None) -> 'FactorPipeline':
|
||
"""步骤2:因子挖掘"""
|
||
print("\n" + "=" * 50)
|
||
print("步骤2:因子挖掘")
|
||
print("=" * 50)
|
||
|
||
if self.data is None:
|
||
raise ValueError("请先加载数据")
|
||
|
||
# 使用自定义或默认因子挖掘器
|
||
if custom_miner is None:
|
||
self.factor_miner = create_default_factors()
|
||
else:
|
||
self.factor_miner = custom_miner
|
||
|
||
# 计算所有因子
|
||
self.factors = self.factor_miner.compute_all_factors(self.data)
|
||
print(f"计算因子完成: {list(self.factors.columns)}")
|
||
|
||
return self
|
||
|
||
def validate_factors(self, min_ic: float = 0.01, min_tstat: float = 1.5) -> 'FactorPipeline':
|
||
"""步骤3:因子检验"""
|
||
print("\n" + "=" * 50)
|
||
print("步骤3:因子检验")
|
||
print("=" * 50)
|
||
|
||
if self.factors is None or self.forward_return is None:
|
||
raise ValueError("请先完成因子挖掘")
|
||
|
||
valid_factors = []
|
||
self.validation_results = {}
|
||
|
||
for factor_name in self.factors.columns:
|
||
factor = self.factors[factor_name]
|
||
|
||
# 综合检验
|
||
result = validate_factor(factor, self.forward_return, ic_window=self.ic_window)
|
||
self.validation_results[factor_name] = result
|
||
|
||
# 筛选有效因子
|
||
if (abs(result['mean_ic']) >= min_ic and
|
||
abs(result['mean_h_l_tstat']) >= min_tstat):
|
||
valid_factors.append(factor_name)
|
||
print(f"\n因子 {factor_name}:")
|
||
print(f" 平均IC: {result['mean_ic']:.4f}")
|
||
print(f" IC信息比率: {result['ic_ir']:.4f}")
|
||
print(f" H-L收益差: {result['mean_h_l_return']:.4f}")
|
||
print(f" H-L t统计量: {result['mean_h_l_tstat']:.4f}")
|
||
else:
|
||
print(f"\n因子 {factor_name} 未通过检验 (IC={result['mean_ic']:.4f}, t={result['mean_h_l_tstat']:.4f})")
|
||
|
||
# 只保留有效因子
|
||
if valid_factors:
|
||
self.factors = self.factors[valid_factors]
|
||
print(f"\n有效因子: {valid_factors}")
|
||
else:
|
||
print("\n警告:没有因子通过检验!")
|
||
|
||
return self
|
||
|
||
def combine_factors(
|
||
self,
|
||
weight_method: str = 'risk_parity',
|
||
window: Optional[int] = None
|
||
) -> 'FactorPipeline':
|
||
"""步骤4:因子组合"""
|
||
print("\n" + "=" * 50)
|
||
print("步骤4:因子组合")
|
||
print("=" * 50)
|
||
|
||
if self.factors is None or len(self.factors.columns) == 0:
|
||
raise ValueError("没有有效因子可组合")
|
||
|
||
# 创建多因子模型
|
||
self.model = MultiFactorModel(weight_method=weight_method)
|
||
self.model.fit(
|
||
self.factors,
|
||
forward_return=self.forward_return,
|
||
window=window
|
||
)
|
||
|
||
# 计算综合得分
|
||
self.score = self.model.predict(self.factors)
|
||
|
||
# 显示权重
|
||
weights = self.model.get_weights()
|
||
print("因子权重:")
|
||
for name, weight in weights.items():
|
||
print(f" {name}: {weight:.4f}")
|
||
|
||
print(f"\n综合得分统计:")
|
||
print(f" 均值: {self.score.mean():.4f}")
|
||
print(f" 标准差: {self.score.std():.4f}")
|
||
|
||
return self
|
||
|
||
def generate_signals(
|
||
self,
|
||
buy_threshold: float = 0.8,
|
||
sell_threshold: float = -0.8,
|
||
window: int = 30
|
||
) -> pd.Series:
|
||
"""步骤5:生成交易信号"""
|
||
if self.score is None:
|
||
raise ValueError("请先完成因子组合")
|
||
|
||
signals = generate_signals(
|
||
self.score,
|
||
buy_threshold=buy_threshold,
|
||
sell_threshold=sell_threshold,
|
||
window=window
|
||
)
|
||
|
||
return signals
|
||
|
||
def backtest(
|
||
self,
|
||
signals: Optional[pd.Series] = None,
|
||
buy_threshold: float = 0.8,
|
||
sell_threshold: float = -0.8,
|
||
window: int = 30
|
||
) -> Dict:
|
||
"""步骤6:回测"""
|
||
print("\n" + "=" * 50)
|
||
print("步骤6:回测")
|
||
print("=" * 50)
|
||
|
||
if self.data is None:
|
||
raise ValueError("请先加载数据")
|
||
|
||
if signals is None:
|
||
signals = self.generate_signals(buy_threshold, sell_threshold, window)
|
||
|
||
# 创建回测引擎
|
||
engine = BacktestEngine(
|
||
commission=self.commission,
|
||
slippage=self.slippage
|
||
)
|
||
|
||
# 运行回测
|
||
self.backtest_results = engine.run(
|
||
signals,
|
||
self.data['close'],
|
||
score=self.score
|
||
)
|
||
|
||
# 显示结果
|
||
metrics = self.backtest_results['metrics']
|
||
print("\n回测结果:")
|
||
print(f" 总收益率: {metrics.get('total_return', 0)*100:.2f}%")
|
||
print(f" 年化收益率: {metrics.get('annual_return', 0)*100:.2f}%")
|
||
print(f" 年化波动率: {metrics.get('annual_volatility', 0)*100:.2f}%")
|
||
print(f" 夏普比率: {metrics.get('sharpe_ratio', 0):.2f}")
|
||
print(f" 最大回撤: {metrics.get('max_drawdown', 0)*100:.2f}%")
|
||
print(f" 胜率: {metrics.get('win_rate', 0)*100:.2f}%")
|
||
print(f" 盈亏比: {metrics.get('profit_loss_ratio', 0):.2f}")
|
||
print(f" 交易次数: {metrics.get('total_trades', 0)}")
|
||
|
||
return self.backtest_results
|
||
|
||
def run_full_pipeline(
|
||
self,
|
||
file_path: str,
|
||
custom_miner: Optional[FactorMiner] = None,
|
||
min_ic: float = 0.01,
|
||
min_tstat: float = 1.5,
|
||
weight_method: str = 'risk_parity',
|
||
buy_threshold: float = 0.8,
|
||
sell_threshold: float = -0.8
|
||
) -> Dict:
|
||
"""运行完整流程"""
|
||
self.load_and_preprocess(file_path) \
|
||
.mine_factors(custom_miner) \
|
||
.validate_factors(min_ic, min_tstat) \
|
||
.combine_factors(weight_method) \
|
||
.backtest(buy_threshold=buy_threshold, sell_threshold=sell_threshold)
|
||
|
||
return {
|
||
'factors': self.factors,
|
||
'score': self.score,
|
||
'validation': self.validation_results,
|
||
'backtest': self.backtest_results
|
||
}
|
||
|
||
|
||
if __name__ == "__main__":
|
||
# 示例使用
|
||
pipeline = FactorPipeline(ret_horizon=1, ic_window=30)
|
||
|
||
results = pipeline.run_full_pipeline(
|
||
file_path="ETH_USDT-1h.feather",
|
||
min_ic=0.01,
|
||
min_tstat=1.5,
|
||
weight_method='risk_parity',
|
||
buy_threshold=0.8,
|
||
sell_threshold=-0.8
|
||
)
|
||
|
||
# 保存结果
|
||
if results['factors'] is not None:
|
||
results['factors'].to_csv("factors.csv")
|
||
print("\n因子数据已保存到 factors.csv")
|
||
|
||
if results['score'] is not None:
|
||
results['score'].to_csv("score.csv")
|
||
print("综合得分已保存到 score.csv")
|
||
|