第一版流程
This commit is contained in:
287
pipeline.py
Normal file
287
pipeline.py
Normal file
@@ -0,0 +1,287 @@
|
||||
"""
|
||||
主流程:时间序列因子挖掘、检验、回测、信号生成
|
||||
"""
|
||||
import pandas as pd
|
||||
from typing import Dict, List, Optional
|
||||
import warnings
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
from data import load_data, compute_technical_indicators, preprocess_data, compute_forward_returns
|
||||
from factors import FactorMiner, create_default_factors
|
||||
from validation import validate_factor, factor_span_regression
|
||||
from combination import MultiFactorModel
|
||||
from backtest import BacktestEngine
|
||||
from signal import generate_signals
|
||||
|
||||
|
||||
class FactorPipeline:
|
||||
"""因子挖掘流程"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ret_horizon: int = 1,
|
||||
ic_window: int = 30,
|
||||
commission: float = 0.001,
|
||||
slippage: float = 0.0005
|
||||
):
|
||||
"""
|
||||
Parameters:
|
||||
-----------
|
||||
ret_horizon : int
|
||||
未来收益率周期
|
||||
ic_window : int
|
||||
IC计算窗口
|
||||
commission : float
|
||||
手续费率
|
||||
slippage : float
|
||||
滑点
|
||||
"""
|
||||
self.ret_horizon = ret_horizon
|
||||
self.ic_window = ic_window
|
||||
self.commission = commission
|
||||
self.slippage = slippage
|
||||
|
||||
self.data: Optional[pd.DataFrame] = None
|
||||
self.factors: Optional[pd.DataFrame] = None
|
||||
self.forward_return: Optional[pd.Series] = None
|
||||
self.factor_miner: Optional[FactorMiner] = None
|
||||
self.validation_results: Dict = {}
|
||||
self.model: Optional[MultiFactorModel] = None
|
||||
self.score: Optional[pd.Series] = None
|
||||
self.backtest_results: Optional[Dict] = None
|
||||
|
||||
def load_and_preprocess(self, file_path: str) -> 'FactorPipeline':
|
||||
"""步骤1:加载和预处理数据"""
|
||||
print("=" * 50)
|
||||
print("步骤1:加载和预处理数据")
|
||||
print("=" * 50)
|
||||
|
||||
# 加载数据
|
||||
self.data = load_data(file_path)
|
||||
print(f"加载数据: {len(self.data)} 条记录")
|
||||
|
||||
# 计算技术指标
|
||||
self.data = compute_technical_indicators(self.data)
|
||||
print("计算技术指标完成")
|
||||
|
||||
# 预处理
|
||||
self.data = preprocess_data(self.data)
|
||||
print("数据预处理完成")
|
||||
|
||||
# 计算未来收益率
|
||||
self.forward_return = compute_forward_returns(
|
||||
self.data['close'],
|
||||
horizon=self.ret_horizon
|
||||
)
|
||||
print(f"计算未来收益率完成(周期={self.ret_horizon})")
|
||||
|
||||
return self
|
||||
|
||||
def mine_factors(self, custom_miner: Optional[FactorMiner] = None) -> 'FactorPipeline':
|
||||
"""步骤2:因子挖掘"""
|
||||
print("\n" + "=" * 50)
|
||||
print("步骤2:因子挖掘")
|
||||
print("=" * 50)
|
||||
|
||||
if self.data is None:
|
||||
raise ValueError("请先加载数据")
|
||||
|
||||
# 使用自定义或默认因子挖掘器
|
||||
if custom_miner is None:
|
||||
self.factor_miner = create_default_factors()
|
||||
else:
|
||||
self.factor_miner = custom_miner
|
||||
|
||||
# 计算所有因子
|
||||
self.factors = self.factor_miner.compute_all_factors(self.data)
|
||||
print(f"计算因子完成: {list(self.factors.columns)}")
|
||||
|
||||
return self
|
||||
|
||||
def validate_factors(self, min_ic: float = 0.01, min_tstat: float = 1.5) -> 'FactorPipeline':
|
||||
"""步骤3:因子检验"""
|
||||
print("\n" + "=" * 50)
|
||||
print("步骤3:因子检验")
|
||||
print("=" * 50)
|
||||
|
||||
if self.factors is None or self.forward_return is None:
|
||||
raise ValueError("请先完成因子挖掘")
|
||||
|
||||
valid_factors = []
|
||||
self.validation_results = {}
|
||||
|
||||
for factor_name in self.factors.columns:
|
||||
factor = self.factors[factor_name]
|
||||
|
||||
# 综合检验
|
||||
result = validate_factor(factor, self.forward_return, ic_window=self.ic_window)
|
||||
self.validation_results[factor_name] = result
|
||||
|
||||
# 筛选有效因子
|
||||
if (abs(result['mean_ic']) >= min_ic and
|
||||
abs(result['mean_h_l_tstat']) >= min_tstat):
|
||||
valid_factors.append(factor_name)
|
||||
print(f"\n因子 {factor_name}:")
|
||||
print(f" 平均IC: {result['mean_ic']:.4f}")
|
||||
print(f" IC信息比率: {result['ic_ir']:.4f}")
|
||||
print(f" H-L收益差: {result['mean_h_l_return']:.4f}")
|
||||
print(f" H-L t统计量: {result['mean_h_l_tstat']:.4f}")
|
||||
else:
|
||||
print(f"\n因子 {factor_name} 未通过检验 (IC={result['mean_ic']:.4f}, t={result['mean_h_l_tstat']:.4f})")
|
||||
|
||||
# 只保留有效因子
|
||||
if valid_factors:
|
||||
self.factors = self.factors[valid_factors]
|
||||
print(f"\n有效因子: {valid_factors}")
|
||||
else:
|
||||
print("\n警告:没有因子通过检验!")
|
||||
|
||||
return self
|
||||
|
||||
def combine_factors(
|
||||
self,
|
||||
weight_method: str = 'risk_parity',
|
||||
window: Optional[int] = None
|
||||
) -> 'FactorPipeline':
|
||||
"""步骤4:因子组合"""
|
||||
print("\n" + "=" * 50)
|
||||
print("步骤4:因子组合")
|
||||
print("=" * 50)
|
||||
|
||||
if self.factors is None or len(self.factors.columns) == 0:
|
||||
raise ValueError("没有有效因子可组合")
|
||||
|
||||
# 创建多因子模型
|
||||
self.model = MultiFactorModel(weight_method=weight_method)
|
||||
self.model.fit(
|
||||
self.factors,
|
||||
forward_return=self.forward_return,
|
||||
window=window
|
||||
)
|
||||
|
||||
# 计算综合得分
|
||||
self.score = self.model.predict(self.factors)
|
||||
|
||||
# 显示权重
|
||||
weights = self.model.get_weights()
|
||||
print("因子权重:")
|
||||
for name, weight in weights.items():
|
||||
print(f" {name}: {weight:.4f}")
|
||||
|
||||
print(f"\n综合得分统计:")
|
||||
print(f" 均值: {self.score.mean():.4f}")
|
||||
print(f" 标准差: {self.score.std():.4f}")
|
||||
|
||||
return self
|
||||
|
||||
def generate_signals(
|
||||
self,
|
||||
buy_threshold: float = 0.8,
|
||||
sell_threshold: float = -0.8,
|
||||
window: int = 30
|
||||
) -> pd.Series:
|
||||
"""步骤5:生成交易信号"""
|
||||
if self.score is None:
|
||||
raise ValueError("请先完成因子组合")
|
||||
|
||||
signals = generate_signals(
|
||||
self.score,
|
||||
buy_threshold=buy_threshold,
|
||||
sell_threshold=sell_threshold,
|
||||
window=window
|
||||
)
|
||||
|
||||
return signals
|
||||
|
||||
def backtest(
|
||||
self,
|
||||
signals: Optional[pd.Series] = None,
|
||||
buy_threshold: float = 0.8,
|
||||
sell_threshold: float = -0.8,
|
||||
window: int = 30
|
||||
) -> Dict:
|
||||
"""步骤6:回测"""
|
||||
print("\n" + "=" * 50)
|
||||
print("步骤6:回测")
|
||||
print("=" * 50)
|
||||
|
||||
if self.data is None:
|
||||
raise ValueError("请先加载数据")
|
||||
|
||||
if signals is None:
|
||||
signals = self.generate_signals(buy_threshold, sell_threshold, window)
|
||||
|
||||
# 创建回测引擎
|
||||
engine = BacktestEngine(
|
||||
commission=self.commission,
|
||||
slippage=self.slippage
|
||||
)
|
||||
|
||||
# 运行回测
|
||||
self.backtest_results = engine.run(
|
||||
signals,
|
||||
self.data['close'],
|
||||
score=self.score
|
||||
)
|
||||
|
||||
# 显示结果
|
||||
metrics = self.backtest_results['metrics']
|
||||
print("\n回测结果:")
|
||||
print(f" 总收益率: {metrics.get('total_return', 0)*100:.2f}%")
|
||||
print(f" 年化收益率: {metrics.get('annual_return', 0)*100:.2f}%")
|
||||
print(f" 年化波动率: {metrics.get('annual_volatility', 0)*100:.2f}%")
|
||||
print(f" 夏普比率: {metrics.get('sharpe_ratio', 0):.2f}")
|
||||
print(f" 最大回撤: {metrics.get('max_drawdown', 0)*100:.2f}%")
|
||||
print(f" 胜率: {metrics.get('win_rate', 0)*100:.2f}%")
|
||||
print(f" 盈亏比: {metrics.get('profit_loss_ratio', 0):.2f}")
|
||||
print(f" 交易次数: {metrics.get('total_trades', 0)}")
|
||||
|
||||
return self.backtest_results
|
||||
|
||||
def run_full_pipeline(
|
||||
self,
|
||||
file_path: str,
|
||||
custom_miner: Optional[FactorMiner] = None,
|
||||
min_ic: float = 0.01,
|
||||
min_tstat: float = 1.5,
|
||||
weight_method: str = 'risk_parity',
|
||||
buy_threshold: float = 0.8,
|
||||
sell_threshold: float = -0.8
|
||||
) -> Dict:
|
||||
"""运行完整流程"""
|
||||
self.load_and_preprocess(file_path) \
|
||||
.mine_factors(custom_miner) \
|
||||
.validate_factors(min_ic, min_tstat) \
|
||||
.combine_factors(weight_method) \
|
||||
.backtest(buy_threshold=buy_threshold, sell_threshold=sell_threshold)
|
||||
|
||||
return {
|
||||
'factors': self.factors,
|
||||
'score': self.score,
|
||||
'validation': self.validation_results,
|
||||
'backtest': self.backtest_results
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 示例使用
|
||||
pipeline = FactorPipeline(ret_horizon=1, ic_window=30)
|
||||
|
||||
results = pipeline.run_full_pipeline(
|
||||
file_path="ETH_USDT-1h.feather",
|
||||
min_ic=0.01,
|
||||
min_tstat=1.5,
|
||||
weight_method='risk_parity',
|
||||
buy_threshold=0.8,
|
||||
sell_threshold=-0.8
|
||||
)
|
||||
|
||||
# 保存结果
|
||||
if results['factors'] is not None:
|
||||
results['factors'].to_csv("factors.csv")
|
||||
print("\n因子数据已保存到 factors.csv")
|
||||
|
||||
if results['score'] is not None:
|
||||
results['score'].to_csv("score.csv")
|
||||
print("综合得分已保存到 score.csv")
|
||||
|
||||
Reference in New Issue
Block a user