""" 主流程:时间序列因子挖掘、检验、回测、信号生成 """ import pandas as pd from typing import Dict, List, Optional import warnings warnings.filterwarnings('ignore') from data import load_data, compute_technical_indicators, preprocess_data, compute_forward_returns from factors import FactorMiner, create_default_factors from validation import validate_factor, factor_span_regression from combination import MultiFactorModel from backtest import BacktestEngine from signal import generate_signals class FactorPipeline: """因子挖掘流程""" def __init__( self, ret_horizon: int = 1, ic_window: int = 30, commission: float = 0.001, slippage: float = 0.0005 ): """ Parameters: ----------- ret_horizon : int 未来收益率周期 ic_window : int IC计算窗口 commission : float 手续费率 slippage : float 滑点 """ self.ret_horizon = ret_horizon self.ic_window = ic_window self.commission = commission self.slippage = slippage self.data: Optional[pd.DataFrame] = None self.factors: Optional[pd.DataFrame] = None self.forward_return: Optional[pd.Series] = None self.factor_miner: Optional[FactorMiner] = None self.validation_results: Dict = {} self.model: Optional[MultiFactorModel] = None self.score: Optional[pd.Series] = None self.backtest_results: Optional[Dict] = None def load_and_preprocess(self, file_path: str) -> 'FactorPipeline': """步骤1:加载和预处理数据""" print("=" * 50) print("步骤1:加载和预处理数据") print("=" * 50) # 加载数据 self.data = load_data(file_path) print(f"加载数据: {len(self.data)} 条记录") # 计算技术指标 self.data = compute_technical_indicators(self.data) print("计算技术指标完成") # 预处理 self.data = preprocess_data(self.data) print("数据预处理完成") # 计算未来收益率 self.forward_return = compute_forward_returns( self.data['close'], horizon=self.ret_horizon ) print(f"计算未来收益率完成(周期={self.ret_horizon})") return self def mine_factors(self, custom_miner: Optional[FactorMiner] = None) -> 'FactorPipeline': """步骤2:因子挖掘""" print("\n" + "=" * 50) print("步骤2:因子挖掘") print("=" * 50) if self.data is None: raise ValueError("请先加载数据") # 使用自定义或默认因子挖掘器 if custom_miner is None: self.factor_miner = create_default_factors() else: self.factor_miner = custom_miner # 计算所有因子 self.factors = self.factor_miner.compute_all_factors(self.data) print(f"计算因子完成: {list(self.factors.columns)}") return self def validate_factors(self, min_ic: float = 0.01, min_tstat: float = 1.5) -> 'FactorPipeline': """步骤3:因子检验""" print("\n" + "=" * 50) print("步骤3:因子检验") print("=" * 50) if self.factors is None or self.forward_return is None: raise ValueError("请先完成因子挖掘") valid_factors = [] self.validation_results = {} for factor_name in self.factors.columns: factor = self.factors[factor_name] # 综合检验 result = validate_factor(factor, self.forward_return, ic_window=self.ic_window) self.validation_results[factor_name] = result # 筛选有效因子 if (abs(result['mean_ic']) >= min_ic and abs(result['mean_h_l_tstat']) >= min_tstat): valid_factors.append(factor_name) print(f"\n因子 {factor_name}:") print(f" 平均IC: {result['mean_ic']:.4f}") print(f" IC信息比率: {result['ic_ir']:.4f}") print(f" H-L收益差: {result['mean_h_l_return']:.4f}") print(f" H-L t统计量: {result['mean_h_l_tstat']:.4f}") else: print(f"\n因子 {factor_name} 未通过检验 (IC={result['mean_ic']:.4f}, t={result['mean_h_l_tstat']:.4f})") # 只保留有效因子 if valid_factors: self.factors = self.factors[valid_factors] print(f"\n有效因子: {valid_factors}") else: print("\n警告:没有因子通过检验!") return self def combine_factors( self, weight_method: str = 'risk_parity', window: Optional[int] = None ) -> 'FactorPipeline': """步骤4:因子组合""" print("\n" + "=" * 50) print("步骤4:因子组合") print("=" * 50) if self.factors is None or len(self.factors.columns) == 0: raise ValueError("没有有效因子可组合") # 创建多因子模型 self.model = MultiFactorModel(weight_method=weight_method) self.model.fit( self.factors, forward_return=self.forward_return, window=window ) # 计算综合得分 self.score = self.model.predict(self.factors) # 显示权重 weights = self.model.get_weights() print("因子权重:") for name, weight in weights.items(): print(f" {name}: {weight:.4f}") print(f"\n综合得分统计:") print(f" 均值: {self.score.mean():.4f}") print(f" 标准差: {self.score.std():.4f}") return self def generate_signals( self, buy_threshold: float = 0.8, sell_threshold: float = -0.8, window: int = 30 ) -> pd.Series: """步骤5:生成交易信号""" if self.score is None: raise ValueError("请先完成因子组合") signals = generate_signals( self.score, buy_threshold=buy_threshold, sell_threshold=sell_threshold, window=window ) return signals def backtest( self, signals: Optional[pd.Series] = None, buy_threshold: float = 0.8, sell_threshold: float = -0.8, window: int = 30 ) -> Dict: """步骤6:回测""" print("\n" + "=" * 50) print("步骤6:回测") print("=" * 50) if self.data is None: raise ValueError("请先加载数据") if signals is None: signals = self.generate_signals(buy_threshold, sell_threshold, window) # 创建回测引擎 engine = BacktestEngine( commission=self.commission, slippage=self.slippage ) # 运行回测 self.backtest_results = engine.run( signals, self.data['close'], score=self.score ) # 显示结果 metrics = self.backtest_results['metrics'] print("\n回测结果:") print(f" 总收益率: {metrics.get('total_return', 0)*100:.2f}%") print(f" 年化收益率: {metrics.get('annual_return', 0)*100:.2f}%") print(f" 年化波动率: {metrics.get('annual_volatility', 0)*100:.2f}%") print(f" 夏普比率: {metrics.get('sharpe_ratio', 0):.2f}") print(f" 最大回撤: {metrics.get('max_drawdown', 0)*100:.2f}%") print(f" 胜率: {metrics.get('win_rate', 0)*100:.2f}%") print(f" 盈亏比: {metrics.get('profit_loss_ratio', 0):.2f}") print(f" 交易次数: {metrics.get('total_trades', 0)}") return self.backtest_results def run_full_pipeline( self, file_path: str, custom_miner: Optional[FactorMiner] = None, min_ic: float = 0.01, min_tstat: float = 1.5, weight_method: str = 'risk_parity', buy_threshold: float = 0.8, sell_threshold: float = -0.8 ) -> Dict: """运行完整流程""" self.load_and_preprocess(file_path) \ .mine_factors(custom_miner) \ .validate_factors(min_ic, min_tstat) \ .combine_factors(weight_method) \ .backtest(buy_threshold=buy_threshold, sell_threshold=sell_threshold) return { 'factors': self.factors, 'score': self.score, 'validation': self.validation_results, 'backtest': self.backtest_results } if __name__ == "__main__": # 示例使用 pipeline = FactorPipeline(ret_horizon=1, ic_window=30) results = pipeline.run_full_pipeline( file_path="ETH_USDT-1h.feather", min_ic=0.01, min_tstat=1.5, weight_method='risk_parity', buy_threshold=0.8, sell_threshold=-0.8 ) # 保存结果 if results['factors'] is not None: results['factors'].to_csv("factors.csv") print("\n因子数据已保存到 factors.csv") if results['score'] is not None: results['score'].to_csv("score.csv") print("综合得分已保存到 score.csv")