第一版流程

This commit is contained in:
2025-11-08 13:39:02 +08:00
parent dcfe2d84d5
commit a66e42a8ae
11 changed files with 1648 additions and 0 deletions

287
pipeline.py Normal file
View File

@@ -0,0 +1,287 @@
"""
主流程:时间序列因子挖掘、检验、回测、信号生成
"""
import pandas as pd
from typing import Dict, List, Optional
import warnings
warnings.filterwarnings('ignore')
from data import load_data, compute_technical_indicators, preprocess_data, compute_forward_returns
from factors import FactorMiner, create_default_factors
from validation import validate_factor, factor_span_regression
from combination import MultiFactorModel
from backtest import BacktestEngine
from signal import generate_signals
class FactorPipeline:
"""因子挖掘流程"""
def __init__(
self,
ret_horizon: int = 1,
ic_window: int = 30,
commission: float = 0.001,
slippage: float = 0.0005
):
"""
Parameters:
-----------
ret_horizon : int
未来收益率周期
ic_window : int
IC计算窗口
commission : float
手续费率
slippage : float
滑点
"""
self.ret_horizon = ret_horizon
self.ic_window = ic_window
self.commission = commission
self.slippage = slippage
self.data: Optional[pd.DataFrame] = None
self.factors: Optional[pd.DataFrame] = None
self.forward_return: Optional[pd.Series] = None
self.factor_miner: Optional[FactorMiner] = None
self.validation_results: Dict = {}
self.model: Optional[MultiFactorModel] = None
self.score: Optional[pd.Series] = None
self.backtest_results: Optional[Dict] = None
def load_and_preprocess(self, file_path: str) -> 'FactorPipeline':
"""步骤1加载和预处理数据"""
print("=" * 50)
print("步骤1加载和预处理数据")
print("=" * 50)
# 加载数据
self.data = load_data(file_path)
print(f"加载数据: {len(self.data)} 条记录")
# 计算技术指标
self.data = compute_technical_indicators(self.data)
print("计算技术指标完成")
# 预处理
self.data = preprocess_data(self.data)
print("数据预处理完成")
# 计算未来收益率
self.forward_return = compute_forward_returns(
self.data['close'],
horizon=self.ret_horizon
)
print(f"计算未来收益率完成(周期={self.ret_horizon}")
return self
def mine_factors(self, custom_miner: Optional[FactorMiner] = None) -> 'FactorPipeline':
"""步骤2因子挖掘"""
print("\n" + "=" * 50)
print("步骤2因子挖掘")
print("=" * 50)
if self.data is None:
raise ValueError("请先加载数据")
# 使用自定义或默认因子挖掘器
if custom_miner is None:
self.factor_miner = create_default_factors()
else:
self.factor_miner = custom_miner
# 计算所有因子
self.factors = self.factor_miner.compute_all_factors(self.data)
print(f"计算因子完成: {list(self.factors.columns)}")
return self
def validate_factors(self, min_ic: float = 0.01, min_tstat: float = 1.5) -> 'FactorPipeline':
"""步骤3因子检验"""
print("\n" + "=" * 50)
print("步骤3因子检验")
print("=" * 50)
if self.factors is None or self.forward_return is None:
raise ValueError("请先完成因子挖掘")
valid_factors = []
self.validation_results = {}
for factor_name in self.factors.columns:
factor = self.factors[factor_name]
# 综合检验
result = validate_factor(factor, self.forward_return, ic_window=self.ic_window)
self.validation_results[factor_name] = result
# 筛选有效因子
if (abs(result['mean_ic']) >= min_ic and
abs(result['mean_h_l_tstat']) >= min_tstat):
valid_factors.append(factor_name)
print(f"\n因子 {factor_name}:")
print(f" 平均IC: {result['mean_ic']:.4f}")
print(f" IC信息比率: {result['ic_ir']:.4f}")
print(f" H-L收益差: {result['mean_h_l_return']:.4f}")
print(f" H-L t统计量: {result['mean_h_l_tstat']:.4f}")
else:
print(f"\n因子 {factor_name} 未通过检验 (IC={result['mean_ic']:.4f}, t={result['mean_h_l_tstat']:.4f})")
# 只保留有效因子
if valid_factors:
self.factors = self.factors[valid_factors]
print(f"\n有效因子: {valid_factors}")
else:
print("\n警告:没有因子通过检验!")
return self
def combine_factors(
self,
weight_method: str = 'risk_parity',
window: Optional[int] = None
) -> 'FactorPipeline':
"""步骤4因子组合"""
print("\n" + "=" * 50)
print("步骤4因子组合")
print("=" * 50)
if self.factors is None or len(self.factors.columns) == 0:
raise ValueError("没有有效因子可组合")
# 创建多因子模型
self.model = MultiFactorModel(weight_method=weight_method)
self.model.fit(
self.factors,
forward_return=self.forward_return,
window=window
)
# 计算综合得分
self.score = self.model.predict(self.factors)
# 显示权重
weights = self.model.get_weights()
print("因子权重:")
for name, weight in weights.items():
print(f" {name}: {weight:.4f}")
print(f"\n综合得分统计:")
print(f" 均值: {self.score.mean():.4f}")
print(f" 标准差: {self.score.std():.4f}")
return self
def generate_signals(
self,
buy_threshold: float = 0.8,
sell_threshold: float = -0.8,
window: int = 30
) -> pd.Series:
"""步骤5生成交易信号"""
if self.score is None:
raise ValueError("请先完成因子组合")
signals = generate_signals(
self.score,
buy_threshold=buy_threshold,
sell_threshold=sell_threshold,
window=window
)
return signals
def backtest(
self,
signals: Optional[pd.Series] = None,
buy_threshold: float = 0.8,
sell_threshold: float = -0.8,
window: int = 30
) -> Dict:
"""步骤6回测"""
print("\n" + "=" * 50)
print("步骤6回测")
print("=" * 50)
if self.data is None:
raise ValueError("请先加载数据")
if signals is None:
signals = self.generate_signals(buy_threshold, sell_threshold, window)
# 创建回测引擎
engine = BacktestEngine(
commission=self.commission,
slippage=self.slippage
)
# 运行回测
self.backtest_results = engine.run(
signals,
self.data['close'],
score=self.score
)
# 显示结果
metrics = self.backtest_results['metrics']
print("\n回测结果:")
print(f" 总收益率: {metrics.get('total_return', 0)*100:.2f}%")
print(f" 年化收益率: {metrics.get('annual_return', 0)*100:.2f}%")
print(f" 年化波动率: {metrics.get('annual_volatility', 0)*100:.2f}%")
print(f" 夏普比率: {metrics.get('sharpe_ratio', 0):.2f}")
print(f" 最大回撤: {metrics.get('max_drawdown', 0)*100:.2f}%")
print(f" 胜率: {metrics.get('win_rate', 0)*100:.2f}%")
print(f" 盈亏比: {metrics.get('profit_loss_ratio', 0):.2f}")
print(f" 交易次数: {metrics.get('total_trades', 0)}")
return self.backtest_results
def run_full_pipeline(
self,
file_path: str,
custom_miner: Optional[FactorMiner] = None,
min_ic: float = 0.01,
min_tstat: float = 1.5,
weight_method: str = 'risk_parity',
buy_threshold: float = 0.8,
sell_threshold: float = -0.8
) -> Dict:
"""运行完整流程"""
self.load_and_preprocess(file_path) \
.mine_factors(custom_miner) \
.validate_factors(min_ic, min_tstat) \
.combine_factors(weight_method) \
.backtest(buy_threshold=buy_threshold, sell_threshold=sell_threshold)
return {
'factors': self.factors,
'score': self.score,
'validation': self.validation_results,
'backtest': self.backtest_results
}
if __name__ == "__main__":
# 示例使用
pipeline = FactorPipeline(ret_horizon=1, ic_window=30)
results = pipeline.run_full_pipeline(
file_path="ETH_USDT-1h.feather",
min_ic=0.01,
min_tstat=1.5,
weight_method='risk_parity',
buy_threshold=0.8,
sell_threshold=-0.8
)
# 保存结果
if results['factors'] is not None:
results['factors'].to_csv("factors.csv")
print("\n因子数据已保存到 factors.csv")
if results['score'] is not None:
results['score'].to_csv("score.csv")
print("综合得分已保存到 score.csv")