问题:nav_series 或 backtest_result.index 存在重复日期时, reindex() 抛出 'cannot reindex on an axis with duplicate labels' 修复: - 先检查并去除 nav_series 的重复日期 - 同时检查并去除 backtest_result.index 的重复日期 - 使用 duplicated(keep='last') 保留最后一条记录
216 lines
8.1 KiB
Python
216 lines
8.1 KiB
Python
#!/usr/bin/env python3
"""
Generate a legacy-engine-format report from data produced by the new framework.

Usage:
    python scripts/generate_legacy_report.py
"""

import os
import sys
import yaml
import pandas as pd
import numpy as np
from pathlib import Path
from dotenv import load_dotenv

# Load variables from a .env file before any project module is imported.
load_dotenv()

# Make the project root importable (this file lives one directory below it).
project_root = Path(__file__).parent.parent
sys.path.insert(0, os.fspath(project_root))

# New-framework strategy entry point.
from strategies.rotation.strategy import RotationStrategy

# The legacy report modules live under archive/legacy_core; that directory is
# pushed to the front of sys.path so `report` and `core` resolve from there.
archive_path = project_root.joinpath('archive', 'legacy_core')
sys.path.insert(0, os.fspath(archive_path))
from report import generate_performance_report
from core.common.utils import calculate_cagr, calculate_max_drawdown, calculate_sharpe
|
||
|
||
|
||
def _drop_duplicate_index(obj):
    """Return ``obj`` with every index label appearing once (last row wins).

    Args:
        obj: A pandas Series or DataFrame.

    Returns:
        ``obj`` itself when its index is already unique, otherwise a selection
        of ``obj`` that keeps only the last row for each repeated label.
    """
    if not obj.index.has_duplicates:
        return obj
    return obj.loc[~obj.index.duplicated(keep='last')]


def _align_to_index(series, target_index):
    """Forward-fill ``series`` onto ``target_index``.

    ``reindex`` refuses a source axis with duplicate labels, so the source is
    de-duplicated first.
    """
    return _drop_duplicate_index(series).reindex(target_index, method='ffill')


def _select_column(obj, column):
    """Pick one Series out of ``obj``.

    Args:
        obj: A Series (returned unchanged) or a DataFrame.
        column: Preferred column name; the first column is used when absent.

    Returns:
        The selected Series, or ``None`` when ``obj`` is neither a Series nor
        a DataFrame.
    """
    if isinstance(obj, pd.Series):
        return obj
    if isinstance(obj, pd.DataFrame):
        return obj[column] if column in obj.columns else obj.iloc[:, 0]
    return None


def _nav_from_returns(returns):
    """Compound ``returns`` into a NAV curve rescaled by its first valid value.

    When the compounded curve has no valid value at all the divisor falls
    back to 1, so the (all-NaN) curve is returned instead of raising.
    """
    nav = (1 + returns).cumprod()
    valid = nav.dropna()
    base = valid.iloc[0] if len(valid) > 0 else 1
    return nav / base


def run_with_legacy_report():
    """Run the new-framework back-test and emit a legacy-engine-format report.

    The back-test result frame is extended with the columns the legacy
    ``generate_performance_report`` reads (strategy / benchmark NAV and daily
    returns, per-code price, NAV and score columns, and a signal column), and
    ETF price / NAV frames aligned to the same dates are built for it.

    Duplicate dates are removed from the back-test result once, up front
    (the last row per date is kept), and from every source series before it is
    re-indexed, so all derived columns have the same length as the result.

    Returns:
        Whatever ``generate_performance_report`` returns, or ``None`` when the
        back-test produced no result.
    """
    # Load the strategy configuration; an empty YAML file parses to None.
    config_path = 'strategies/rotation/config.yaml'
    with open(config_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f) or {}

    # Back-test with the new framework.
    print("=" * 60)
    print(" ETF轮动策略 回测系统 (新框架)")
    print("=" * 60)

    strategy = RotationStrategy.from_yaml(config_path)
    data = strategy.get_data()

    # Compute factors.
    print("\n计算因子...")
    factor_df = strategy.compute_factors(data)

    # Generate signals. The return value is not used below; the call is kept
    # because it was part of the original pipeline (presumably the strategy
    # keeps state from it - TODO confirm).
    print("\n生成信号...")
    strategy.generate_signals(factor_df)

    # Run the back-test.
    print("\n执行回测...")
    result = strategy.run_backtest(data=data)

    # Check for a missing result BEFORE copying it; calling .copy() on None
    # would raise and make this guard unreachable.
    backtest_result = result['result']
    if backtest_result is None:
        print("回测失败,无法生成报告")
        return None
    backtest_result = backtest_result.copy()

    # De-duplicate the report dates once so that every column derived below
    # (assigned positionally) lines up with the frame.
    rows_before = len(backtest_result)
    backtest_result = _drop_duplicate_index(backtest_result)
    dropped_rows = rows_before - len(backtest_result)
    if dropped_rows:
        print(f"警告: 回测结果存在 {dropped_rows} 条重复日期记录, 已保留每个日期的最后一条")
    report_index = backtest_result.index

    # Column names expected by the legacy engine.
    backtest_result['轮动策略净值'] = backtest_result['策略净值']
    backtest_result['轮动策略日收益率'] = backtest_result['策略日收益率']

    # 1. Benchmark NAV and benchmark daily return.
    benchmark_close = _select_column(data.get('benchmark_data'), 'close')
    if benchmark_close is not None and len(benchmark_close) > 0:
        benchmark_aligned = _align_to_index(benchmark_close, report_index)
        benchmark_returns = benchmark_aligned.pct_change()
        backtest_result['基准净值'] = _nav_from_returns(benchmark_returns).to_numpy()
        backtest_result['基准日收益率'] = benchmark_returns.to_numpy()

    # 2. Per-code price and NAV, read from the raw index_data rather than
    #    index_close (the original author notes index_close may be misaligned).
    index_data = data.get('index_data')
    valid_codes = data['valid_codes']

    for code in valid_codes:
        if index_data is None or code not in index_data:
            continue
        price_series = _select_column(index_data[code], 'close')
        if price_series is None:
            continue

        price_aligned = _align_to_index(price_series, report_index)
        # reindex only fills dates missing from the source; NaN values that
        # were already present in the source still need a forward fill.
        price_aligned = price_aligned.ffill()

        code_nav = _nav_from_returns(price_aligned.pct_change())
        backtest_result[f'净值_{code}'] = code_nav.to_numpy()
        backtest_result[code] = price_aligned.to_numpy()  # current price

    # 3. Score columns taken from the factor frame.
    for code in valid_codes:
        if code in factor_df.columns:
            scores_aligned = _align_to_index(factor_df[code], report_index)
            backtest_result[f'得分_{code}'] = scores_aligned.to_numpy()

    # 4. Signal column under its legacy (Chinese) name.
    backtest_result['信号'] = backtest_result['signal']

    # code -> display name, plus the raw per-code configuration.
    code_config = config.get('code_list', {})
    code_name_map = {code: cfg.get('name', code) for code, cfg in code_config.items()}

    # ETF price and NAV data (the original notes these feed the premium-rate
    # calculation). Both frames are keyed by index code, not ETF code.
    etf_data = data.get('etf_data')
    etf_nav_data = data.get('etf_nav_data')
    # Resolved unconditionally: the NAV branch below needs it even when
    # etf_data is None.
    etf_code_map = data.get('etf_code_map') or {}

    etf_price_data = None
    if etf_data is not None:
        etf_price_data = pd.DataFrame(index=report_index)
        for idx_code, etf_code in etf_code_map.items():
            if etf_code in etf_data.columns:
                etf_price_aligned = _align_to_index(etf_data[etf_code], report_index)
                etf_price_data[idx_code] = etf_price_aligned.to_numpy()

    # etf_nav_data is a mapping {etf_code: DataFrame or Series}.
    etf_nav_data_raw = None
    if etf_nav_data:
        etf_nav_data_raw = pd.DataFrame(index=report_index)
        for idx_code, etf_code in etf_code_map.items():
            if etf_code not in etf_nav_data:
                continue
            nav_series = _select_column(etf_nav_data[etf_code], 'nav')
            if nav_series is None:
                continue
            nav_aligned = _align_to_index(nav_series, report_index)
            etf_nav_data_raw[idx_code] = nav_aligned.to_numpy()

    # Produce the legacy-format report.
    print("\n" + "=" * 60)
    print(" 生成原引擎格式报告")
    print("=" * 60)

    save_path = 'results/rotation_legacy'
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # index_close is handed to the report for chart drawing.
    index_close = data.get('index_close')

    metrics = generate_performance_report(
        backtest_result=backtest_result,
        code_list=valid_codes,
        code_name_map=code_name_map,
        benchmark_name=config.get('benchmark_name', '沪深300指数'),
        save_path=save_path,
        select_num=config.get('select_num', 3),
        code_config=code_config,
        index_data=index_close,
        etf_price_data=etf_price_data,
        etf_nav_data_raw=etf_nav_data_raw,
    )

    print("\n报告文件已生成:")
    print(f" - {save_path}_chart.png")
    print(f" - {save_path}_metrics.json")
    print(f" - {save_path}_nav.csv")

    return metrics
|
||
|
||
|
||
# Script entry point: run the back-test and write the legacy-format report.
if __name__ == "__main__":
    run_with_legacy_report()