Files
etf/tests/experiments/full_pool_top3_backtest.py
aszerW 6b59855c28 experiment(rotation): 同大类扩充与纳指vs标普替换对比实验
技术修复:
- SOCKS5代理IPv6问题:socks5:// → socks5h:// (hybrid_source.py, yfinance_source.py)

目录整理:
- scripts/ → 仅保留策略入口(daily_scheduler, run_rotation, run_cci_screener)
- 实验脚本移至 tests/experiments/
- 工具脚本移至 tests/utils/
- 实验记录新增 docs/experiments/
- results/ 添加到 gitignore

实验结果:

实验001 - 同大类扩充(添加标普500):
├─ 累计收益: 1467.35% → 1176.26% (-291%)
├─ CAGR: 48.10% → 43.82% (-4.28%)
├─ 调仓次数: 459 → 501 (+42次)
└─ 结论: 添加同大类标的不增加跨类分散,反而侵蚀收益

实验002 - 纳指vs标普替换对比:
├─ 累计收益: 1467.35% → 1118.77% (-348%)
├─ CAGR: 48.10% → 42.87% (-5.22%)
├─ Sharpe: 2.21 → 2.08 (-0.13)
├─ MaxDD: -17.33% → -15.14% (+2.18%)
└─ 结论: 纳指100优于标普500,成长风格更适合动量策略

策略建议:
- 保持纳指100作为美股大类代表
- 不添加同大类新标的(避免类内切换成本)
- 新增标的应优先考虑新大类(增加跨类分散)
2026-05-06 20:43:38 +08:00

236 lines
8.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
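Top-3 equal-weight ETF rotation backtest over the candidate pool.

Originally described as covering 44 ETFs across the whole market (pool source:
etf_rotation_deep_analysis.md); FULL_POOL below currently lists 11 tickers.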
"""
import sys
import math
import warnings
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
# Suppress every warning category for the whole process.
warnings.filterwarnings("ignore")
# Put the directory two levels above this file at the front of sys.path; this
# must run before the project imports that follow.
# NOTE(review): confirm that `动量` and `scripts` are importable from there.
sys.path.insert(0, str(Path(__file__).parent.parent))
from dotenv import load_dotenv
# Load environment variables via python-dotenv; TUSHARE_TOKEN is read with
# os.getenv when the benchmark is fetched later in this file.
load_dotenv()
from 动量 import (
fetch_all_etf_data,
fetch_etf_nav_data,
calc_atr,
calc_weighted_momentum_score,
apply_crash_filter,
calc_premium_rate,
print_performance,
print_yearly_returns,
)
# ---------------------------------------------------------------------------
# Candidate asset pool: ticker -> display name
# (9 curated picks + Hang Seng Tech + Hang Seng Index).
# Insertion order is significant: it is the order in which tickers are scored.
# ---------------------------------------------------------------------------
FULL_POOL = {
    # Overseas equity
    "513100.SH": "纳指100ETF",
    "513520.SH": "日经225ETF",
    "513030.SH": "德国DAX ETF",
    # Commodities
    "518880.SH": "黄金ETF",
    "159980.SZ": "有色金属ETF",
    "160723.SZ": "嘉实原油LOF",
    # Fixed income
    "511090.SH": "30年国债ETF",
    # A-shares
    "512890.SH": "红利低波ETF",
    "159915.SZ": "创业板ETF",
    # Hong Kong equity
    "513130.SH": "恒生科技ETF",
    "159920.SZ": "恒生ETF",
}
# ---------------------------------------------------------------------------
# Asset-class mapping: ticker -> category label.
# Declared per category so the grouping is visible at a glance; the resulting
# dict has one entry per ticker, in the order the tickers are listed here.
# ---------------------------------------------------------------------------
ETF_CATEGORIES = {
    code: category
    for category, codes in (
        ("美股", ("513100.SH",)),
        ("日本", ("513520.SH",)),
        ("欧洲", ("513030.SH",)),
        ("商品", ("518880.SH", "159980.SZ", "160723.SZ")),
        ("固收", ("511090.SH",)),
        ("A股主题", ("512890.SH",)),
        ("A股宽基", ("159915.SZ",)),
        ("港股", ("513130.SH", "159920.SZ")),
    )
    for code in codes
}
# Strategy settings handed to run_full_backtest() by the script entry point.
CONFIG = dict(
    etf_pool=FULL_POOL,
    target_num=3,              # number of positions held
    auto_day=True,             # enable the dynamic lookback window
    fixed_days=25,             # fixed lookback length, in days
    min_days=20,               # lower bound of the dynamic window
    max_days=60,               # upper bound of the dynamic window
    premium_threshold=5.0,     # premium-rate threshold (%)
    trade_cost=0.001,          # cost of one trade, both sides combined
    start_date='2019-01-01',
    benchmark='000300.SH',     # benchmark: CSI 300
)
def _pick_lookback(hist, config: dict, max_lookback: int) -> int:
    """Choose how many trailing closes feed the momentum score.

    Args:
        hist: Price history slice exposing ``high``, ``low`` and ``close``
            columns.
        config: Strategy settings; reads ``auto_day``, ``fixed_days``,
            ``min_days`` and ``max_days``.
        max_lookback: Minimum number of rows ``hist`` must have before the
            dynamic window is attempted.

    Returns:
        ``config['fixed_days']`` when the dynamic window is disabled, the
        history is too short, or the long-window ATR is not positive;
        otherwise a length interpolated between ``min_days`` and ``max_days``.
    """
    if not (config['auto_day'] and len(hist) >= max_lookback):
        return config['fixed_days']

    long_atr = calc_atr(hist['high'], hist['low'], hist['close'], config['max_days']).iloc[-1]
    short_atr = calc_atr(hist['high'], hist['low'], hist['close'], config['min_days']).iloc[-1]
    if long_atr > 0:
        # The short/long ATR ratio is capped at 0.9; a larger ratio pulls the
        # window towards min_days, a smaller one towards max_days.
        ratio = min(0.9, short_atr / long_atr)
        return int(config['min_days'] + (config['max_days'] - config['min_days']) * (1 - ratio))
    return config['fixed_days']


def _select_targets(scores: dict, target_num: int) -> list:
    """Pick the best-scoring ticker of each category, then the top categories.

    Args:
        scores: Mapping of ticker -> momentum score for eligible tickers.
        target_num: Maximum number of tickers to return.

    Returns:
        Up to ``target_num`` tickers, at most one per ETF_CATEGORIES label
        (tickers without a label share the ``'未知'`` bucket), ordered by
        descending score.
    """
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    best_by_category = {}
    for code, _score in ranked:
        # First hit per category is its highest scorer because `ranked` is in
        # descending order; dict insertion order therefore already ranks the
        # categories, so no second sort is needed.
        best_by_category.setdefault(ETF_CATEGORIES.get(code, '未知'), code)
    return list(best_by_category.values())[:target_num]


def _holdings_return(holdings: dict, all_data: dict, today) -> float:
    """Return the weighted close-to-close return of ``holdings`` on ``today``.

    A position contributes nothing when it has no row for ``today`` or no
    earlier row to compare against.
    """
    port_ret = 0.0
    for code, weight in holdings.items():
        df_h = all_data[code]
        if today not in df_h.index:
            continue
        prev_dates = df_h.index[df_h.index < today]
        if len(prev_dates) > 0:
            prev_price = df_h.loc[prev_dates[-1], 'close']
            port_ret += weight * (df_h.loc[today, 'close'] / prev_price - 1)
    return port_ret


def run_full_backtest(config: dict) -> None:
    """Run the equal-weight, one-per-category ETF rotation backtest.

    Loads cached OHLCV data for every ticker in ``config['etf_pool']``, walks
    the union of their dates from ``config['start_date']`` onwards, scores
    each ticker daily, holds the selected tickers at equal weight, then
    prints the performance reports and saves a chart.

    Args:
        config: Strategy settings. This function reads ``etf_pool``,
            ``target_num``, ``auto_day``, ``fixed_days``, ``min_days``,
            ``max_days``, ``trade_cost``, ``start_date`` and ``benchmark``;
            the whole dict is also passed to ``print_performance``.

    Returns:
        None. Results are printed; the function returns early (after a
        message) when no trading day is available.
    """
    end_date = datetime.now().strftime('%Y-%m-%d')
    etf_codes = list(config['etf_pool'])
    start_ts = pd.Timestamp(config['start_date'])

    print("=" * 60)
    print(" 全市场ETF轮动策略 - Top 3 等权回测")
    print("=" * 60)
    print(f" 候选ETF: {len(etf_codes)}")
    print(f" 持仓数量: {config['target_num']}")
    print(f" 回测区间: {config['start_date']} ~ {end_date}")

    # 1. Load price data from the local cache.
    from scripts.etf_data_cache import ETFDataCache
    data_cache = ETFDataCache()
    print(f"\n{'='*60}")
    print("加载数据...")
    all_data = {}
    for code in etf_codes:
        df = data_cache.load_cached_ohlcv(code)
        if not df.empty:
            all_data[code] = df
    print(f" 加载完成: {len(all_data)} 只价格数据")

    # 2. Trading calendar = union of all tickers' dates from start_date on.
    all_dates = set()
    for df in all_data.values():
        all_dates.update(df.index.tolist())
    trade_dates = sorted(d for d in all_dates if d >= start_ts)
    print(f" 交易日数: {len(trade_dates)}")
    if not trade_dates:
        # Without this guard the empty result frame has no 'date' column and
        # set_index('date') below raises KeyError.
        print(" 无可用交易日数据,回测终止")
        return

    # 3. Day-by-day simulation.
    print(f"\n{'='*60}")
    print("开始回测...")
    max_lookback = config['max_days'] + 10
    # Only tickers that actually loaded can be scored; resolve this once.
    scorable_codes = [code for code in etf_codes if code in all_data]
    holdings = {}  # {code: weight}
    daily_returns = []
    signals = []
    for today in trade_dates:
        # Score every ticker using data up to and including today.
        scores = {}
        for code in scorable_codes:
            df = all_data[code]
            hist = df[df.index <= today].tail(max_lookback + 1)
            if len(hist) < config['min_days']:
                continue
            close_arr = hist['close'].values
            lookback = _pick_lookback(hist, config, max_lookback)
            prices = close_arr[-lookback:]
            if len(prices) < 5:
                continue
            score = calc_weighted_momentum_score(prices)['score']
            score = apply_crash_filter(close_arr, score)
            # Only scores strictly between 0 and 6 are eligible.
            if 0 < score < 6:
                scores[code] = score

        targets = _select_targets(scores, config['target_num'])
        new_holdings = {c: 1.0 / len(targets) for c in targets}

        # Today's return is earned by the positions chosen on the previous
        # day; today's selection only takes effect from the next day.
        port_ret = _holdings_return(holdings, all_data, today)

        # Rebalancing cost: charged only on tickers entering or leaving.
        # NOTE(review): weight changes of retained tickers (when the number
        # of positions changes) are not counted as turnover — confirm intent.
        old_set, new_set = set(holdings), set(new_holdings)
        trade_cost = 0.0
        if old_set != new_set:
            turnover = (
                sum(holdings[c] for c in old_set - new_set)
                + sum(new_holdings[c] for c in new_set - old_set)
            )
            # config['trade_cost'] covers both sides, so each side pays half.
            trade_cost = turnover * config['trade_cost'] / 2
            signals.append({'date': today, 'holdings': list(new_holdings.keys())})
        holdings = new_holdings

        daily_returns.append({
            'date': today,
            'daily_return': port_ret - trade_cost,
            'holding': ", ".join(holdings.keys()) if holdings else "空仓"
        })

    # 4. Performance.
    result_df = pd.DataFrame(daily_returns).set_index('date')
    result_df['nav'] = (1 + result_df['daily_return']).cumprod()

    # Benchmark: despite its name, 'bench_return' holds the benchmark close
    # normalised by its first close (a NAV-style level), or 1.0 if unavailable.
    import os
    import tushare as ts
    pro = ts.pro_api(os.getenv("TUSHARE_TOKEN"))
    bench_df = pro.index_daily(
        ts_code=config['benchmark'],
        start_date=config['start_date'].replace('-', ''),
        end_date=end_date.replace('-', ''),
    )
    if bench_df is not None and not bench_df.empty:
        bench_df['date'] = pd.to_datetime(bench_df['trade_date'])
        bench_df = bench_df.set_index('date').sort_index()
        result_df['bench_return'] = (
            bench_df['close'].reindex(result_df.index, method='ffill')
            / bench_df['close'].iloc[0]
        )
    else:
        result_df['bench_return'] = 1.0

    print_performance(result_df, signals, config)
    print_yearly_returns(result_df)

    # Save the chart.
    save_chart(result_df)
def save_chart(result_df):
    """Render the NAV and drawdown chart and save it as a PNG.

    Best-effort: any failure (including Matplotlib being unavailable) is
    reported with a printed message instead of being raised.

    Args:
        result_df: Frame indexed by date with ``nav`` and ``bench_return``
            columns; both are plotted on a log-scale axis, and the drawdown
            of ``nav`` from its running peak is drawn below.

    Returns:
        None. The image is written to ``results/full_pool_top3_chart.png``
        under the directory two levels above this file.
    """
    fig = None
    try:
        import matplotlib
        # Select the non-interactive Agg backend before pyplot is imported.
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        matplotlib.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
        matplotlib.rcParams['axes.unicode_minus'] = False

        # height_ratios goes through gridspec_kw so this also works on
        # Matplotlib versions that lack the direct subplots() keyword.
        fig, (ax1, ax2) = plt.subplots(
            2, 1, figsize=(14, 8),
            gridspec_kw={'height_ratios': [3, 1], 'hspace': 0.3},
        )
        ax1.plot(result_df.index, result_df['nav'], label='全市场Top3等权', color='#2ecc71')
        ax1.plot(result_df.index, result_df['bench_return'], label='沪深300', color='#95a5a6')
        ax1.set_yscale('log')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Drawdown relative to the running NAV peak.
        peak = result_df['nav'].cummax()
        ax2.fill_between(result_df.index, (result_df['nav'] - peak) / peak, 0, color='#e74c3c', alpha=0.4)

        out_path = Path(__file__).parent.parent / 'results' / 'full_pool_top3_chart.png'
        # The output directory may not exist yet; create it before saving.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_path)
        print("图表已保存到 results/full_pool_top3_chart.png")
    except Exception as e:
        print(f"图表生成失败: {e}")
    finally:
        # Release the figure so it does not stay registered with pyplot.
        if fig is not None:
            plt.close(fig)
# Script entry point: run the backtest with the module-level CONFIG.
if __name__ == "__main__":
    run_full_backtest(CONFIG)