Files
etf/rotation/tracking_error_full.py
aszerW adb83d8cd7 feat: 实现贪心分配模式(greedy)
- config_loader.py: 添加 etf_pool 字段和 GREEDY 枚举
- config_simple.yaml: 每个资产添加 etf_pool 列表
- simple_rotation.py:
  - 添加 _compute_greedy_weights 方法
  - _calculate_daily_return 支持 greedy 模式
  - 向后兼容原有 rank/equal 模式

贪心算法:按 ETF 池容量分配仓位,装不下的顺延给下一名
- 有色金属(1 ETF): 吸收25%,顺延75%
- 原油(3 ETF): 吸收75%
- 黄金(4 ETF): 吸收100%

回测对比 (select_num=3):
- rank: 326.60% 累计收益, 1.24 夏普
- greedy: 421.35% 累计收益, 1.03 夏普
2026-06-21 12:40:40 +08:00

400 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
ETF跟踪误差全量计算
- 覆盖轮动策略标的池全部10个标的
- 数据源分层:
- A股指数 → Tushare index_daily
- 商品 → Tushare fut_daily主力合约
- 海外指数 → Flask API (yfinance)
- 与天天基金数据对比校验
"""
import os
import sys
import time
import json
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, timedelta
# Project root: the parent of the directory that contains this file.
PROJECT_ROOT = Path(__file__).parent.parent
# Put the project root first on sys.path; presumably this is what lets the
# project-local `datasource` package (imported further down) be resolved.
sys.path.insert(0, str(PROJECT_ROOT))
from dotenv import load_dotenv
# Load environment variables from the project's .env file before they are
# read; main() looks up TUSHARE_TOKEN through os.getenv.
load_dotenv(PROJECT_ROOT / '.env')
import tushare as ts
from datasource.flask_api_source import FlaskAPIDataSource
# ============================================================
# 轮动策略标的池全部10个标的
# ============================================================
# Field names of every POOL_CONFIG entry, in the column order used by
# _POOL_ROWS (after the leading pool key).
_POOL_FIELDS = ('name', 'current_etf', 'group', 'benchmark_type', 'benchmark_code')

# One row per underlying:
# (pool key, name, current ETF, group, benchmark type, benchmark code)
_POOL_ROWS = [
    # --- A-share indices (Tushare index_daily) ---
    ('399006.SZ', '创业板指', '159915.SZ', 'A', 'tushare_index', '399006.SZ'),
    ('H30269.CSI', '红利低波', '512890.SH', 'A', 'tushare_index', 'H30269.CSI'),
    # --- Commodities (Tushare fut_daily, main contract) ---
    ('GC=F', '黄金', '518880.SH', 'COMMODITY', 'tushare_futures', 'AU.SHF'),
    ('HG=F', '有色金属', '159980.SZ', 'COMMODITY', 'tushare_futures', 'CU.SHF'),
    # --- Overseas indices (Flask API / yfinance) ---
    ('HSI', '恒生指数', '159920.SZ', 'HK', 'flask_api', '^HSI'),
    ('HSTECH.HK', '恒生科技', '513130.SH', 'HK', 'flask_api', 'HSTECH.HK'),
    ('NDX', '纳指100', '513100.SH', 'US', 'flask_api', '^NDX'),
    ('N225', '日经225', '513520.SH', 'JP', 'flask_api', '^N225'),
    ('GDAXI', '德国DAX', '513030.SH', 'EU', 'flask_api', '^GDAXI'),
    # --- Crude oil: no reliable benchmark data source, so an ETF's closing
    # price (the "earliest ETF") serves as the benchmark ---
    ('CL=F', '原油', '160723.SZ', 'COMMODITY', 'earliest_etf', '159518.SZ'),
]

# Rotation-strategy pool: pool key -> settings dict for that underlying.
POOL_CONFIG = {
    pool_key: dict(zip(_POOL_FIELDS, values))
    for pool_key, *values in _POOL_ROWS
}
# ============================================================
# 数据获取函数
# ============================================================
def get_etf_nav_tushare(pro, etf_code, start_date, end_date):
    """Fetch an ETF's accumulated NAV series via Tushare ``fund_nav``.

    Args:
        pro: Tushare pro client (any object exposing ``fund_nav``).
        etf_code: Fund code passed as ``ts_code``, e.g. ``'159915.SZ'``.
        start_date: Range start as ``'YYYY-MM-DD'``; dashes are stripped
            before the request is made.
        end_date: Range end, same format as ``start_date``.

    Returns:
        A float ``pandas.Series`` of ``accum_nav`` indexed by the parsed
        ``nav_date`` in ascending order, or ``None`` when the request
        fails or yields no rows.
    """
    try:
        df = pro.fund_nav(
            ts_code=etf_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df is None or len(df) == 0:
            return None
        df['date'] = pd.to_datetime(df['nav_date'])
        df = df.set_index('date').sort_index()
        return df['accum_nav'].astype(float)
    except Exception as exc:
        # Best-effort fetch: one failing ETF must not abort the whole run,
        # but the failure is reported instead of being silently discarded.
        print(f" ✗ ETF净值获取失败 {etf_code}: {exc}", file=sys.stderr)
        return None
def get_benchmark_tushare_index(pro, index_code, start_date, end_date):
    """Fetch an A-share index's closing prices via Tushare ``index_daily``.

    Args:
        pro: Tushare pro client (any object exposing ``index_daily``).
        index_code: Index code passed as ``ts_code``, e.g. ``'399006.SZ'``.
        start_date: Range start as ``'YYYY-MM-DD'``; dashes are stripped
            before the request is made.
        end_date: Range end, same format as ``start_date``.

    Returns:
        A float ``pandas.Series`` of ``close`` indexed by the parsed
        ``trade_date`` in ascending order, or ``None`` when the request
        fails or yields no rows.
    """
    try:
        df = pro.index_daily(
            ts_code=index_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df is None or len(df) == 0:
            return None
        df['date'] = pd.to_datetime(df['trade_date'])
        df = df.set_index('date').sort_index()
        return df['close'].astype(float)
    except Exception as exc:
        # Best-effort fetch: report the failure rather than hiding it, and
        # let the caller skip this benchmark.
        print(f" ✗ 指数行情获取失败 {index_code}: {exc}", file=sys.stderr)
        return None
def get_benchmark_tushare_futures(pro, fut_code, start_date, end_date):
    """Fetch a futures contract's closing prices via Tushare ``fut_daily``.

    Args:
        pro: Tushare pro client (any object exposing ``fut_daily``).
        fut_code: Futures code passed as ``ts_code``, e.g. ``'AU.SHF'``
            (intended to be a main-contract code).
        start_date: Range start as ``'YYYY-MM-DD'``; dashes are stripped
            before the request is made.
        end_date: Range end, same format as ``start_date``.

    Returns:
        A float ``pandas.Series`` of ``close`` indexed by the parsed
        ``trade_date`` in ascending order, or ``None`` when the request
        fails or yields no rows.
    """
    try:
        df = pro.fut_daily(
            ts_code=fut_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df is None or len(df) == 0:
            return None
        df['date'] = pd.to_datetime(df['trade_date'])
        df = df.set_index('date').sort_index()
        return df['close'].astype(float)
    except Exception as exc:
        # Best-effort fetch: report the failure rather than hiding it, and
        # let the caller skip this benchmark.
        print(f" ✗ 期货行情获取失败 {fut_code}: {exc}", file=sys.stderr)
        return None
def get_benchmark_flask_api(flask_source, yf_code, start_date, end_date):
    """Fetch an overseas index's closing prices through the Flask API source.

    Args:
        flask_source: Data source exposing
            ``fetch(code, start_date, end_date)``.
        yf_code: Ticker handed to ``fetch`` (e.g. ``'^NDX'``).
        start_date: Range start, passed through to ``fetch`` unchanged.
        end_date: Range end, passed through to ``fetch`` unchanged.

    Returns:
        The ``close`` column of the fetched frame as a float
        ``pandas.Series`` with the frame's own index, or ``None`` when the
        request fails or yields no rows.
    """
    try:
        df = flask_source.fetch(yf_code, start_date, end_date)
        if df is None or len(df) == 0:
            return None
        # NOTE(review): the index is used as returned by fetch(); it is
        # assumed to be date-like so it can be matched against ETF NAV
        # dates downstream. Confirm against FlaskAPIDataSource.
        return df['close'].astype(float)
    except Exception as exc:
        # Best-effort fetch: report the failure rather than hiding it, and
        # let the caller skip this benchmark.
        print(f" ✗ 海外指数获取失败 {yf_code}: {exc}", file=sys.stderr)
        return None
def get_etf_close_tushare(pro, etf_code, start_date, end_date):
    """Fetch an ETF's closing prices via Tushare ``fund_daily``.

    Used where no benchmark data is available (e.g. crude oil), so that
    closing prices can be compared with closing prices.

    Args:
        pro: Tushare pro client (any object exposing ``fund_daily``).
        etf_code: Fund code passed as ``ts_code``, e.g. ``'159518.SZ'``.
        start_date: Range start as ``'YYYY-MM-DD'``; dashes are stripped
            before the request is made.
        end_date: Range end, same format as ``start_date``.

    Returns:
        A float ``pandas.Series`` of ``close`` indexed by the parsed
        ``trade_date`` in ascending order, or ``None`` when the request
        fails or yields no rows.
    """
    try:
        df = pro.fund_daily(
            ts_code=etf_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df is None or len(df) == 0:
            return None
        df['date'] = pd.to_datetime(df['trade_date'])
        df = df.set_index('date').sort_index()
        return df['close'].astype(float)
    except Exception as exc:
        # Best-effort fetch: one failing ETF must not abort the whole run,
        # but the failure is reported instead of being silently discarded.
        print(f" ✗ ETF收盘价获取失败 {etf_code}: {exc}", file=sys.stderr)
        return None
# ============================================================
# 跟踪误差计算
# ============================================================
def _clean_level_series(series):
    """Return *series* without missing values or duplicate dates, sorted by index."""
    cleaned = series.dropna()
    # Keep the last observation when a date appears more than once, so that
    # label-based selection below yields exactly one value per date.
    cleaned = cleaned[~cleaned.index.duplicated(keep='last')]
    return cleaned.sort_index()


def calculate_tracking_error(etf_nav, benchmark_close):
    """Compute the annualised tracking error of an ETF against a benchmark.

    Formula: ``std(daily deviation) * sqrt(252)``, where the daily
    deviation is the ETF return minus the benchmark return and ``std`` is
    pandas' default standard deviation.

    Both level series are first restricted to the dates they share, and
    returns are computed afterwards. When the two calendars differ (for
    example an overseas index against a domestic ETF), each pair of
    returns then spans the same period; computing returns before aligning
    would compare a one-day return with a multi-day one.

    Args:
        etf_nav: ``pandas.Series`` of ETF levels (NAV or close) indexed by
            date, or ``None``.
        benchmark_close: ``pandas.Series`` of benchmark levels indexed by
            date, or ``None``.

    Returns:
        ``None`` if either input is ``None`` or fewer than 20 common
        return observations exist. Otherwise a dict with:
        ``annual_tracking_error``, ``etf_cum_return``,
        ``benchmark_cum_return`` and ``excess_return`` (all in percent),
        ``correlation``, ``r_squared`` and ``common_days`` (number of
        return observations used).
    """
    if etf_nav is None or benchmark_close is None:
        return None

    etf_levels = _clean_level_series(etf_nav)
    bench_levels = _clean_level_series(benchmark_close)

    # Align levels first, then difference, so both returns cover the same span.
    shared_dates = etf_levels.index.intersection(bench_levels.index)
    etf_ret = etf_levels.loc[shared_dates].pct_change().dropna()
    bench_ret = bench_levels.loc[shared_dates].pct_change().dropna()

    common = etf_ret.index.intersection(bench_ret.index)
    if len(common) < 20:
        return None

    e = etf_ret.loc[common]
    b = bench_ret.loc[common]

    daily_deviation = e - b
    tracking_error = daily_deviation.std() * np.sqrt(252)
    correlation = e.corr(b)
    r_squared = correlation ** 2

    # Compounded return over the common window for each side.
    etf_cum = (1 + e).prod() - 1
    bench_cum = (1 + b).prod() - 1
    excess = etf_cum - bench_cum

    return {
        'annual_tracking_error': round(tracking_error * 100, 4),
        'correlation': round(correlation, 6),
        'r_squared': round(r_squared, 6),
        'etf_cum_return': round(etf_cum * 100, 2),
        'benchmark_cum_return': round(bench_cum * 100, 2),
        'excess_return': round(excess * 100, 2),
        'common_days': len(common),
    }
# ============================================================
# 主流程
# ============================================================
def _fetch_benchmark(pro, flask_source, btype, bcode, start_date, end_date):
    """Fetch the benchmark series for one benchmark type; None if unknown or failed."""
    if btype == 'tushare_index':
        return get_benchmark_tushare_index(pro, bcode, start_date, end_date)
    if btype == 'tushare_futures':
        return get_benchmark_tushare_futures(pro, bcode, start_date, end_date)
    if btype == 'flask_api':
        return get_benchmark_flask_api(flask_source, bcode, start_date, end_date)
    if btype == 'earliest_etf':
        return get_etf_close_tushare(pro, bcode, start_date, end_date)
    return None


def _parse_te_percent(value):
    """Parse a tracking-error value such as '0.05%' or 0.05 into a float; None if unparseable."""
    try:
        return float(str(value).replace('%', '').strip())
    except (TypeError, ValueError):
        return None


def main():
    """Compute tracking errors for every ETF of every pool entry and save them.

    For each entry of ``POOL_CONFIG`` the benchmark series is fetched
    (cached per benchmark type and code). Every ETF listed for that entry
    in ``rotation/results/etf_competitor_analysis.json`` (Eastmoney data,
    if the file exists) is then evaluated with
    ``calculate_tracking_error`` over the trailing 365 calendar days.
    ETFs with fewer than 20 data points or no computable result are
    skipped.

    Results are printed, written to
    ``rotation/results/tracking_error_full.json`` under the project root,
    and finally summarised against the Eastmoney tracking-error figures.
    """
    now = datetime.now()
    print("=" * 80)
    print("ETF跟踪误差全量计算10个标的")
    print(f"分析日期: {now.strftime('%Y-%m-%d')}")
    print("=" * 80)

    # Data sources.
    pro = ts.pro_api(os.getenv('TUSHARE_TOKEN'))
    flask_source = FlaskAPIDataSource()

    # Analysis window: the most recent year.
    end_date = now.strftime('%Y-%m-%d')
    start_date = (now - timedelta(days=365)).strftime('%Y-%m-%d')
    print(f"计算区间: {start_date} ~ {end_date}")

    # Eastmoney data: used for cross-checking and as the ETF list per entry.
    eastmoney_path = PROJECT_ROOT / 'rotation' / 'results' / 'etf_competitor_analysis.json'
    eastmoney_data = {}
    if eastmoney_path.exists():
        with open(eastmoney_path, 'r', encoding='utf-8') as f:
            eastmoney_data = json.load(f)
        print(f"已加载天天基金数据: {len(eastmoney_data)} 个标的")

    # Benchmarks are cached by "type:code" to avoid repeated requests.
    benchmark_cache = {}  # benchmark_key -> Series (or None after a failed fetch)
    results = {}

    for key, info in POOL_CONFIG.items():
        index_name = info['name']
        current_etf = info['current_etf']
        btype = info['benchmark_type']
        bcode = info['benchmark_code']

        print(f"\n{'='*60}")
        print(f"=== {index_name} ({key}) | 基准类型: {btype} ===")
        print(f"{'='*60}")

        # Step 1: benchmark data (cached).
        bench_key = f"{btype}:{bcode}"
        if bench_key in benchmark_cache:
            benchmark = benchmark_cache[bench_key]
            # A cached failure is None; only report a length for real data.
            if benchmark is not None:
                print(f" (缓存) 基准数据: {len(benchmark)}")
        else:
            print(f" 获取基准数据: {bcode} ({btype})")
            benchmark = _fetch_benchmark(
                pro, flask_source, btype, bcode, start_date, end_date
            )
            # Failures are cached as well so a dead source is not re-requested.
            benchmark_cache[bench_key] = benchmark
            if benchmark is not None:
                print(f" ✓ 基准数据: {len(benchmark)}")
            else:
                print(" ✗ 基准数据获取失败")

        if benchmark is None:
            print(" 跳过(无基准数据)")
            continue

        # Step 2: all ETFs listed for this entry in the Eastmoney data.
        etf_list = [
            {
                'code': etf['ts_code'],
                'name': etf['name'],
                'eastmoney_te': etf.get('annual_tracking_error', 'N/A'),
            }
            for etf in eastmoney_data.get(key, {}).get('etfs', [])
        ]
        print(f"{len(etf_list)} 只ETF需要计算")

        # Step 3: tracking error per ETF.
        etf_results = []
        for etf_info in etf_list:
            etf_code = etf_info['code']
            etf_name = etf_info['name']

            if btype == 'earliest_etf':
                # Crude oil: compare closing price against closing price.
                etf_data = get_etf_close_tushare(pro, etf_code, start_date, end_date)
            else:
                etf_data = get_etf_nav_tushare(pro, etf_code, start_date, end_date)
            # Throttle after every request, including failed or skipped ones.
            time.sleep(0.05)

            if etf_data is None or len(etf_data) < 20:
                continue

            tracking = calculate_tracking_error(etf_data, benchmark)
            if tracking is None:
                continue

            etf_results.append({
                'ts_code': etf_code,
                'name': etf_name,
                'tushare_te': tracking['annual_tracking_error'],
                'tushare_r2': tracking['r_squared'],
                'tushare_correlation': tracking['correlation'],
                'tushare_excess_return': tracking['excess_return'],
                'tushare_common_days': tracking['common_days'],
                'eastmoney_te': etf_info['eastmoney_te'],
                'is_current': etf_code == current_etf,
            })

        # Lowest tracking error first.
        etf_results.sort(key=lambda x: x['tushare_te'])

        results[key] = {
            'index_name': index_name,
            'current_etf': current_etf,
            'benchmark_type': btype,
            'benchmark_code': bcode,
            'group': info['group'],
            'etf_count': len(etf_results),
            'etfs': etf_results,
        }

        # Print the ten best ETFs for this entry.
        print(f"\n 计算完成: {len(etf_results)} 只ETF")
        print(f" {'代码':<12} {'名称':<20} {'TE':<10} {'天天基金TE':<12} {'R²':<8}")
        print(f" {'-'*70}")
        for etf in etf_results[:10]:
            te_str = f"{etf['tushare_te']:.4f}%"
            # str() keeps the width format valid for None or numeric values.
            em_te = str(etf['eastmoney_te'])
            marker = " ★当前" if etf['is_current'] else ""
            print(f" {etf['ts_code']:<12} {etf['name'][:20]:<20} {te_str:<10} {em_te:<12} {etf['tushare_r2']:<8}{marker}")
        if len(etf_results) > 10:
            print(f" ... 还有 {len(etf_results) - 10}")

    # ============================================================
    # Save results
    # ============================================================
    output_dir = PROJECT_ROOT / 'rotation' / 'results'
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / 'tracking_error_full.json'
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2, default=str)

    print(f"\n{'='*80}")
    print(f"结果已保存: {output_path}")
    print(f"{'='*80}")

    # ============================================================
    # Summary cross-check against Eastmoney
    # ============================================================
    print(f"\n{'='*80}")
    print("全量校验汇总")
    print(f"{'='*80}")

    for data in results.values():
        matched = [e for e in data['etfs']
                   if e['eastmoney_te'] and e['eastmoney_te'] not in ['N/A', '--']]
        print(f"\n--- {data['index_name']} ({data['benchmark_type']}) ---")
        print(f" ETF总数: {data['etf_count']} | 天天基金有数据: {len(matched)}")

        if matched:
            # Difference between the computed figure and the Eastmoney
            # figure; values that cannot be parsed are left out.
            diffs = []
            for etf in matched:
                em_te = _parse_te_percent(etf['eastmoney_te'])
                if em_te is not None:
                    diffs.append(etf['tushare_te'] - em_te)
            if diffs:
                print(f" 平均差异: {np.mean(diffs):+.4f}% | 最大差异: {max(diffs, key=abs):+.4f}%")

        # Top three (lowest tracking error).
        top3 = data['etfs'][:3]
        print(" Top3 (TE最低):")
        for i, etf in enumerate(top3, 1):
            marker = " ★当前" if etf['is_current'] else ""
            print(f" {i}. {etf['ts_code']} {etf['name']} TE={etf['tushare_te']:.4f}%{marker}")
# Run the full calculation only when this file is executed as a script.
if __name__ == '__main__':
    main()