Files
etf/rotation/tracking_error_validation.py
aszerW adb83d8cd7 feat: 实现贪心分配模式(greedy)
- config_loader.py: 添加 etf_pool 字段和 GREEDY 枚举
- config_simple.yaml: 每个资产添加 etf_pool 列表
- simple_rotation.py:
  - 添加 _compute_greedy_weights 方法
  - _calculate_daily_return 支持 greedy 模式
  - 向后兼容原有 rank/equal 模式

贪心算法:按 ETF 池容量分配仓位,装不下的顺延给下一名
- 有色金属(1 ETF): 吸收25%,顺延75%
- 原油(3 ETF): 吸收75%
- 黄金(4 ETF): 吸收100%

回测对比 (select_num=3):
- rank: 326.60% 累计收益, 1.24 夏普
- greedy: 421.35% 累计收益, 1.03 夏普
2026-06-21 12:40:40 +08:00

290 lines
9.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
ETF跟踪误差计算与校验
- 使用Tushare数据计算ETF跟踪误差基于NAV
- 与天天基金数据对比校验
"""
import os
import sys
import time
import json
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, timedelta
PROJECT_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from dotenv import load_dotenv
load_dotenv(PROJECT_ROOT / '.env')
import tushare as ts
# Index pool of the rotation strategy.
# Each key is an index ts_code; the value holds the display name, the ETF
# flagged as the current pick for that index, a group label, and the index
# code passed to the data request (identical to the key).
POOL_INDEX_MAP = {
    '399006.SZ': dict(
        name='创业板指',
        current_etf='159915.SZ',
        group='A',
        index_code='399006.SZ',
    ),
    'H30269.CSI': dict(
        name='红利低波',
        current_etf='512890.SH',
        group='A',
        index_code='H30269.CSI',
    ),
}
def get_etf_nav_data(pro, etf_code, start_date, end_date):
    """
    Fetch an ETF's accumulated NAV series through the ``fund_nav`` endpoint.

    The accumulated NAV (``accum_nav``) is used rather than the unit NAV
    (``unit_nav``).

    Args:
        pro: API client exposing ``fund_nav(ts_code=, start_date=, end_date=)``.
        etf_code: Fund code passed as ``ts_code``.
        start_date: Range start as a string; any ``-`` is stripped before
            the request (``'2024-01-31'`` -> ``'20240131'``).
        end_date: Range end, same format as ``start_date``.

    Returns:
        A float ``pandas.Series`` named ``accum_nav`` indexed by NAV date in
        ascending order, with missing values removed and at most one value
        per date; ``None`` when the request fails or no usable row remains.
    """
    try:
        df = pro.fund_nav(
            ts_code=etf_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df is None or len(df) == 0:
            return None
        df['date'] = pd.to_datetime(df['nav_date'])
        nav = df.set_index('date').sort_index()['accum_nav'].astype(float)
        # Missing NAVs and repeated NAV dates would distort pct_change() and
        # the date alignment done by the caller, so clean them up here.
        nav = nav.dropna()
        nav = nav[~nav.index.duplicated(keep='first')]
        if len(nav) == 0:
            return None
        return nav
    except Exception as e:
        # Best effort: report the failure and let the caller skip this ETF.
        print(f" 获取 {etf_code} NAV失败: {e}")
    return None
def get_index_data(pro, index_code, start_date, end_date):
    """
    Fetch daily closing levels of an index through ``index_daily``.

    Args:
        pro: API client exposing ``index_daily(ts_code=, start_date=, end_date=)``.
        index_code: Index code passed as ``ts_code``.
        start_date: Range start as a string; any ``-`` is stripped first.
        end_date: Range end, same format as ``start_date``.

    Returns:
        A float ``pandas.Series`` of ``close`` values indexed by trade date in
        ascending order, or ``None`` when the request fails or returns no rows.
    """
    try:
        query = {
            'ts_code': index_code,
            'start_date': start_date.replace('-', ''),
            'end_date': end_date.replace('-', ''),
        }
        daily = pro.index_daily(**query)
        if daily is None or len(daily) == 0:
            return None
        daily['date'] = pd.to_datetime(daily['trade_date'])
        ordered = daily.set_index('date').sort_index()
        closes = ordered['close']
        return closes.astype(float)
    except Exception as err:
        # Best effort: report the failure and signal "no data" to the caller.
        print(f" 获取指数 {index_code} 失败: {err}")
    return None
def calculate_tracking_error(etf_nav, index_close):
    """
    Compute the annualised tracking error of an ETF NAV series vs. its index.

    Formula: STDEV(daily deviation) x sqrt(252), where
    daily deviation = ETF NAV return - index return.

    Both level series are restricted to the dates they share *before* returns
    are computed. Computing returns on each series separately and intersecting
    afterwards would pair a multi-day return (across a date missing from one
    series) with a one-day return of the other, inflating the tracking error.

    Args:
        etf_nav: ``pandas.Series`` of ETF NAV levels indexed by date, or None.
        index_close: ``pandas.Series`` of index closing levels indexed by
            date, or None.

    Returns:
        ``None`` if either input is None or fewer than 20 paired return
        observations exist; otherwise a dict with
        ``annual_tracking_error`` (%), ``correlation``, ``r_squared``,
        ``etf_cum_return`` (%), ``index_cum_return`` (%),
        ``excess_return`` (%) and ``common_days`` (number of paired returns).
    """
    if etf_nav is None or index_close is None:
        return None

    # Clean levels: no missing values, one observation per date.
    etf_levels = etf_nav.dropna()
    etf_levels = etf_levels[~etf_levels.index.duplicated(keep='first')]
    idx_levels = index_close.dropna()
    idx_levels = idx_levels[~idx_levels.index.duplicated(keep='first')]

    # Align the levels on shared dates first, in chronological order.
    shared_dates = etf_levels.index.intersection(idx_levels.index).sort_values()

    # Returns over identical intervals for both series.
    etf_ret = etf_levels.loc[shared_dates].pct_change().dropna()
    idx_ret = idx_levels.loc[shared_dates].pct_change().dropna()

    # A zero level can leave a NaN in only one series; keep paired rows only.
    common = etf_ret.index.intersection(idx_ret.index)
    if len(common) < 20:
        return None

    e = etf_ret.loc[common]
    i = idx_ret.loc[common]

    # Daily deviation and its annualised standard deviation.
    daily_deviation = e - i
    tracking_error = daily_deviation.std() * np.sqrt(252)

    # Supporting statistics.
    correlation = e.corr(i)
    r_squared = correlation ** 2

    # Cumulative returns over the paired observations.
    etf_cum = (1 + e).prod() - 1
    idx_cum = (1 + i).prod() - 1
    excess = etf_cum - idx_cum

    return {
        'annual_tracking_error': round(tracking_error * 100, 4),  # %
        'correlation': round(correlation, 6),
        'r_squared': round(r_squared, 6),
        'etf_cum_return': round(etf_cum * 100, 2),  # %
        'index_cum_return': round(idx_cum * 100, 2),  # %
        'excess_return': round(excess * 100, 2),  # %
        'common_days': len(common),
    }
def _parse_te_percent(raw):
    """Return an Eastmoney tracking-error field as a float, or None if unusable.

    Accepts numbers as well as strings such as ``'0.35%'``. Falsy values, the
    placeholders ``'N/A'`` / ``'--'``, unparseable text and NaN all count as
    "no data".
    """
    if not raw or raw in ('N/A', '--'):
        return None
    try:
        value = float(str(raw).replace('%', ''))
    except ValueError:
        return None
    return None if np.isnan(value) else value


def _load_eastmoney_data(path):
    """Load the Eastmoney comparison JSON at ``path``; return {} if absent."""
    if not path.exists():
        return {}
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    print(f"已加载天天基金数据: {len(data)} 个指数")
    return data


def _evaluate_etfs(pro, etf_list, index_data, current_etf, start_date, end_date):
    """Compute tracking metrics for each candidate ETF, sorted by tracking error."""
    etf_results = []
    for etf_info in etf_list:
        etf_code = etf_info['code']
        etf_nav = get_etf_nav_data(pro, etf_code, start_date, end_date)
        # Pause after every request, including ones that yield nothing usable,
        # so skipped ETFs do not turn into back-to-back API calls.
        time.sleep(0.1)
        if etf_nav is None or len(etf_nav) < 20:
            continue
        tracking = calculate_tracking_error(etf_nav, index_data)
        if tracking is None:
            continue
        etf_results.append({
            'ts_code': etf_code,
            'name': etf_info['name'],
            'tushare_te': tracking['annual_tracking_error'],
            'tushare_r2': tracking['r_squared'],
            'tushare_correlation': tracking['correlation'],
            'tushare_excess_return': tracking['excess_return'],
            'tushare_common_days': tracking['common_days'],
            'eastmoney_te': etf_info['eastmoney_te'],
            'is_current': etf_code == current_etf,
        })
    etf_results.sort(key=lambda x: x['tushare_te'])
    return etf_results


def _print_index_table(etf_results):
    """Print the ten ETFs with the lowest tracking error for one index."""
    print(f"\n 计算完成: {len(etf_results)} 只ETF")
    print(f" {'代码':<12} {'名称':<20} {'Tushare TE':<12} {'天天基金 TE':<12} {'差异':<10} {'':<8}")
    print(f" {'-'*80}")
    for etf in etf_results[:10]:
        tushare_te = f"{etf['tushare_te']:.4f}%"
        eastmoney_te = etf['eastmoney_te']
        # A JSON null would make the ``:<12`` format spec raise TypeError.
        em_display = 'N/A' if eastmoney_te is None else str(eastmoney_te)
        em_te = _parse_te_percent(eastmoney_te)
        diff = 'N/A' if em_te is None else f"{etf['tushare_te'] - em_te:+.4f}%"
        # NOTE(review): both branches are empty, so the current ETF is not
        # actually highlighted; the marker glyph appears to be missing.
        marker = "" if etf['is_current'] else ""
        print(f" {etf['ts_code']:<12} {etf['name'][:20]:<20} {tushare_te:<12} {em_display:<12} {diff:<10} {etf['tushare_r2']:<8}{marker}")
    if len(etf_results) > 10:
        print(f" ... 还有 {len(etf_results) - 10}")


def _print_summary(results):
    """Print, per index, how far the computed values are from Eastmoney's."""
    for data in results.values():
        print(f"\n--- {data['index_name']} ---")
        print(f" 指数代码: {data['index_code']}")
        print(f" 计算ETF数: {data['etf_count']}")
        # Only ETFs whose Eastmoney value parses to a number are compared.
        diffs = []
        for etf in data['etfs']:
            em_te = _parse_te_percent(etf['eastmoney_te'])
            if em_te is not None:
                diffs.append(etf['tushare_te'] - em_te)
        print(f" 天天基金有数据: {len(diffs)}")
        if diffs:
            avg_diff = np.mean(diffs)
            max_diff = max(diffs, key=abs)
            print(f" 平均差异: {avg_diff:+.4f}%")
            print(f" 最大差异: {max_diff:+.4f}%")


def main():
    """
    Compute ETF tracking errors from Tushare data and compare with Eastmoney.

    For every index in ``POOL_INDEX_MAP`` the script fetches one year of index
    closes, evaluates each ETF listed for that index in
    ``rotation/results/etf_competitor_analysis.json``, prints a comparison
    table, writes all results to
    ``rotation/results/tracking_error_validation.json`` and prints a summary
    of the differences.
    """
    now = datetime.now()
    print("=" * 80)
    print("ETF跟踪误差计算与校验")
    print(f"分析日期: {now.strftime('%Y-%m-%d')}")
    print("=" * 80)

    # Initialise the API client.
    pro = ts.pro_api(os.getenv('TUSHARE_TOKEN'))

    # Analysis window: the most recent 365 days.
    end_date = now.strftime('%Y-%m-%d')
    start_date = (now - timedelta(days=365)).strftime('%Y-%m-%d')
    print(f"计算区间: {start_date} ~ {end_date}")

    # Load the Eastmoney reference data (optional).
    eastmoney_path = PROJECT_ROOT / 'rotation' / 'results' / 'etf_competitor_analysis.json'
    eastmoney_data = _load_eastmoney_data(eastmoney_path)

    # Compute tracking errors index by index.
    print("\n开始计算跟踪误差...")
    results = {}
    for key, info in POOL_INDEX_MAP.items():
        index_name = info['name']
        index_code = info['index_code']
        current_etf = info['current_etf']
        print(f"\n{'='*60}")
        print(f"=== {index_name} ({key}) ===")
        print(f"{'='*60}")

        print(f" 获取指数数据: {index_code}")
        index_data = get_index_data(pro, index_code, start_date, end_date)
        if index_data is None:
            print(" ✗ 指数数据获取失败")
            continue
        print(f" ✓ 指数数据: {len(index_data)}")

        # Candidate ETFs come from the Eastmoney file; an index without an
        # entry (or without an 'etfs' list) simply has no candidates.
        etf_list = [
            {
                'code': etf['ts_code'],
                'name': etf['name'],
                'eastmoney_te': etf.get('annual_tracking_error', 'N/A'),
            }
            for etf in eastmoney_data.get(key, {}).get('etfs', [])
        ]
        print(f"{len(etf_list)} 只ETF需要计算")

        etf_results = _evaluate_etfs(
            pro, etf_list, index_data, current_etf, start_date, end_date
        )
        results[key] = {
            'index_name': index_name,
            'index_code': index_code,
            'current_etf': current_etf,
            'etf_count': len(etf_results),
            'etfs': etf_results,
        }
        _print_index_table(etf_results)

    # Persist the results; create missing parent directories as well.
    output_dir = PROJECT_ROOT / 'rotation' / 'results'
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / 'tracking_error_validation.json'
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2, default=str)
    print(f"\n{'='*80}")
    print(f"结果已保存: {output_path}")
    print(f"{'='*80}")

    # Summary statistics.
    print(f"\n{'='*80}")
    print("校验汇总")
    print(f"{'='*80}")
    _print_summary(results)
# Run the validation only when this file is executed as a script.
if __name__ == "__main__":
    main()