""" ETF跟踪误差计算与校验 - 使用Tushare数据计算ETF跟踪误差(基于NAV) - 与天天基金数据对比校验 """ import os import sys import time import json import pandas as pd import numpy as np from pathlib import Path from datetime import datetime, timedelta PROJECT_ROOT = Path(__file__).parent.parent sys.path.insert(0, str(PROJECT_ROOT)) from dotenv import load_dotenv load_dotenv(PROJECT_ROOT / '.env') import tushare as ts # 轮动策略标的池 POOL_INDEX_MAP = { '399006.SZ': { 'name': '创业板指', 'current_etf': '159915.SZ', 'group': 'A', 'index_code': '399006.SZ', }, 'H30269.CSI': { 'name': '红利低波', 'current_etf': '512890.SH', 'group': 'A', 'index_code': 'H30269.CSI', }, } def get_etf_nav_data(pro, etf_code, start_date, end_date): """ 获取ETF净值数据(使用fund_nav接口) 注意:ETF应使用accum_nav(累计净值)而非unit_nav(单位净值) """ try: df = pro.fund_nav( ts_code=etf_code, start_date=start_date.replace('-', ''), end_date=end_date.replace('-', '') ) if df is not None and len(df) > 0: df['date'] = pd.to_datetime(df['nav_date']) df = df.set_index('date').sort_index() # 使用累计净值 return df['accum_nav'].astype(float) except Exception as e: print(f" 获取 {etf_code} NAV失败: {e}") return None def get_index_data(pro, index_code, start_date, end_date): """获取指数日线数据""" try: df = pro.index_daily( ts_code=index_code, start_date=start_date.replace('-', ''), end_date=end_date.replace('-', '') ) if df is not None and len(df) > 0: df['date'] = pd.to_datetime(df['trade_date']) df = df.set_index('date').sort_index() return df['close'].astype(float) except Exception as e: print(f" 获取指数 {index_code} 失败: {e}") return None def calculate_tracking_error(etf_nav, index_close): """ 计算跟踪误差 公式:STDEV(每日偏离度) × √252 每日偏离度 = ETF净值收益率 - 指数收益率 """ if etf_nav is None or index_close is None: return None # 计算收益率 etf_ret = etf_nav.pct_change().dropna() idx_ret = index_close.pct_change().dropna() # 对齐日期 common = etf_ret.index.intersection(idx_ret.index) if len(common) < 20: return None e = etf_ret.loc[common] i = idx_ret.loc[common] # 每日偏离度 daily_deviation = e - i # 跟踪误差 = 标准差 × √252 tracking_error = daily_deviation.std() * np.sqrt(252) # 其他指标 correlation = e.corr(i) r_squared = correlation ** 2 # 累计收益 etf_cum = (1 + e).prod() - 1 idx_cum = (1 + i).prod() - 1 excess = etf_cum - idx_cum return { 'annual_tracking_error': round(tracking_error * 100, 4), # % 'correlation': round(correlation, 6), 'r_squared': round(r_squared, 6), 'etf_cum_return': round(etf_cum * 100, 2), # % 'index_cum_return': round(idx_cum * 100, 2), # % 'excess_return': round(excess * 100, 2), # % 'common_days': len(common), } def main(): print("=" * 80) print("ETF跟踪误差计算与校验") print(f"分析日期: {datetime.now().strftime('%Y-%m-%d')}") print("=" * 80) # 初始化 pro = ts.pro_api(os.getenv('TUSHARE_TOKEN')) # 分析时间范围:最近1年 end_date = datetime.now().strftime('%Y-%m-%d') start_date = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d') print(f"计算区间: {start_date} ~ {end_date}") # 加载天天基金数据 eastmoney_path = PROJECT_ROOT / 'rotation' / 'results' / 'etf_competitor_analysis.json' eastmoney_data = {} if eastmoney_path.exists(): with open(eastmoney_path, 'r', encoding='utf-8') as f: eastmoney_data = json.load(f) print(f"已加载天天基金数据: {len(eastmoney_data)} 个指数") # 对每个指数计算跟踪误差 print(f"\n开始计算跟踪误差...") results = {} for key, info in POOL_INDEX_MAP.items(): index_name = info['name'] index_code = info['index_code'] current_etf = info['current_etf'] print(f"\n{'='*60}") print(f"=== {index_name} ({key}) ===") print(f"{'='*60}") # 获取指数数据 print(f" 获取指数数据: {index_code}") index_data = get_index_data(pro, index_code, start_date, end_date) if index_data is None: print(f" ✗ 指数数据获取失败") continue print(f" ✓ 指数数据: {len(index_data)} 天") # 获取该指数下所有ETF的NAV etf_list = [] if key in eastmoney_data: for etf in eastmoney_data[key]['etfs']: etf_list.append({ 'code': etf['ts_code'], 'name': etf['name'], 'eastmoney_te': etf.get('annual_tracking_error', 'N/A'), }) print(f" 共 {len(etf_list)} 只ETF需要计算") etf_results = [] for etf_info in etf_list: etf_code = etf_info['code'] etf_name = etf_info['name'] # 获取ETF NAV etf_nav = get_etf_nav_data(pro, etf_code, start_date, end_date) if etf_nav is None or len(etf_nav) < 20: continue # 计算跟踪误差 tracking = calculate_tracking_error(etf_nav, index_data) if tracking is None: continue result = { 'ts_code': etf_code, 'name': etf_name, 'tushare_te': tracking['annual_tracking_error'], 'tushare_r2': tracking['r_squared'], 'tushare_correlation': tracking['correlation'], 'tushare_excess_return': tracking['excess_return'], 'tushare_common_days': tracking['common_days'], 'eastmoney_te': etf_info['eastmoney_te'], 'is_current': etf_code == current_etf, } etf_results.append(result) time.sleep(0.1) # 按跟踪误差排序 etf_results.sort(key=lambda x: x['tushare_te']) results[key] = { 'index_name': index_name, 'index_code': index_code, 'current_etf': current_etf, 'etf_count': len(etf_results), 'etfs': etf_results, } # 打印结果 print(f"\n 计算完成: {len(etf_results)} 只ETF") print(f" {'代码':<12} {'名称':<20} {'Tushare TE':<12} {'天天基金 TE':<12} {'差异':<10} {'R²':<8}") print(f" {'-'*80}") for etf in etf_results[:10]: tushare_te = f"{etf['tushare_te']:.4f}%" eastmoney_te = etf['eastmoney_te'] # 计算差异 diff = 'N/A' if eastmoney_te and eastmoney_te != 'N/A' and eastmoney_te != '--': try: em_te = float(eastmoney_te.replace('%', '')) diff_val = etf['tushare_te'] - em_te diff = f"{diff_val:+.4f}%" except: pass marker = " ★" if etf['is_current'] else "" print(f" {etf['ts_code']:<12} {etf['name'][:20]:<20} {tushare_te:<12} {eastmoney_te:<12} {diff:<10} {etf['tushare_r2']:<8}{marker}") if len(etf_results) > 10: print(f" ... 还有 {len(etf_results) - 10} 只") # 保存结果 output_dir = PROJECT_ROOT / 'rotation' / 'results' output_dir.mkdir(exist_ok=True) output_path = output_dir / 'tracking_error_validation.json' with open(output_path, 'w', encoding='utf-8') as f: json.dump(results, f, ensure_ascii=False, indent=2, default=str) print(f"\n{'='*80}") print(f"结果已保存: {output_path}") print(f"{'='*80}") # 汇总统计 print(f"\n{'='*80}") print("校验汇总") print(f"{'='*80}") for key, data in results.items(): print(f"\n--- {data['index_name']} ---") print(f" 指数代码: {data['index_code']}") print(f" 计算ETF数: {data['etf_count']}") # 统计有天天基金数据的ETF matched = [e for e in data['etfs'] if e['eastmoney_te'] and e['eastmoney_te'] not in ['N/A', '--']] print(f" 天天基金有数据: {len(matched)}") if matched: # 计算平均差异 diffs = [] for etf in matched: try: em_te = float(etf['eastmoney_te'].replace('%', '')) diff = etf['tushare_te'] - em_te diffs.append(diff) except: pass if diffs: avg_diff = np.mean(diffs) max_diff = max(diffs, key=abs) print(f" 平均差异: {avg_diff:+.4f}%") print(f" 最大差异: {max_diff:+.4f}%") if __name__ == '__main__': main()