"""
Full ETF tracking-error calculation.

- Covers all 10 underlyings in the rotation-strategy pool.
- Layered data sources:
  - A-share indices  -> Tushare index_daily
  - Commodities      -> Tushare fut_daily (main contract)
  - Overseas indices -> Flask API (yfinance)
- Cross-checks the computed values against the Eastmoney (天天基金) data.
"""

import json
import logging
import os
import sys
import time
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd

# The project root must be on sys.path before the project-local import below.
PROJECT_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(PROJECT_ROOT))

from dotenv import load_dotenv

load_dotenv(PROJECT_ROOT / '.env')

import tushare as ts
from datasource.flask_api_source import FlaskAPIDataSource

logger = logging.getLogger(__name__)

# Minimum number of observations required before a statistic is computed.
MIN_OBSERVATIONS = 20
# Annualisation factor used by the tracking-error formula.
TRADING_DAYS_PER_YEAR = 252

# ============================================================
# Rotation-strategy pool: all 10 underlyings
# ============================================================

POOL_CONFIG = {
    # --- A-share indices (Tushare index_daily) ---
    '399006.SZ': {
        'name': '创业板指',
        'current_etf': '159915.SZ',
        'group': 'A',
        'benchmark_type': 'tushare_index',
        'benchmark_code': '399006.SZ',
    },
    'H30269.CSI': {
        'name': '红利低波',
        'current_etf': '512890.SH',
        'group': 'A',
        'benchmark_type': 'tushare_index',
        'benchmark_code': 'H30269.CSI',
    },
    # --- Commodities (Tushare fut_daily, main contract) ---
    'GC=F': {
        'name': '黄金',
        'current_etf': '518880.SH',
        'group': 'COMMODITY',
        'benchmark_type': 'tushare_futures',
        'benchmark_code': 'AU.SHF',
    },
    'HG=F': {
        'name': '有色金属',
        'current_etf': '159980.SZ',
        'group': 'COMMODITY',
        'benchmark_type': 'tushare_futures',
        'benchmark_code': 'CU.SHF',
    },
    # --- Overseas indices (Flask API / yfinance) ---
    'HSI': {
        'name': '恒生指数',
        'current_etf': '159920.SZ',
        'group': 'HK',
        'benchmark_type': 'flask_api',
        'benchmark_code': '^HSI',
    },
    'HSTECH.HK': {
        'name': '恒生科技',
        'current_etf': '513130.SH',
        'group': 'HK',
        'benchmark_type': 'flask_api',
        'benchmark_code': 'HSTECH.HK',
    },
    'NDX': {
        'name': '纳指100',
        'current_etf': '513100.SH',
        'group': 'US',
        'benchmark_type': 'flask_api',
        'benchmark_code': '^NDX',
    },
    'N225': {
        'name': '日经225',
        'current_etf': '513520.SH',
        'group': 'JP',
        'benchmark_type': 'flask_api',
        'benchmark_code': '^N225',
    },
    'GDAXI': {
        'name': '德国DAX',
        'current_etf': '513030.SH',
        'group': 'EU',
        'benchmark_type': 'flask_api',
        'benchmark_code': '^GDAXI',
    },
    # --- Crude oil (benchmarked against the earliest ETF; no reliable data source) ---
    'CL=F': {
        'name': '原油',
        'current_etf': '160723.SZ',
        'group': 'COMMODITY',
        'benchmark_type': 'earliest_etf',
        'benchmark_code': '159518.SZ',
    },
}


# ============================================================
# Data-fetching functions
# ============================================================

def _fetch_tushare_series(api_call, ts_code, start_date, end_date, date_col, value_col):
    """Call a Tushare endpoint and return one column as a date-indexed float Series.

    Args:
        api_call: Bound Tushare endpoint (e.g. ``pro.index_daily``).
        ts_code: Instrument code passed to the endpoint.
        start_date: Start date as 'YYYY-MM-DD'; dashes are stripped for Tushare.
        end_date: End date as 'YYYY-MM-DD'; dashes are stripped for Tushare.
        date_col: Name of the column holding the observation date.
        value_col: Name of the column to return.

    Returns:
        The ``value_col`` values as floats, indexed by date in ascending
        order, or None when the request fails or returns no rows.
    """
    try:
        df = api_call(
            ts_code=ts_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df is None or len(df) == 0:
            return None
        df['date'] = pd.to_datetime(df[date_col])
        df = df.set_index('date').sort_index()
        return df[value_col].astype(float)
    except Exception as exc:
        # Best effort: one failing instrument must not abort the whole run,
        # but the failure is reported instead of being silently dropped.
        logger.warning("Tushare request failed for %s (%s): %s", ts_code, value_col, exc)
        return None


def get_etf_nav_tushare(pro, etf_code, start_date, end_date):
    """Fetch an ETF's accumulated NAV (Tushare fund_nav).

    Returns a date-indexed float Series, or None on failure / empty data.
    """
    return _fetch_tushare_series(
        pro.fund_nav, etf_code, start_date, end_date, 'nav_date', 'accum_nav'
    )


def get_benchmark_tushare_index(pro, index_code, start_date, end_date):
    """Fetch A-share index closing prices (Tushare index_daily).

    Returns a date-indexed float Series, or None on failure / empty data.
    """
    return _fetch_tushare_series(
        pro.index_daily, index_code, start_date, end_date, 'trade_date', 'close'
    )


def get_benchmark_tushare_futures(pro, fut_code, start_date, end_date):
    """Fetch main-contract futures closing prices (Tushare fut_daily).

    Returns a date-indexed float Series, or None on failure / empty data.
    """
    return _fetch_tushare_series(
        pro.fut_daily, fut_code, start_date, end_date, 'trade_date', 'close'
    )


def get_benchmark_flask_api(flask_source, yf_code, start_date, end_date):
    """Fetch overseas index closing prices (Flask API / yfinance).

    Returns the 'close' column of ``flask_source.fetch(...)`` as floats, or
    None on failure / empty data.
    NOTE(review): the index of the returned frame is used as-is; it is
    presumably date-like and comparable to the Tushare dates - confirm.
    """
    try:
        df = flask_source.fetch(yf_code, start_date, end_date)
        if df is not None and len(df) > 0:
            return df['close'].astype(float)
    except Exception as exc:
        # Best effort, same policy as the Tushare fetchers.
        logger.warning("Flask API request failed for %s: %s", yf_code, exc)
    return None


def get_etf_close_tushare(pro, etf_code, start_date, end_date):
    """Fetch ETF closing prices (Tushare fund_daily).

    Used when no benchmark data exists (e.g. crude oil).
    Returns a date-indexed float Series, or None on failure / empty data.
    """
    return _fetch_tushare_series(
        pro.fund_daily, etf_code, start_date, end_date, 'trade_date', 'close'
    )


def _fetch_benchmark(pro, flask_source, btype, bcode, start_date, end_date):
    """Fetch a benchmark series via the fetcher matching ``btype`` (None if unknown)."""
    if btype == 'tushare_index':
        return get_benchmark_tushare_index(pro, bcode, start_date, end_date)
    if btype == 'tushare_futures':
        return get_benchmark_tushare_futures(pro, bcode, start_date, end_date)
    if btype == 'flask_api':
        return get_benchmark_flask_api(flask_source, bcode, start_date, end_date)
    if btype == 'earliest_etf':
        return get_etf_close_tushare(pro, bcode, start_date, end_date)
    return None


# ============================================================
# Tracking-error calculation
# ============================================================

def _clean_levels(series):
    """Drop NaN levels and duplicate dates, and return the series in date order."""
    series = series.dropna()
    # Duplicate labels would make the element-wise subtraction below raise.
    # NOTE(review): which duplicate is "last" depends on the upstream row order.
    series = series[~series.index.duplicated(keep='last')]
    return series.sort_index()


def calculate_tracking_error(etf_nav, benchmark_close):
    """Compute the annualised tracking error of an ETF against its benchmark.

    Formula: STDEV(daily deviation) x sqrt(252), where
    daily deviation = ETF return - benchmark return.

    Both level series are restricted to their shared dates *before* returns
    are taken, so each pair of returns spans the same period even when the
    two calendars differ.

    Args:
        etf_nav: Date-indexed Series of ETF levels (NAV or close), or None.
        benchmark_close: Date-indexed Series of benchmark levels, or None.

    Returns:
        A dict of statistics (percent values for the tracking error and the
        returns), or None when an input is missing or fewer than 20 aligned
        returns are available.
    """
    if etf_nav is None or benchmark_close is None:
        return None

    etf_levels = _clean_levels(etf_nav)
    bench_levels = _clean_levels(benchmark_close)
    shared_dates = etf_levels.index.intersection(bench_levels.index)

    returns = pd.DataFrame({
        'etf': etf_levels.loc[shared_dates].pct_change(),
        'bench': bench_levels.loc[shared_dates].pct_change(),
    }).dropna()
    if len(returns) < MIN_OBSERVATIONS:
        return None

    etf_ret = returns['etf']
    bench_ret = returns['bench']

    daily_deviation = etf_ret - bench_ret
    tracking_error = daily_deviation.std() * np.sqrt(TRADING_DAYS_PER_YEAR)

    correlation = etf_ret.corr(bench_ret)
    r_squared = correlation ** 2

    etf_cum = (1 + etf_ret).prod() - 1
    bench_cum = (1 + bench_ret).prod() - 1
    excess = etf_cum - bench_cum

    return {
        'annual_tracking_error': round(float(tracking_error) * 100, 4),
        'correlation': round(float(correlation), 6),
        'r_squared': round(float(r_squared), 6),
        'etf_cum_return': round(float(etf_cum) * 100, 2),
        'benchmark_cum_return': round(float(bench_cum) * 100, 2),
        'excess_return': round(float(excess) * 100, 2),
        'common_days': len(returns),
    }


def _parse_eastmoney_te(value):
    """Convert an Eastmoney tracking-error field to a float, or None if unparseable.

    Accepts plain numbers as well as strings with an optional '%' suffix.
    """
    if value is None or isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        return float(value)
    try:
        return float(str(value).replace('%', ''))
    except ValueError:
        return None


# ============================================================
# Main flow
# ============================================================

def main():
    """Compute tracking errors for every pool entry, save them, and print a summary.

    Reads the Eastmoney ETF list from
    ``rotation/results/etf_competitor_analysis.json`` (if present) and writes
    the results to ``rotation/results/tracking_error_full.json``.
    """
    now = datetime.now()

    print("=" * 80)
    print("ETF跟踪误差全量计算(10个标的)")
    print(f"分析日期: {now.strftime('%Y-%m-%d')}")
    print("=" * 80)

    # Initialise the data sources.
    pro = ts.pro_api(os.getenv('TUSHARE_TOKEN'))
    flask_source = FlaskAPIDataSource()

    # Analysis window: the most recent year.
    end_date = now.strftime('%Y-%m-%d')
    start_date = (now - timedelta(days=365)).strftime('%Y-%m-%d')
    print(f"计算区间: {start_date} ~ {end_date}")

    # Load the Eastmoney data (used for validation and as the ETF list).
    eastmoney_path = PROJECT_ROOT / 'rotation' / 'results' / 'etf_competitor_analysis.json'
    eastmoney_data = {}
    if eastmoney_path.exists():
        with open(eastmoney_path, 'r', encoding='utf-8') as f:
            eastmoney_data = json.load(f)
        print(f"已加载天天基金数据: {len(eastmoney_data)} 个标的")

    # Benchmarks are cached per (type, code) to avoid repeated requests.
    benchmark_cache = {}  # benchmark_key -> Series (None for a failed fetch)
    results = {}

    for key, info in POOL_CONFIG.items():
        index_name = info['name']
        current_etf = info['current_etf']
        btype = info['benchmark_type']
        bcode = info['benchmark_code']

        print(f"\n{'='*60}")
        print(f"=== {index_name} ({key}) | 基准类型: {btype} ===")
        print(f"{'='*60}")

        # Step 1: fetch the benchmark data (cached).
        bench_key = f"{btype}:{bcode}"
        if bench_key in benchmark_cache:
            benchmark = benchmark_cache[bench_key]
            # A failed fetch is cached as None; only report a size for real data.
            if benchmark is not None:
                print(f" (缓存) 基准数据: {len(benchmark)} 天")
        else:
            print(f" 获取基准数据: {bcode} ({btype})")
            benchmark = _fetch_benchmark(
                pro, flask_source, btype, bcode, start_date, end_date
            )
            benchmark_cache[bench_key] = benchmark
            if benchmark is not None:
                print(f" ✓ 基准数据: {len(benchmark)} 天")
            else:
                print(" ✗ 基准数据获取失败")

        if benchmark is None:
            print(" 跳过(无基准数据)")
            continue

        # Step 2: collect every ETF listed for this underlying.
        etf_list = []
        if key in eastmoney_data:
            for etf in eastmoney_data[key]['etfs']:
                etf_list.append({
                    'code': etf['ts_code'],
                    'name': etf['name'],
                    'eastmoney_te': etf.get('annual_tracking_error', 'N/A'),
                })

        print(f" 共 {len(etf_list)} 只ETF需要计算")

        # Step 3: compute the tracking error ETF by ETF.
        # Crude oil compares close against close; everything else uses NAV.
        if btype == 'earliest_etf':
            fetch_etf = get_etf_close_tushare
        else:
            fetch_etf = get_etf_nav_tushare

        etf_results = []
        for etf_info in etf_list:
            etf_code = etf_info['code']
            etf_name = etf_info['name']

            etf_data = fetch_etf(pro, etf_code, start_date, end_date)
            # Throttle after every request, including the ones that fail.
            time.sleep(0.05)

            if etf_data is None or len(etf_data) < MIN_OBSERVATIONS:
                continue

            tracking = calculate_tracking_error(etf_data, benchmark)
            if tracking is None:
                continue

            etf_results.append({
                'ts_code': etf_code,
                'name': etf_name,
                'tushare_te': tracking['annual_tracking_error'],
                'tushare_r2': tracking['r_squared'],
                'tushare_correlation': tracking['correlation'],
                'tushare_excess_return': tracking['excess_return'],
                'tushare_common_days': tracking['common_days'],
                'eastmoney_te': etf_info['eastmoney_te'],
                'is_current': etf_code == current_etf,
            })

        # Sort by tracking error, lowest first.
        etf_results.sort(key=lambda x: x['tushare_te'])

        results[key] = {
            'index_name': index_name,
            'current_etf': current_etf,
            'benchmark_type': btype,
            'benchmark_code': bcode,
            'group': info['group'],
            'etf_count': len(etf_results),
            'etfs': etf_results,
        }

        # Print the ten best results.
        print(f"\n 计算完成: {len(etf_results)} 只ETF")
        print(f" {'代码':<12} {'名称':<20} {'TE':<10} {'天天基金TE':<12} {'R²':<8}")
        print(f" {'-'*70}")
        for etf in etf_results[:10]:
            te_str = f"{etf['tushare_te']:.4f}%"
            # str() keeps the alignment spec valid when the JSON value is null.
            em_te = str(etf['eastmoney_te'])
            short_name = etf['name'][:20]
            marker = " ★" if etf['is_current'] else ""
            print(f" {etf['ts_code']:<12} {short_name:<20} {te_str:<10} {em_te:<12} {etf['tushare_r2']:<8}{marker}")

        if len(etf_results) > 10:
            print(f" ... 还有 {len(etf_results) - 10} 只")

    # ============================================================
    # Save the results
    # ============================================================
    output_dir = PROJECT_ROOT / 'rotation' / 'results'
    # parents=True: the 'rotation' directory may not exist yet either.
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / 'tracking_error_full.json'

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2, default=str)

    print(f"\n{'='*80}")
    print(f"结果已保存: {output_path}")
    print(f"{'='*80}")

    # ============================================================
    # Validation summary
    # ============================================================
    print(f"\n{'='*80}")
    print("全量校验汇总")
    print(f"{'='*80}")

    for key, data in results.items():
        matched = [
            e for e in data['etfs']
            if e['eastmoney_te'] and e['eastmoney_te'] not in ['N/A', '--']
        ]

        print(f"\n--- {data['index_name']} ({data['benchmark_type']}) ---")
        print(f" ETF总数: {data['etf_count']} | 天天基金有数据: {len(matched)}")

        if matched:
            diffs = []
            for etf in matched:
                em_te = _parse_eastmoney_te(etf['eastmoney_te'])
                if em_te is not None:
                    diffs.append(etf['tushare_te'] - em_te)

            if diffs:
                print(f" 平均差异: {np.mean(diffs):+.4f}% | 最大差异: {max(diffs, key=abs):+.4f}%")

        # Print the three ETFs with the lowest tracking error.
        top3 = data['etfs'][:3]
        print(" Top3 (TE最低):")
        for i, etf in enumerate(top3, 1):
            marker = " ★当前" if etf['is_current'] else ""
            print(f" {i}. {etf['ts_code']} {etf['name']} TE={etf['tushare_te']:.4f}%{marker}")


if __name__ == '__main__':
    main()