# etf/rotation/tracking_error_full.py

"""
ETF跟踪误差全量计算
- 覆盖轮动策略标的池全部10个标的
- 数据源分层：
  - A股指数 → Tushare index_daily
  - 商品 → Tushare fut_daily（主力合约）
  - 海外指数 → Flask API (yfinance)
- 与天天基金数据对比校验
"""
import os
import sys
import time
import json
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, timedelta

# Directory two levels above this file; used as the base for the .env file,
# the results directory and project-local imports.
PROJECT_ROOT = Path(__file__).parent.parent
# Put the project root first on sys.path so the project-local import below
# (``datasource``) resolves when this file is run directly as a script.
sys.path.insert(0, str(PROJECT_ROOT))
from dotenv import load_dotenv
# Load environment variables from the project's .env file before any of them
# are read (main() reads TUSHARE_TOKEN via os.getenv).
load_dotenv(PROJECT_ROOT / '.env')

import tushare as ts
from datasource.flask_api_source import FlaskAPIDataSource


# ============================================================
# Rotation-strategy universe: all 10 underlyings
# ============================================================
# Each entry is keyed by the underlying's identifier; main() uses the same key
# to look up that underlying's ETF list in the Eastmoney JSON file.
# Fields:
#   name            - display name printed in the report
#   current_etf     - ETF code flagged as "current" (is_current) in the results
#   group           - grouping label copied into the output unchanged
#   benchmark_type  - selects the fetcher in main(): 'tushare_index',
#                     'tushare_futures', 'flask_api' or 'earliest_etf'
#   benchmark_code  - code passed to that fetcher
POOL_CONFIG = {
    # --- A-share indices (Tushare index_daily) ---
    '399006.SZ': {
        'name': '创业板指', 'current_etf': '159915.SZ', 'group': 'A',
        'benchmark_type': 'tushare_index', 'benchmark_code': '399006.SZ',
    },
    'H30269.CSI': {
        'name': '红利低波', 'current_etf': '512890.SH', 'group': 'A',
        'benchmark_type': 'tushare_index', 'benchmark_code': 'H30269.CSI',
    },
    # --- Commodities (Tushare fut_daily, main contract) ---
    'GC=F': {
        'name': '黄金', 'current_etf': '518880.SH', 'group': 'COMMODITY',
        'benchmark_type': 'tushare_futures', 'benchmark_code': 'AU.SHF',
    },
    'HG=F': {
        'name': '有色金属', 'current_etf': '159980.SZ', 'group': 'COMMODITY',
        'benchmark_type': 'tushare_futures', 'benchmark_code': 'CU.SHF',
    },
    # --- Overseas indices (Flask API / yfinance) ---
    'HSI': {
        'name': '恒生指数', 'current_etf': '159920.SZ', 'group': 'HK',
        'benchmark_type': 'flask_api', 'benchmark_code': '^HSI',
    },
    'HSTECH.HK': {
        'name': '恒生科技', 'current_etf': '513130.SH', 'group': 'HK',
        'benchmark_type': 'flask_api', 'benchmark_code': 'HSTECH.HK',
    },
    'NDX': {
        'name': '纳指100', 'current_etf': '513100.SH', 'group': 'US',
        'benchmark_type': 'flask_api', 'benchmark_code': '^NDX',
    },
    'N225': {
        'name': '日经225', 'current_etf': '513520.SH', 'group': 'JP',
        'benchmark_type': 'flask_api', 'benchmark_code': '^N225',
    },
    'GDAXI': {
        'name': '德国DAX', 'current_etf': '513030.SH', 'group': 'EU',
        'benchmark_type': 'flask_api', 'benchmark_code': '^GDAXI',
    },
    # --- Crude oil (no reliable benchmark data source, so the earliest ETF's
    #     close price is used as the benchmark) ---
    'CL=F': {
        'name': '原油', 'current_etf': '160723.SZ', 'group': 'COMMODITY',
        'benchmark_type': 'earliest_etf', 'benchmark_code': '159518.SZ',
    },
}


# ============================================================
# Data-fetching functions
# ============================================================

def get_etf_nav_tushare(pro, etf_code, start_date, end_date):
    """Fetch an ETF's accumulated NAV series from Tushare ``fund_nav``.

    Args:
        pro: Tushare pro client (any object exposing ``fund_nav``).
        etf_code: Fund code passed as ``ts_code``.
        start_date: Range start as ``'YYYY-MM-DD'``; the dashes are stripped
            before the request is sent.
        end_date: Range end, same format as ``start_date``.

    Returns:
        A float ``pandas.Series`` of ``accum_nav`` indexed by ``nav_date`` in
        ascending order, or ``None`` when the request or parsing fails or no
        rows come back.
    """
    try:
        df = pro.fund_nav(
            ts_code=etf_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df is None or len(df) == 0:
            return None
        df['date'] = pd.to_datetime(df['nav_date'])
        df = df.set_index('date').sort_index()
        return df['accum_nav'].astype(float)
    except Exception as exc:
        # Stay best-effort (callers treat None as "no data"), but report the
        # cause so token, quota or schema problems are not silently hidden.
        print(f"  ✗ fund_nav 获取失败 ({etf_code}): {exc!r}")
        return None


def get_benchmark_tushare_index(pro, index_code, start_date, end_date):
    """Fetch an index's daily close series from Tushare ``index_daily``.

    Args:
        pro: Tushare pro client (any object exposing ``index_daily``).
        index_code: Index code passed as ``ts_code``.
        start_date: Range start as ``'YYYY-MM-DD'``; the dashes are stripped
            before the request is sent.
        end_date: Range end, same format as ``start_date``.

    Returns:
        A float ``pandas.Series`` of ``close`` indexed by ``trade_date`` in
        ascending order, or ``None`` when the request or parsing fails or no
        rows come back.
    """
    try:
        df = pro.index_daily(
            ts_code=index_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df is None or len(df) == 0:
            return None
        df['date'] = pd.to_datetime(df['trade_date'])
        df = df.set_index('date').sort_index()
        return df['close'].astype(float)
    except Exception as exc:
        # Stay best-effort (callers treat None as "no data"), but report the
        # cause so token, quota or schema problems are not silently hidden.
        print(f"  ✗ index_daily 获取失败 ({index_code}): {exc!r}")
        return None


def get_benchmark_tushare_futures(pro, fut_code, start_date, end_date):
    """Fetch a futures contract's daily close series from Tushare ``fut_daily``.

    Args:
        pro: Tushare pro client (any object exposing ``fut_daily``).
        fut_code: Futures code passed as ``ts_code`` (this file's config
            passes main-contract codes such as ``'AU.SHF'``).
        start_date: Range start as ``'YYYY-MM-DD'``; the dashes are stripped
            before the request is sent.
        end_date: Range end, same format as ``start_date``.

    Returns:
        A float ``pandas.Series`` of ``close`` indexed by ``trade_date`` in
        ascending order, or ``None`` when the request or parsing fails or no
        rows come back.
    """
    try:
        df = pro.fut_daily(
            ts_code=fut_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df is None or len(df) == 0:
            return None
        df['date'] = pd.to_datetime(df['trade_date'])
        df = df.set_index('date').sort_index()
        return df['close'].astype(float)
    except Exception as exc:
        # Stay best-effort (callers treat None as "no data"), but report the
        # cause so token, quota or schema problems are not silently hidden.
        print(f"  ✗ fut_daily 获取失败 ({fut_code}): {exc!r}")
        return None


def get_benchmark_flask_api(flask_source, yf_code, start_date, end_date):
    """Fetch a close-price series through the project's Flask API data source.

    Args:
        flask_source: Data source object exposing
            ``fetch(code, start_date, end_date)``.
        yf_code: Symbol forwarded to ``fetch`` (this file's config passes
            yfinance-style codes such as ``'^HSI'``).
        start_date: Range start, forwarded unchanged.
        end_date: Range end, forwarded unchanged.

    Returns:
        The ``close`` column as a float ``pandas.Series`` with whatever index
        ``fetch`` produced, or ``None`` when the call fails or returns no rows.
    """
    try:
        df = flask_source.fetch(yf_code, start_date, end_date)
        if df is None or len(df) == 0:
            return None
        # NOTE(review): assumes fetch() returns a frame indexed by tz-naive
        # dates comparable to the Tushare series - confirm against
        # FlaskAPIDataSource; otherwise the date intersection will be empty.
        return df['close'].astype(float)
    except Exception as exc:
        # Stay best-effort (callers treat None as "no data"), but report the
        # cause so connectivity or schema problems are not silently hidden.
        print(f"  ✗ Flask API 获取失败 ({yf_code}): {exc!r}")
        return None


def get_etf_close_tushare(pro, etf_code, start_date, end_date):
    """Fetch an ETF's daily close series from Tushare ``fund_daily``.

    Used where no separate benchmark data exists (the crude-oil entry), so
    that close prices can be compared with close prices.

    Args:
        pro: Tushare pro client (any object exposing ``fund_daily``).
        etf_code: Fund code passed as ``ts_code``.
        start_date: Range start as ``'YYYY-MM-DD'``; the dashes are stripped
            before the request is sent.
        end_date: Range end, same format as ``start_date``.

    Returns:
        A float ``pandas.Series`` of ``close`` indexed by ``trade_date`` in
        ascending order, or ``None`` when the request or parsing fails or no
        rows come back.
    """
    try:
        df = pro.fund_daily(
            ts_code=etf_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        if df is None or len(df) == 0:
            return None
        df['date'] = pd.to_datetime(df['trade_date'])
        df = df.set_index('date').sort_index()
        return df['close'].astype(float)
    except Exception as exc:
        # Stay best-effort (callers treat None as "no data"), but report the
        # cause so token, quota or schema problems are not silently hidden.
        print(f"  ✗ fund_daily 获取失败 ({etf_code}): {exc!r}")
        return None


# ============================================================
# Tracking-error calculation
# ============================================================

def calculate_tracking_error(etf_nav, benchmark_close):
    """Compute the annualized tracking error of an ETF against a benchmark.

    Formula: ``std(daily deviation) * sqrt(252)`` where the daily deviation
    is the ETF return minus the benchmark return.

    The two level series are first restricted to the dates they share and
    returns are computed afterwards. Computing returns on each series' own
    calendar and intersecting later would, after a one-sided gap (market
    holiday, missing NAV), compare a multi-day return of one series with a
    one-day return of the other and inflate the tracking error.

    Args:
        etf_nav: ``pandas.Series`` of ETF levels (NAV or close) indexed by
            date, or ``None``.
        benchmark_close: ``pandas.Series`` of benchmark levels indexed by
            date, or ``None``.

    Returns:
        ``None`` if either input is ``None`` or fewer than 20 common return
        observations exist; otherwise a dict with ``annual_tracking_error``,
        ``etf_cum_return``, ``benchmark_cum_return`` and ``excess_return``
        (all in percent), ``correlation``, ``r_squared`` and ``common_days``
        (the number of return observations used).
    """
    if etf_nav is None or benchmark_close is None:
        return None

    # Drop missing levels and repeated dates so the inner join below is
    # one-to-one and pct_change never has to pad over NaN.
    etf_levels = etf_nav.dropna()
    etf_levels = etf_levels[~etf_levels.index.duplicated(keep='last')]
    bench_levels = benchmark_close.dropna()
    bench_levels = bench_levels[~bench_levels.index.duplicated(keep='last')]

    # Align levels on shared dates BEFORE differencing, so both returns on a
    # given row cover the same holding period.
    levels = pd.concat(
        {'etf': etf_levels, 'bench': bench_levels}, axis=1, join='inner'
    ).sort_index()
    returns = levels.pct_change(fill_method=None).dropna()
    if len(returns) < 20:
        return None

    e = returns['etf']
    b = returns['bench']

    daily_deviation = e - b
    tracking_error = daily_deviation.std() * np.sqrt(252)
    correlation = e.corr(b)
    r_squared = correlation ** 2

    etf_cum = (1 + e).prod() - 1
    bench_cum = (1 + b).prod() - 1
    excess = etf_cum - bench_cum

    # Cast to built-in floats so the dict holds plain Python numbers.
    return {
        'annual_tracking_error': float(round(tracking_error * 100, 4)),
        'correlation': float(round(correlation, 6)),
        'r_squared': float(round(r_squared, 6)),
        'etf_cum_return': float(round(etf_cum * 100, 2)),
        'benchmark_cum_return': float(round(bench_cum * 100, 2)),
        'excess_return': float(round(excess * 100, 2)),
        'common_days': len(returns),
    }


# ============================================================
# Main flow
# ============================================================

def _fetch_benchmark(pro, flask_source, btype, bcode, start_date, end_date):
    """Fetch the benchmark series with the fetcher matching ``btype``; None if unknown or failed."""
    if btype == 'tushare_index':
        return get_benchmark_tushare_index(pro, bcode, start_date, end_date)
    if btype == 'tushare_futures':
        return get_benchmark_tushare_futures(pro, bcode, start_date, end_date)
    if btype == 'flask_api':
        return get_benchmark_flask_api(flask_source, bcode, start_date, end_date)
    if btype == 'earliest_etf':
        return get_etf_close_tushare(pro, bcode, start_date, end_date)
    return None


def _parse_te_percent(value):
    """Convert an Eastmoney tracking-error value (number or text like '0.12%') to float, else None."""
    # Falsy values (None, '', 0) and the placeholders count as "no data".
    if not value or isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        # NOTE(review): numeric JSON values are assumed to already be in
        # percent, like the '%'-suffixed strings - confirm against the file.
        number = float(value)
    else:
        text = str(value).replace('%', '').strip()
        if text in ('', 'N/A', '--'):
            return None
        try:
            number = float(text)
        except ValueError:
            return None
    return None if np.isnan(number) else number


def _print_validation_summary(results):
    """Print, per underlying, the computed-vs-Eastmoney TE differences and the three lowest-TE ETFs."""
    print(f"\n{'='*80}")
    print("全量校验汇总")
    print(f"{'='*80}")

    for data in results.values():
        diffs = []
        for etf in data['etfs']:
            em_te = _parse_te_percent(etf['eastmoney_te'])
            if em_te is not None:
                diffs.append(etf['tushare_te'] - em_te)

        print(f"\n--- {data['index_name']} ({data['benchmark_type']}) ---")
        print(f"  ETF总数: {data['etf_count']} | 天天基金有数据: {len(diffs)}")

        if diffs:
            print(f"  平均差异: {np.mean(diffs):+.4f}% | 最大差异: {max(diffs, key=abs):+.4f}%")

        # ETFs are already sorted by tracking error, so the first three are the best.
        print("  Top3 (TE最低):")
        for i, etf in enumerate(data['etfs'][:3], 1):
            marker = " ★当前" if etf['is_current'] else ""
            print(f"    {i}. {etf['ts_code']} {etf['name']} TE={etf['tushare_te']:.4f}%{marker}")


def main():
    """Compute tracking errors for every ETF of every underlying in POOL_CONFIG.

    Steps:
      1. Fetch each underlying's benchmark series over the last 365 days
         (cached per benchmark type and code).
      2. Read the ETF list for the underlying from the Eastmoney JSON file
         ``rotation/results/etf_competitor_analysis.json``.
      3. Compute the tracking error of each ETF against the benchmark and
         print the ten lowest.
      4. Write all results to ``rotation/results/tracking_error_full.json``
         and print a summary comparing them with the Eastmoney figures.
    """
    # Sample the clock once so the header and both range ends agree even if
    # the run straddles midnight.
    now = datetime.now()

    print("=" * 80)
    print("ETF跟踪误差全量计算（10个标的）")
    print(f"分析日期: {now.strftime('%Y-%m-%d')}")
    print("=" * 80)

    # Initialise data sources.
    pro = ts.pro_api(os.getenv('TUSHARE_TOKEN'))
    flask_source = FlaskAPIDataSource()

    # Analysis window: the most recent year.
    end_date = now.strftime('%Y-%m-%d')
    start_date = (now - timedelta(days=365)).strftime('%Y-%m-%d')
    print(f"计算区间: {start_date} ~ {end_date}")

    # Eastmoney data supplies both the ETF list per underlying and the
    # reference tracking errors used for validation.
    eastmoney_path = PROJECT_ROOT / 'rotation' / 'results' / 'etf_competitor_analysis.json'
    eastmoney_data = {}
    if eastmoney_path.exists():
        with open(eastmoney_path, 'r', encoding='utf-8') as f:
            eastmoney_data = json.load(f)
        print(f"已加载天天基金数据: {len(eastmoney_data)} 个标的")
    else:
        # Without this file no ETF is evaluated at all; say so explicitly.
        print(f"⚠ 未找到天天基金数据: {eastmoney_path}（ETF列表为空）")

    # bench_key -> Series, or None for a benchmark whose fetch already failed.
    benchmark_cache = {}
    results = {}

    for key, info in POOL_CONFIG.items():
        index_name = info['name']
        current_etf = info['current_etf']
        btype = info['benchmark_type']
        bcode = info['benchmark_code']

        print(f"\n{'='*60}")
        print(f"=== {index_name} ({key}) | 基准类型: {btype} ===")
        print(f"{'='*60}")

        # Step 1: benchmark data (cached).
        bench_key = f"{btype}:{bcode}"
        if bench_key in benchmark_cache:
            benchmark = benchmark_cache[bench_key]
            # A cached None records an earlier failure; len() must not be
            # called on it.
            if benchmark is not None:
                print(f"  (缓存) 基准数据: {len(benchmark)} 天")
        else:
            print(f"  获取基准数据: {bcode} ({btype})")
            benchmark = _fetch_benchmark(pro, flask_source, btype, bcode, start_date, end_date)
            benchmark_cache[bench_key] = benchmark
            if benchmark is not None:
                print(f"  ✓ 基准数据: {len(benchmark)} 天")
            else:
                print("  ✗ 基准数据获取失败")

        if benchmark is None:
            print("  跳过（无基准数据）")
            continue

        # Step 2: every ETF listed for this underlying in the Eastmoney data.
        etf_list = [
            {
                'code': etf['ts_code'],
                'name': etf['name'],
                'eastmoney_te': etf.get('annual_tracking_error', 'N/A'),
            }
            for etf in (eastmoney_data.get(key) or {}).get('etfs') or []
        ]
        print(f"  共 {len(etf_list)} 只ETF需要计算")

        # Step 3: tracking error per ETF.
        etf_results = []
        for etf_info in etf_list:
            etf_code = etf_info['code']

            if btype == 'earliest_etf':
                # Crude oil: compare close price with close price.
                etf_data = get_etf_close_tushare(pro, etf_code, start_date, end_date)
            else:
                etf_data = get_etf_nav_tushare(pro, etf_code, start_date, end_date)

            # Throttle after every request, including failed ones, so a
            # run of failures does not hit the API without any pause.
            time.sleep(0.05)

            if etf_data is None or len(etf_data) < 20:
                continue

            tracking = calculate_tracking_error(etf_data, benchmark)
            if tracking is None:
                continue

            etf_results.append({
                'ts_code': etf_code,
                'name': etf_info['name'],
                'tushare_te': tracking['annual_tracking_error'],
                'tushare_r2': tracking['r_squared'],
                'tushare_correlation': tracking['correlation'],
                'tushare_excess_return': tracking['excess_return'],
                'tushare_common_days': tracking['common_days'],
                'eastmoney_te': etf_info['eastmoney_te'],
                'is_current': etf_code == current_etf,
            })

        # Ascending by tracking error; NaN values (x != x) are pushed to the
        # end instead of leaving the order undefined.
        etf_results.sort(key=lambda r: (r['tushare_te'] != r['tushare_te'], r['tushare_te']))

        results[key] = {
            'index_name': index_name,
            'current_etf': current_etf,
            'benchmark_type': btype,
            'benchmark_code': bcode,
            'group': info['group'],
            'etf_count': len(etf_results),
            'etfs': etf_results,
        }

        # Print the ten lowest tracking errors.
        print(f"\n  计算完成: {len(etf_results)} 只ETF")
        print(f"  {'代码':<12} {'名称':<20} {'TE':<10} {'天天基金TE':<12} {'R²':<8}")
        print(f"  {'-'*70}")

        for etf in etf_results[:10]:
            te_str = f"{etf['tushare_te']:.4f}%"
            # The JSON value may be a string, a number or null; None does not
            # accept a width format spec, so render everything as text.
            raw_em_te = etf['eastmoney_te']
            em_te = 'N/A' if raw_em_te is None else str(raw_em_te)
            marker = " ★" if etf['is_current'] else ""
            print(f"  {etf['ts_code']:<12} {etf['name'][:20]:<20} {te_str:<10} {em_te:<12} {etf['tushare_r2']:<8}{marker}")

        if len(etf_results) > 10:
            print(f"  ... 还有 {len(etf_results) - 10} 只")

    # ============================================================
    # Save results
    # ============================================================
    output_dir = PROJECT_ROOT / 'rotation' / 'results'
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / 'tracking_error_full.json'

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2, default=str)

    print(f"\n{'='*80}")
    print(f"结果已保存: {output_path}")
    print(f"{'='*80}")

    # ============================================================
    # Validation summary
    # ============================================================
    _print_validation_summary(results)


if __name__ == '__main__':
    main()