将分析/测试/实验脚本从核心目录移出: - enrich_etf_data.py → scripts/ - oil_tracking.py → analysis/ - tracking_error_full.py → analysis/ - tracking_error_validation.py → analysis/ - test_start_year_analysis.py → experiments/ - experiment_select_num.py → experiments/ rotation/ 目录现在只保留核心策略代码: - simple_rotation.py (策略主逻辑) - config_loader.py (配置加载) - config_simple.yaml (配置文件) - daily_scheduler.py (调度器)
290 lines
9.3 KiB
Python
290 lines
9.3 KiB
Python
"""
|
||
ETF跟踪误差计算与校验
|
||
- 使用Tushare数据计算ETF跟踪误差(基于NAV)
|
||
- 与天天基金数据对比校验
|
||
"""
|
||
import os
|
||
import sys
|
||
import time
|
||
import json
|
||
import pandas as pd
|
||
import numpy as np
|
||
from pathlib import Path
|
||
from datetime import datetime, timedelta
|
||
|
||
# Directory two levels above this file (the parent of the folder containing it).
PROJECT_ROOT = Path(__file__).parent.parent
# Put that directory first on the import path so modules under it can be imported.
sys.path.insert(0, str(PROJECT_ROOT))
from dotenv import load_dotenv
# Load environment variables from PROJECT_ROOT/.env; main() later reads
# TUSHARE_TOKEN through os.getenv.
load_dotenv(PROJECT_ROOT / '.env')
|
||
|
||
import tushare as ts
|
||
|
||
|
||
# Instrument pool of the rotation strategy.
# Each entry is keyed by an index code and records the index display name,
# the ETF currently associated with it, a group label and the index code
# passed to the index-data query.
POOL_INDEX_MAP = {
    '399006.SZ': {
        'name': '创业板指',
        'current_etf': '159915.SZ',
        'group': 'A',
        'index_code': '399006.SZ',
    },
    'H30269.CSI': {
        'name': '红利低波',
        'current_etf': '512890.SH',
        'group': 'A',
        'index_code': 'H30269.CSI',
    },
}
|
||
|
||
|
||
def get_etf_nav_data(pro, etf_code, start_date, end_date):
    """
    Fetch an ETF's net-asset-value history through the ``fund_nav`` endpoint.

    Note: the accumulated NAV (``accum_nav``) is used rather than the unit
    NAV (``unit_nav``).

    Args:
        pro: API client exposing a ``fund_nav`` method.
        etf_code: Fund code passed as ``ts_code``.
        start_date: Start date as ``YYYY-MM-DD``; dashes are stripped before
            the request.
        end_date: End date as ``YYYY-MM-DD``; dashes are stripped before the
            request.

    Returns:
        A float Series of ``accum_nav`` indexed by ascending date, or ``None``
        when the response is empty or any error occurs (the error is printed).
    """
    try:
        nav_frame = pro.fund_nav(
            ts_code=etf_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        # Nothing usable came back: fall through to the shared None return.
        if nav_frame is None or len(nav_frame) == 0:
            return None

        nav_frame['date'] = pd.to_datetime(nav_frame['nav_date'])
        nav_by_date = nav_frame.set_index('date').sort_index()
        # Use the accumulated NAV.
        return nav_by_date['accum_nav'].astype(float)
    except Exception as exc:
        print(f" 获取 {etf_code} NAV失败: {exc}")
    return None
|
||
|
||
|
||
def get_index_data(pro, index_code, start_date, end_date):
    """
    Fetch daily closing levels of an index through the ``index_daily`` endpoint.

    Args:
        pro: API client exposing an ``index_daily`` method.
        index_code: Index code passed as ``ts_code``.
        start_date: Start date as ``YYYY-MM-DD``; dashes are stripped before
            the request.
        end_date: End date as ``YYYY-MM-DD``; dashes are stripped before the
            request.

    Returns:
        A float Series of ``close`` indexed by ascending date, or ``None``
        when the response is empty or any error occurs (the error is printed).
    """
    try:
        daily_frame = pro.index_daily(
            ts_code=index_code,
            start_date=start_date.replace('-', ''),
            end_date=end_date.replace('-', ''),
        )
        # Nothing usable came back: fall through to the shared None return.
        if daily_frame is None or len(daily_frame) == 0:
            return None

        daily_frame['date'] = pd.to_datetime(daily_frame['trade_date'])
        close_by_date = daily_frame.set_index('date').sort_index()
        return close_by_date['close'].astype(float)
    except Exception as exc:
        print(f" 获取指数 {index_code} 失败: {exc}")
    return None
|
||
|
||
|
||
def calculate_tracking_error(etf_nav, index_close):
    """
    Compute the annualised tracking error of an ETF against its index.

    Formula: STDEV(daily deviation) x sqrt(252), where
    daily deviation = ETF NAV return - index return.

    The two level series are aligned on their common dates *before* returns
    are computed, so that every return on both sides covers exactly the same
    holding period even when one series is missing some dates.

    Args:
        etf_nav: Date-indexed Series of ETF NAV levels, or ``None``.
        index_close: Date-indexed Series of index closing levels, or ``None``.

    Returns:
        ``None`` if either input is ``None`` or fewer than 20 common return
        observations exist; otherwise a dict with:
        ``annual_tracking_error`` (%), ``correlation``, ``r_squared``,
        ``etf_cum_return`` (%), ``index_cum_return`` (%),
        ``excess_return`` (%) and ``common_days`` (number of returns used).
    """
    if etf_nav is None or index_close is None:
        return None

    # Align the price levels first. Computing pct_change on each series'
    # own calendar and intersecting afterwards would pair a multi-day return
    # (after a gap in one series) with a single-day return of the other,
    # which inflates the measured deviation.
    etf_levels = etf_nav.dropna()
    index_levels = index_close.dropna()
    common_dates = etf_levels.index.intersection(index_levels.index).sort_values()

    # Returns over identical periods.
    etf_ret = etf_levels.loc[common_dates].pct_change().dropna()
    idx_ret = index_levels.loc[common_dates].pct_change().dropna()

    # Keep only dates where both returns are defined.
    common = etf_ret.index.intersection(idx_ret.index)
    if len(common) < 20:
        return None

    e = etf_ret.loc[common]
    i = idx_ret.loc[common]

    # Daily deviation.
    daily_deviation = e - i

    # Tracking error = standard deviation x sqrt(252).
    tracking_error = daily_deviation.std() * np.sqrt(252)

    # Additional fit statistics.
    correlation = e.corr(i)
    r_squared = correlation ** 2

    # Cumulative returns over the common window.
    etf_cum = (1 + e).prod() - 1
    idx_cum = (1 + i).prod() - 1
    excess = etf_cum - idx_cum

    return {
        'annual_tracking_error': round(tracking_error * 100, 4),  # %
        'correlation': round(correlation, 6),
        'r_squared': round(r_squared, 6),
        'etf_cum_return': round(etf_cum * 100, 2),  # %
        'index_cum_return': round(idx_cum * 100, 2),  # %
        'excess_return': round(excess * 100, 2),  # %
        'common_days': len(common),
    }
|
||
|
||
|
||
def _parse_te_percent(raw):
    """Convert a benchmark tracking-error value (e.g. ``'0.85%'``) to a float, or None if unavailable."""
    # Empty values and the placeholder markers mean "no data".
    if not raw or raw in ('N/A', '--'):
        return None
    try:
        # str() lets numeric JSON values be handled the same way as strings.
        return float(str(raw).replace('%', ''))
    except ValueError:
        return None


def main():
    """
    Compute tracking errors for every ETF in the pool and validate them.

    Steps:
      1. Create the Tushare client from the ``TUSHARE_TOKEN`` environment
         variable and pick a window covering the last 365 days.
      2. Load the Tiantian Fund data from
         ``rotation/results/etf_competitor_analysis.json`` if that file exists.
      3. For each index in ``POOL_INDEX_MAP``, fetch the index closes and the
         NAV of every ETF listed for it in the loaded data, compute the
         tracking error and print the ten ETFs with the lowest value next to
         the Tiantian Fund figure.
      4. Write all results to
         ``rotation/results/tracking_error_validation.json`` and print a
         per-index summary of the differences.

    Returns:
        None. Output goes to stdout and to the JSON result file.
    """
    print("=" * 80)
    print("ETF跟踪误差计算与校验")
    print(f"分析日期: {datetime.now().strftime('%Y-%m-%d')}")
    print("=" * 80)

    # Initialise the API client.
    pro = ts.pro_api(os.getenv('TUSHARE_TOKEN'))

    # Analysis window: the most recent year.
    end_date = datetime.now().strftime('%Y-%m-%d')
    start_date = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d')
    print(f"计算区间: {start_date} ~ {end_date}")

    # Load the Tiantian Fund data used for comparison.
    eastmoney_path = PROJECT_ROOT / 'rotation' / 'results' / 'etf_competitor_analysis.json'
    eastmoney_data = {}
    if eastmoney_path.exists():
        with open(eastmoney_path, 'r', encoding='utf-8') as f:
            eastmoney_data = json.load(f)
        print(f"已加载天天基金数据: {len(eastmoney_data)} 个指数")

    # Compute the tracking error for every index.
    print("\n开始计算跟踪误差...")

    results = {}

    for key, info in POOL_INDEX_MAP.items():
        index_name = info['name']
        index_code = info['index_code']
        current_etf = info['current_etf']

        print(f"\n{'='*60}")
        print(f"=== {index_name} ({key}) ===")
        print(f"{'='*60}")

        # Fetch the index data.
        print(f" 获取指数数据: {index_code}")
        index_data = get_index_data(pro, index_code, start_date, end_date)
        if index_data is None:
            print(f" ✗ 指数数据获取失败")
            continue
        print(f" ✓ 指数数据: {len(index_data)} 天")

        # Collect every ETF listed for this index in the loaded data.
        etf_list = []
        if key in eastmoney_data:
            for etf in eastmoney_data[key]['etfs']:
                etf_list.append({
                    'code': etf['ts_code'],
                    'name': etf['name'],
                    'eastmoney_te': etf.get('annual_tracking_error', 'N/A'),
                })

        print(f" 共 {len(etf_list)} 只ETF需要计算")

        etf_results = []

        for etf_info in etf_list:
            etf_code = etf_info['code']
            etf_name = etf_info['name']

            # Fetch the ETF NAV, then pause after every request (including
            # failed ones) so consecutive API calls stay throttled.
            etf_nav = get_etf_nav_data(pro, etf_code, start_date, end_date)
            time.sleep(0.1)
            if etf_nav is None or len(etf_nav) < 20:
                continue

            # Compute the tracking error.
            tracking = calculate_tracking_error(etf_nav, index_data)
            if tracking is None:
                continue

            etf_results.append({
                'ts_code': etf_code,
                'name': etf_name,
                'tushare_te': tracking['annual_tracking_error'],
                'tushare_r2': tracking['r_squared'],
                'tushare_correlation': tracking['correlation'],
                'tushare_excess_return': tracking['excess_return'],
                'tushare_common_days': tracking['common_days'],
                'eastmoney_te': etf_info['eastmoney_te'],
                'is_current': etf_code == current_etf,
            })

        # Sort by tracking error, lowest first.
        etf_results.sort(key=lambda x: x['tushare_te'])

        results[key] = {
            'index_name': index_name,
            'index_code': index_code,
            'current_etf': current_etf,
            'etf_count': len(etf_results),
            'etfs': etf_results,
        }

        # Print the per-index table.
        print(f"\n 计算完成: {len(etf_results)} 只ETF")
        print(f" {'代码':<12} {'名称':<20} {'Tushare TE':<12} {'天天基金 TE':<12} {'差异':<10} {'R²':<8}")
        print(f" {'-'*80}")

        for etf in etf_results[:10]:
            tushare_te = f"{etf['tushare_te']:.4f}%"
            eastmoney_te = etf['eastmoney_te']

            # Difference against the benchmark figure, when it is parseable.
            em_te = _parse_te_percent(eastmoney_te)
            diff = 'N/A' if em_te is None else f"{etf['tushare_te'] - em_te:+.4f}%"

            marker = " ★" if etf['is_current'] else ""
            # str() keeps the row printable when the JSON value is null or
            # numeric; those types reject the '<12' format spec.
            print(f" {etf['ts_code']:<12} {etf['name'][:20]:<20} {tushare_te:<12} {str(eastmoney_te):<12} {diff:<10} {etf['tushare_r2']:<8}{marker}")

        if len(etf_results) > 10:
            print(f" ... 还有 {len(etf_results) - 10} 只")

    # Save the results; create missing parent directories as well.
    output_dir = PROJECT_ROOT / 'rotation' / 'results'
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / 'tracking_error_validation.json'

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2, default=str)

    print(f"\n{'='*80}")
    print(f"结果已保存: {output_path}")
    print(f"{'='*80}")

    # Summary statistics.
    print(f"\n{'='*80}")
    print("校验汇总")
    print(f"{'='*80}")

    for key, data in results.items():
        print(f"\n--- {data['index_name']} ---")
        print(f" 指数代码: {data['index_code']}")
        print(f" 计算ETF数: {data['etf_count']}")

        # ETFs for which a Tiantian Fund figure is present.
        matched = [e for e in data['etfs'] if e['eastmoney_te'] and e['eastmoney_te'] not in ['N/A', '--']]
        print(f" 天天基金有数据: {len(matched)}")

        # Differences for the entries whose benchmark value parses as a number.
        diffs = []
        for etf in matched:
            em_te = _parse_te_percent(etf['eastmoney_te'])
            if em_te is not None:
                diffs.append(etf['tushare_te'] - em_te)

        if diffs:
            avg_diff = np.mean(diffs)
            max_diff = max(diffs, key=abs)
            print(f" 平均差异: {avg_diff:+.4f}%")
            print(f" 最大差异: {max_diff:+.4f}%")
|
||
|
||
|
||
# Run the validation only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|