diff --git a/rotation/config_loader.py b/rotation/config_loader.py index a1cba5f..0067b85 100644 --- a/rotation/config_loader.py +++ b/rotation/config_loader.py @@ -56,6 +56,7 @@ class WeightType(str, Enum): EQUAL = "equal" # 等权 RANK = "rank" # 按排名加权 (slot i gets (N-i)/triangular(N)) KELLY = "kelly" # Kelly准则近似 (score-proportional weighting) + GREEDY = "greedy" # 贪心分配(按ETF池容量吸收仓位) class DataSourceType(str, Enum): @@ -82,6 +83,7 @@ class AssetConfig(BaseModel): signal_source: str = Field(..., description="信号来源代码") trade_source: str = Field(..., description="交易来源代码") + etf_pool: List[str] = Field(default_factory=list, description="可交易ETF池(按规模排序)") description: Optional[str] = Field(None, description="标的描述") etf: Optional[str] = Field(None, description="ETF代码(兼容旧配置)") @@ -179,6 +181,7 @@ class RotationConfig(BaseModel): diversified: bool = Field(default=True) threshold: ThresholdConfig = Field(default_factory=ThresholdConfig) weight: WeightType = Field(default=WeightType.EQUAL) + etf_max_weight: float = Field(default=0.25, ge=0.01, le=1.0) class RebalanceConfig(BaseModel): diff --git a/rotation/config_simple.yaml b/rotation/config_simple.yaml index 3111bb1..f15174c 100644 --- a/rotation/config_simple.yaml +++ b/rotation/config_simple.yaml @@ -6,66 +6,77 @@ asset_pools: name: 创业板指 signal_source: 399006.SZ trade_source: 159915.SZ + etf_pool: [159915.SZ, 159952.SZ, 159957.SZ, 159948.SZ] 931862.CSI: description: 中证0-9个月国债指数,久期<1年,防御配置 group: BOND name: 短债指数 signal_source: 931862.CSI trade_source: 931862.CSI + etf_pool: [931862.CSI] CL=F: description: WTI原油期货(2000年至今) group: COMMODITY name: 原油 signal_source: CL=F trade_source: 160723.SZ + etf_pool: [160723.SZ, 501018.SH, 161129.SZ] GC=F: description: COMEX黄金期货(2000年至今) group: COMMODITY name: 黄金 signal_source: GC=F trade_source: 518880.SH + etf_pool: [518880.SH, 159937.SZ, 159934.SZ, 518800.SH] GDAXI: description: 德国DAX指数 group: EU name: 德国DAX signal_source: GDAXI trade_source: 513030.SH + etf_pool: [159561.SZ, 513030.SH] H30269.CSI: description: 红利低波指数 group: A name: 中证红利低波 signal_source: H30269.CSI trade_source: 512890.SH + etf_pool: [512890.SH, 563020.SH, 159547.SZ, 560150.SH] HG=F: description: COMEX铜期货(2000年至今) group: COMMODITY name: 有色金属 signal_source: HG=F trade_source: 159980.SZ + etf_pool: [159980.SZ] HSI: description: 恒生指数 group: HK name: 恒生指数 signal_source: HSI trade_source: 159920.SZ + etf_pool: [159920.SZ, 513600.SH, 159271.SZ, 513210.SH] HSTECH.HK: description: 恒生科技指数 group: HK name: 恒生科技 signal_source: HSTECH.HK trade_source: 513130.SH + etf_pool: [513180.SH, 513130.SH, 513010.SH, 159740.SZ] N225: description: 日经225指数 group: JP name: 日经225 signal_source: N225 trade_source: 513520.SH + etf_pool: [513880.SH, 513520.SH, 513000.SH, 159866.SZ] NDX: description: 纳斯达克100指数 group: US name: 纳指100 signal_source: NDX trade_source: 513100.SH + etf_pool: [159941.SZ, 513100.SH, 513300.SH, 159501.SZ] backtest: start_date: '2020-01-10' benchmark: @@ -109,6 +120,7 @@ rotation: diversified: true select_num: 3 weight: rank + etf_max_weight: 0.25 threshold: dynamic: fallback_enabled: true diff --git a/rotation/enrich_etf_data.py b/rotation/enrich_etf_data.py new file mode 100644 index 0000000..62b69f2 --- /dev/null +++ b/rotation/enrich_etf_data.py @@ -0,0 +1,173 @@ +""" +补充 ETF 丰富信息到 etf_basic_full.csv(批量版) +- fund_share: 最新基金份额 +- fund_daily: 近20日日均成交额 +- fund_nav: 最新累计净值 +""" +import os +import sys +import time +import pandas as pd +import numpy as np +from pathlib import Path +from datetime import datetime, timedelta + +PROJECT_ROOT = Path(__file__).parent.parent +sys.path.insert(0, str(PROJECT_ROOT)) +from dotenv import load_dotenv +load_dotenv(PROJECT_ROOT / '.env') + +import tushare as ts + +pro = ts.pro_api(os.getenv('TUSHARE_TOKEN')) + +csv_path = PROJECT_ROOT / 'rotation' / 'results' / 'etf_basic_full.csv' +df = pd.read_csv(csv_path) +print(f"加载 {len(df)} 只ETF") + +# 初始化新列 +df['fund_scale_yi'] = np.nan +df['avg_amount_wan'] = np.nan +df['avg_vol_wan'] = np.nan +df['latest_nav'] = np.nan +df['nav_date'] = '' + +end_date = datetime.now().strftime('%Y%m%d') +start_date = (datetime.now() - timedelta(days=60)).strftime('%Y%m%d') +daily_start = (datetime.now() - timedelta(days=40)).strftime('%Y%m%d') + +all_codes = df['ts_code'].tolist() + +# ============================================================ +# 1. fund_share - 批量获取(每次最多100只) +# ============================================================ +print(f"\n[1/3] 获取基金份额...") +share_records = [] +batch_size = 100 +for i in range(0, len(all_codes), batch_size): + batch = all_codes[i:i+batch_size] + ts_codes_str = ','.join(batch) + try: + share_df = pro.fund_share(ts_code=ts_codes_str, start_date=start_date, end_date=end_date) + if share_df is not None and len(share_df) > 0: + share_records.append(share_df) + except Exception as e: + pass + if (i + batch_size) % 500 == 0: + print(f" 进度: {min(i+batch_size, len(all_codes))}/{len(all_codes)}") + time.sleep(0.3) + +if share_records: + all_shares = pd.concat(share_records, ignore_index=True) + # 每只取最新一条 + latest_shares = all_shares.sort_values('trade_date', ascending=False).drop_duplicates(subset=['ts_code'], keep='first') + share_map = dict(zip(latest_shares['ts_code'], latest_shares['fd_share'])) + df['fund_scale_yi'] = df['ts_code'].map(share_map) / 10000 # 万份→亿份 + print(f" ✓ 份额数据: {len(share_map)} 只") +else: + print(f" ✗ 份额数据获取失败") + +# ============================================================ +# 2. fund_daily - 批量获取 +# ============================================================ +print(f"\n[2/3] 获取日线行情...") +daily_records = [] +for i in range(0, len(all_codes), batch_size): + batch = all_codes[i:i+batch_size] + ts_codes_str = ','.join(batch) + try: + daily_df = pro.fund_daily(ts_code=ts_codes_str, start_date=daily_start, end_date=end_date) + if daily_df is not None and len(daily_df) > 0: + daily_records.append(daily_df) + except Exception as e: + pass + if (i + batch_size) % 500 == 0: + print(f" 进度: {min(i+batch_size, len(all_codes))}/{len(all_codes)}") + time.sleep(0.3) + +if daily_records: + all_daily = pd.concat(daily_records, ignore_index=True) + # 每只取最近20个交易日,算均值 + daily_stats = all_daily.groupby('ts_code').agg( + avg_amount=('amount', 'mean'), + avg_vol=('vol', 'mean') + ).reset_index() + daily_map_amt = dict(zip(daily_stats['ts_code'], daily_stats['avg_amount'])) + daily_map_vol = dict(zip(daily_stats['ts_code'], daily_stats['avg_vol'])) + df['avg_amount_wan'] = df['ts_code'].map(daily_map_amt) / 10 # 千元→万元 + df['avg_vol_wan'] = df['ts_code'].map(daily_map_vol) / 10000 # 手→万手 + print(f" ✓ 行情数据: {len(daily_stats)} 只") +else: + print(f" ✗ 行情数据获取失败") + +# ============================================================ +# 3. fund_nav - 批量获取 +# ============================================================ +print(f"\n[3/3] 获取基金净值...") +nav_records = [] +for i in range(0, len(all_codes), batch_size): + batch = all_codes[i:i+batch_size] + ts_codes_str = ','.join(batch) + try: + nav_df = pro.fund_nav(ts_code=ts_codes_str, start_date=start_date, end_date=end_date) + if nav_df is not None and len(nav_df) > 0: + nav_records.append(nav_df) + except Exception as e: + pass + if (i + batch_size) % 500 == 0: + print(f" 进度: {min(i+batch_size, len(all_codes))}/{len(all_codes)}") + time.sleep(0.3) + +if nav_records: + all_nav = pd.concat(nav_records, ignore_index=True) + latest_nav = all_nav.sort_values('nav_date', ascending=False).drop_duplicates(subset=['ts_code'], keep='first') + nav_map = dict(zip(latest_nav['ts_code'], latest_nav['accum_nav'])) + nav_date_map = dict(zip(latest_nav['ts_code'], latest_nav['nav_date'].astype(str))) + df['latest_nav'] = df['ts_code'].map(nav_map) + df['nav_date'] = df['ts_code'].map(nav_date_map) + print(f" ✓ 净值数据: {len(nav_map)} 只") +else: + print(f" ✗ 净值数据获取失败") + +# 用 NAV 精确计算规模 +mask = df['fund_scale_yi'].notna() & df['latest_nav'].notna() +df.loc[mask, 'fund_scale_yi'] = (df.loc[mask, 'fund_scale_yi'] * df.loc[mask, 'latest_nav']).round(2) + +# 保存 +df.to_csv(csv_path, index=False, encoding='utf-8-sig') +print(f"\n已保存: {csv_path}") +print(f"新增字段: fund_scale_yi, avg_amount_wan, avg_vol_wan, latest_nav, nav_date") + +# 统计 +print(f"\n{'='*80}") +print("数据质量") +print(f"{'='*80}") +print(f"基金规模: {df['fund_scale_yi'].notna().sum()}/{len(df)} ({df['fund_scale_yi'].notna().mean()*100:.1f}%)") +print(f"日均成交额: {df['avg_amount_wan'].notna().sum()}/{len(df)} ({df['avg_amount_wan'].notna().mean()*100:.1f}%)") +print(f"最新净值: {df['latest_nav'].notna().sum()}/{len(df)} ({df['latest_nav'].notna().mean()*100:.1f}%)") + +# 轮动策略标的池 +print(f"\n{'='*80}") +print("轮动策略标的池") +print(f"{'='*80}") + +pool_etfs = { + '159915.SZ': '创业板指', '512890.SH': '红利低波', + '159920.SZ': '恒生指数', '513130.SH': '恒生科技', + '513100.SH': '纳指100', '513520.SH': '日经225', + '513030.SH': '德国DAX', '518880.SH': '黄金', + '159980.SZ': '有色金属', '160723.SZ': '原油', +} + +print(f"{'代码':<12} {'名称':<8} {'规模(亿)':>10} {'日成交额(万)':>14} {'净值':>8} {'费率':>6} {'类型':<6}") +print('-' * 75) +for code, name in pool_etfs.items(): + row = df[df['ts_code'] == code] + if len(row) > 0: + r = row.iloc[0] + scale = f"{r['fund_scale_yi']:.1f}" if pd.notna(r['fund_scale_yi']) else 'N/A' + amt = f"{r['avg_amount_wan']:,.0f}" if pd.notna(r['avg_amount_wan']) else 'N/A' + nav = f"{r['latest_nav']:.4f}" if pd.notna(r['latest_nav']) else 'N/A' + fee = f"{r['mgt_fee']}%" if pd.notna(r['mgt_fee']) else 'N/A' + etype = str(r['etf_type']) + print(f"{code:<12} {name:<8} {scale:>10} {amt:>14} {nav:>8} {fee:>6} {etype:<6}") diff --git a/rotation/oil_tracking.py b/rotation/oil_tracking.py new file mode 100644 index 0000000..aacac08 --- /dev/null +++ b/rotation/oil_tracking.py @@ -0,0 +1,129 @@ +"""计算3只原油ETF跟踪WTI原油价格的准确率(使用Flask API数据源)""" +import os, sys +sys.path.insert(0, '/app') +from dotenv import load_dotenv +load_dotenv('/app/.env') + +import pandas as pd +import numpy as np +import requests +import time + +# 绕过系统代理避免SSL EOF +_session = requests.Session() +_session.trust_env = False + +BASE_URL = os.getenv('FLASK_API_URL', 'https://k3s.tokenpluse.xyz') + +def fetch_ohlcv(code, start='2020-01-01', end='2026-06-20'): + """通过Flask API获取OHLCV数据""" + url = f"{BASE_URL}/api/v1/ohlcv" + params = {'code': code, 'start': start, 'end': end} + for attempt in range(3): + try: + resp = _session.get(url, params=params, timeout=120) + if resp.status_code == 200: + data = resp.json() + if 'error' in data or not data.get('data'): + print(f" {code}: 无数据 - {data.get('error', 'empty')}") + return None + df = pd.DataFrame(data['data']) + df['date'] = pd.to_datetime(df['date']) + df = df.set_index('date').sort_index() + df['ret'] = df['close'].pct_change() + cnt = data.get('count', len(df)) + dr = data.get('date_range', {}) + print(f" {code}: {cnt} 条 ({dr.get('start','?')} ~ {dr.get('end','?')})") + return df + else: + print(f" {code}: HTTP {resp.status_code}") + return None + except Exception as e: + if attempt < 2: + time.sleep(2) + continue + print(f" {code}: 失败 - {e}") + return None + return None + +# 1. 获取WTI原油价格 +print("=" * 90) +print("获取WTI原油价格 (CL=F)") +print("=" * 90) +cl_df = fetch_ohlcv('CL=F') + +if cl_df is None or len(cl_df) < 10: + print("WTI数据获取失败,退出") + sys.exit(1) + +# 2. 获取3只原油ETF价格 +etf_codes = { + '160723.SZ': '嘉实原油(WTI*100%)', + '161129.SZ': '易方达原油(标普高盛原油*100%)', + '501018.SH': '南方原油(WTI*60%+BRENT*40%)', +} + +print("\n" + "=" * 90) +print("获取原油ETF价格") +print("=" * 90) +etf_dfs = {} +for code, name in etf_codes.items(): + df = fetch_ohlcv(code) + if df is not None: + etf_dfs[code] = name + +# 3. 计算跟踪准确率 +print("\n" + "=" * 90) +print("跟踪准确率(基于ETF收盘价收益率 vs WTI收盘价日收益率)") +print("=" * 90) + +for code, name in etf_dfs.items(): + etf = etf_dfs[code][['ret']].copy() + etf.columns = ['etf_ret'] + etf.index = etf.index.normalize() + + cl = cl_df[['ret']].copy() + cl.columns = ['cl_ret'] + cl.index = cl.index.normalize() + + m = pd.merge(etf.reset_index(), cl.reset_index(), on='date', how='inner', suffixes=('', '_cl')) + if 'ret' in m.columns and 'ret_cl' in m.columns: + m = m.rename(columns={'ret': 'etf_ret', 'ret_cl': 'cl_ret'}) + m = m[['date', 'etf_ret', 'cl_ret']].dropna() + + if len(m) < 10: + print(f"\n{code} {name}: 数据不足 ({len(m)} 天)") + continue + + corr = m['etf_ret'].corr(m['cl_ret']) + r2 = corr ** 2 + diff = m['etf_ret'] - m['cl_ret'] + te_annual = diff.std() * np.sqrt(252) + cum_etf = (1 + m['etf_ret']).prod() - 1 + cum_cl = (1 + m['cl_ret']).prod() - 1 + bias = diff.mean() + + # 分段: 2024至今 + recent = m[m['date'] >= '2024-01-01'] + if len(recent) > 20: + r2_recent = recent['etf_ret'].corr(recent['cl_ret']) ** 2 + else: + r2_recent = np.nan + + print(f"\n{code} {name}") + print(f" 重叠交易日: {len(m)} 天") + print(f" 全区间 R²: {r2:.4f} ({r2*100:.1f}%)") + if not np.isnan(r2_recent): + print(f" 2024至今 R²: {r2_recent:.4f} ({r2_recent*100:.1f}%)") + else: + print(f" 2024至今 R²: 数据不足") + print(f" 年化跟踪误差: {te_annual*100:.2f}%") + print(f" 日均偏差: {bias*100:.4f}%") + print(f" ETF累计收益: {cum_etf*100:.1f}%") + print(f" WTI累计收益: {cum_cl*100:.1f}%") + print(f" 累计收益差: {(cum_etf-cum_cl)*100:.1f}%") + +print("\n" + "=" * 90) +print("注: 原油ETF为QDII-LOF,净值披露有T+1~T+2延迟,") +print(" 且需通过期货合约展期,与WTI现货价格存在结构性偏差。") +print("=" * 90) diff --git a/rotation/simple_rotation.py b/rotation/simple_rotation.py index 4cd2959..08666e2 100644 --- a/rotation/simple_rotation.py +++ b/rotation/simple_rotation.py @@ -469,6 +469,10 @@ class SimpleRotationStrategy: self.min_hold_days = self.config.rebalance.min_hold_days self.weight_type = self.config.rotation.weight.value # 'equal' or 'rank' + # ETF expansion: max weight per individual ETF + self.etf_max_weight = getattr(self.config.rotation, 'etf_max_weight', 0.25) + self.min_slots = math.ceil(1.0 / self.etf_max_weight) # e.g. 4 for 25% + # Dynamic threshold threshold = self.config.rotation.threshold self.use_dynamic_threshold = (threshold.mode.value == 'dynamic') @@ -487,6 +491,21 @@ class SimpleRotationStrategy: self.code_to_group[asset.signal_source] = asset.group self.trade_code_to_group[asset.trade_source] = asset.group + # ETF expansion pool: signal_code -> [etf_code, ...] from config + self.signal_to_etfs: Dict[str, List[str]] = {} + for code, asset in self.config.asset_pools.assets.items(): + if asset.etf_pool: + self.signal_to_etfs[asset.signal_source] = asset.etf_pool + else: + # Fallback: use trade_source as single ETF + self.signal_to_etfs[asset.signal_source] = [asset.trade_source] + + # Log ETF pool summary + for signal_code, etfs in self.signal_to_etfs.items(): + asset = self.config.asset_pools.assets.get(signal_code) + name = asset.name if asset else signal_code + print(f" ETF pool [{name}]: {len(etfs)} ETFs {etfs}") + # Data source data_source = self.config.data.sources[0] base_url = data_source.url or 'https://k3s.tokenpluse.xyz' @@ -508,6 +527,55 @@ class SimpleRotationStrategy: # Position weights: code -> weight (updated each day by _generate_signals) self._position_weights: Dict[str, float] = {} + def _compute_greedy_weights(self, holdings: List[str], factors: Dict[str, float]) -> Dict[str, float]: + """Compute greedy weights for signal-level holdings. + + Greedy Algorithm: + 1. Each index has absorption capacity = min(n_etfs, ceil(1/max_weight)) × max_weight + 2. Iterate through holdings in order (sorted by momentum) + 3. Each index absorbs up to its capacity + 4. Remaining weight flows to next index + + Example (select_num=1, max_weight=0.25): + - 有色金属(1 ETF): capacity=25%, absorbs 25%, remaining=75% + - 原油(3 ETFs): capacity=75%, absorbs 75%, remaining=0% + - Total: 100% + + Args: + holdings: List of signal codes (sorted by momentum desc) + factors: Dict of signal_code -> momentum score + + Returns: + signal_weights: Dict mapping signal_code -> weight + """ + if not holdings: + return {} + + signal_weights = {} + remaining_weight = 1.0 + + for signal_code in holdings: + if remaining_weight <= 0: + break + + # Get ETF pool size for this index + etf_pool = self.signal_to_etfs.get(signal_code, []) + n_etfs = len(etf_pool) if etf_pool else 1 + + # Calculate absorption capacity + max_etfs_can_use = math.ceil(1.0 / self.etf_max_weight) # e.g. 4 for 25% + n_to_use = min(n_etfs, max_etfs_can_use) + capacity = n_to_use * self.etf_max_weight + + # Absorb up to capacity (but not more than remaining) + absorb = min(capacity, remaining_weight) + remaining_weight -= absorb + + # Assign weight to this signal + signal_weights[signal_code] = absorb + + return signal_weights + def _preload_data(self): """Preload all historical data""" start_date = self.config.backtest.start_date @@ -728,13 +796,69 @@ class SimpleRotationStrategy: return self._position_weights[code] return 1.0 / n_unique if n_unique > 0 else 0.0 - def _calculate_daily_return(self, old_holdings, new_holdings, date, is_rebalance): + def _calculate_daily_return(self, old_holdings, new_holdings, date, is_rebalance, factors=None): """ Compute daily return (T+1 execution) with configurable position weighting: - Hold: close-to-close, weighted by today's position weight - Sell: close-to-open (sold at open), weighted by today's position weight - Buy: open-to-close (intraday), weighted by today's position weight + + When weight=greedy, computes signal-level weights based on ETF pool capacity. + Otherwise, uses signal_to_trade mapping with position_weights. """ + # Greedy mode: compute signal-level weights based on ETF pool capacity + if self.weight_type == 'greedy': + factors = factors or {} + old_weights = self._compute_greedy_weights(old_holdings, factors) if old_holdings else {} + new_weights = self._compute_greedy_weights(new_holdings, factors) if new_holdings else {} + + # Use signal-level codes with trade_source prices + old_set = set(old_holdings) if old_holdings else set() + new_set = set(new_holdings) if new_holdings else set() + + if not old_set: + if not new_set: + return 0.0 + ret = 0.0 + for code in new_set: + tc = self.signal_to_trade.get(code, code) + p = self._get_etf_prices(tc, date) + w = new_weights.get(code, 0.0) + if p and p['open'] > 0: + ret += w * (p['close'] - p['open']) / p['open'] + if is_rebalance: + ret -= self.trade_cost + return ret + + daily_return = 0.0 + + for code in old_set: + tc = self.signal_to_trade.get(code, code) + p = self._get_etf_prices(tc, date) + if p is None or p['prev_close'] == 0: + continue + w = old_weights.get(code, 0.0) + if code in new_set: + r = (p['close'] - p['prev_close']) / p['prev_close'] + else: + r = (p['open'] - p['prev_close']) / p['prev_close'] + if not math.isnan(r): + daily_return += w * r + + for code in new_set - old_set: + tc = self.signal_to_trade.get(code, code) + p = self._get_etf_prices(tc, date) + w = new_weights.get(code, 0.0) + if p and p['open'] > 0 and not math.isnan(p['close']): + r = (p['close'] - p['open']) / p['open'] + if not math.isnan(r): + daily_return += w * r + + if is_rebalance: + daily_return -= self.trade_cost + return daily_return + + # Original mode: use signal_to_trade mapping if not old_holdings: if not new_holdings: return 0.0 @@ -862,7 +986,7 @@ class SimpleRotationStrategy: # Return uses T's ETF prices (open for buy/sell, close for hold) daily_return = self._calculate_daily_return( - current_holdings, new_holdings, date, is_rebalance + current_holdings, new_holdings, date, is_rebalance, factors=factors ) nav *= (1 + daily_return) @@ -887,6 +1011,9 @@ class SimpleRotationStrategy: for code in removed: entry_info.pop(code, None) + # Compute greedy weights for signal-level holdings + greedy_weights = self._compute_greedy_weights(new_holdings, factors) if self.weight_type == 'greedy' else {} + # Compute bond threshold value for detail record threshold_val = 0.0 if self.use_dynamic_threshold and bond_momentum is not None: @@ -898,6 +1025,7 @@ class SimpleRotationStrategy: 'daily_return': round(daily_return, 6), 'is_rebalance': is_rebalance, 'holdings': sorted(new_holdings), + 'greedy_weights': {k: round(v, 6) for k, v in greedy_weights.items()} if greedy_weights else None, 'added': sorted(added), 'removed': sorted(removed), 'factors': {k: round(v, 6) for k, v in factors.items()}, diff --git a/rotation/tracking_error_full.py b/rotation/tracking_error_full.py new file mode 100644 index 0000000..8d90ef9 --- /dev/null +++ b/rotation/tracking_error_full.py @@ -0,0 +1,399 @@ +""" +ETF跟踪误差全量计算 +- 覆盖轮动策略标的池全部10个标的 +- 数据源分层: + - A股指数 → Tushare index_daily + - 商品 → Tushare fut_daily(主力合约) + - 海外指数 → Flask API (yfinance) +- 与天天基金数据对比校验 +""" +import os +import sys +import time +import json +import pandas as pd +import numpy as np +from pathlib import Path +from datetime import datetime, timedelta + +PROJECT_ROOT = Path(__file__).parent.parent +sys.path.insert(0, str(PROJECT_ROOT)) +from dotenv import load_dotenv +load_dotenv(PROJECT_ROOT / '.env') + +import tushare as ts +from datasource.flask_api_source import FlaskAPIDataSource + + +# ============================================================ +# 轮动策略标的池:全部10个标的 +# ============================================================ +POOL_CONFIG = { + # --- A股指数(Tushare index_daily)--- + '399006.SZ': { + 'name': '创业板指', 'current_etf': '159915.SZ', 'group': 'A', + 'benchmark_type': 'tushare_index', 'benchmark_code': '399006.SZ', + }, + 'H30269.CSI': { + 'name': '红利低波', 'current_etf': '512890.SH', 'group': 'A', + 'benchmark_type': 'tushare_index', 'benchmark_code': 'H30269.CSI', + }, + # --- 商品(Tushare fut_daily 主力合约)--- + 'GC=F': { + 'name': '黄金', 'current_etf': '518880.SH', 'group': 'COMMODITY', + 'benchmark_type': 'tushare_futures', 'benchmark_code': 'AU.SHF', + }, + 'HG=F': { + 'name': '有色金属', 'current_etf': '159980.SZ', 'group': 'COMMODITY', + 'benchmark_type': 'tushare_futures', 'benchmark_code': 'CU.SHF', + }, + # --- 海外指数(Flask API / yfinance)--- + 'HSI': { + 'name': '恒生指数', 'current_etf': '159920.SZ', 'group': 'HK', + 'benchmark_type': 'flask_api', 'benchmark_code': '^HSI', + }, + 'HSTECH.HK': { + 'name': '恒生科技', 'current_etf': '513130.SH', 'group': 'HK', + 'benchmark_type': 'flask_api', 'benchmark_code': 'HSTECH.HK', + }, + 'NDX': { + 'name': '纳指100', 'current_etf': '513100.SH', 'group': 'US', + 'benchmark_type': 'flask_api', 'benchmark_code': '^NDX', + }, + 'N225': { + 'name': '日经225', 'current_etf': '513520.SH', 'group': 'JP', + 'benchmark_type': 'flask_api', 'benchmark_code': '^N225', + }, + 'GDAXI': { + 'name': '德国DAX', 'current_etf': '513030.SH', 'group': 'EU', + 'benchmark_type': 'flask_api', 'benchmark_code': '^GDAXI', + }, + # --- 原油(用最早ETF做基准,无可靠数据源)--- + 'CL=F': { + 'name': '原油', 'current_etf': '160723.SZ', 'group': 'COMMODITY', + 'benchmark_type': 'earliest_etf', 'benchmark_code': '159518.SZ', + }, +} + + +# ============================================================ +# 数据获取函数 +# ============================================================ + +def get_etf_nav_tushare(pro, etf_code, start_date, end_date): + """获取ETF累计净值(Tushare fund_nav)""" + try: + df = pro.fund_nav( + ts_code=etf_code, + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', '') + ) + if df is not None and len(df) > 0: + df['date'] = pd.to_datetime(df['nav_date']) + df = df.set_index('date').sort_index() + return df['accum_nav'].astype(float) + except Exception as e: + pass + return None + + +def get_benchmark_tushare_index(pro, index_code, start_date, end_date): + """获取A股指数收盘价(Tushare index_daily)""" + try: + df = pro.index_daily( + ts_code=index_code, + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', '') + ) + if df is not None and len(df) > 0: + df['date'] = pd.to_datetime(df['trade_date']) + df = df.set_index('date').sort_index() + return df['close'].astype(float) + except Exception as e: + pass + return None + + +def get_benchmark_tushare_futures(pro, fut_code, start_date, end_date): + """获取期货主力合约收盘价(Tushare fut_daily)""" + try: + df = pro.fut_daily( + ts_code=fut_code, + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', '') + ) + if df is not None and len(df) > 0: + df['date'] = pd.to_datetime(df['trade_date']) + df = df.set_index('date').sort_index() + return df['close'].astype(float) + except Exception as e: + pass + return None + + +def get_benchmark_flask_api(flask_source, yf_code, start_date, end_date): + """获取海外指数数据(Flask API / yfinance)""" + try: + df = flask_source.fetch(yf_code, start_date, end_date) + if df is not None and len(df) > 0: + return df['close'].astype(float) + except Exception as e: + pass + return None + + +def get_etf_close_tushare(pro, etf_code, start_date, end_date): + """获取ETF收盘价(用于原油等无基准数据的情况)""" + try: + df = pro.fund_daily( + ts_code=etf_code, + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', '') + ) + if df is not None and len(df) > 0: + df['date'] = pd.to_datetime(df['trade_date']) + df = df.set_index('date').sort_index() + return df['close'].astype(float) + except Exception as e: + pass + return None + + +# ============================================================ +# 跟踪误差计算 +# ============================================================ + +def calculate_tracking_error(etf_nav, benchmark_close): + """ + 计算跟踪误差 + 公式:STDEV(每日偏离度) × √252 + 每日偏离度 = ETF净值收益率 - 基准收益率 + """ + if etf_nav is None or benchmark_close is None: + return None + + etf_ret = etf_nav.pct_change().dropna() + bench_ret = benchmark_close.pct_change().dropna() + + common = etf_ret.index.intersection(bench_ret.index) + if len(common) < 20: + return None + + e = etf_ret.loc[common] + b = bench_ret.loc[common] + + daily_deviation = e - b + tracking_error = daily_deviation.std() * np.sqrt(252) + correlation = e.corr(b) + r_squared = correlation ** 2 + + etf_cum = (1 + e).prod() - 1 + bench_cum = (1 + b).prod() - 1 + excess = etf_cum - bench_cum + + return { + 'annual_tracking_error': round(tracking_error * 100, 4), + 'correlation': round(correlation, 6), + 'r_squared': round(r_squared, 6), + 'etf_cum_return': round(etf_cum * 100, 2), + 'benchmark_cum_return': round(bench_cum * 100, 2), + 'excess_return': round(excess * 100, 2), + 'common_days': len(common), + } + + +# ============================================================ +# 主流程 +# ============================================================ + +def main(): + print("=" * 80) + print("ETF跟踪误差全量计算(10个标的)") + print(f"分析日期: {datetime.now().strftime('%Y-%m-%d')}") + print("=" * 80) + + # 初始化数据源 + pro = ts.pro_api(os.getenv('TUSHARE_TOKEN')) + flask_source = FlaskAPIDataSource() + + # 分析区间:最近1年 + end_date = datetime.now().strftime('%Y-%m-%d') + start_date = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d') + print(f"计算区间: {start_date} ~ {end_date}") + + # 加载天天基金数据(用于校验 + 获取ETF列表) + eastmoney_path = PROJECT_ROOT / 'rotation' / 'results' / 'etf_competitor_analysis.json' + eastmoney_data = {} + if eastmoney_path.exists(): + with open(eastmoney_path, 'r', encoding='utf-8') as f: + eastmoney_data = json.load(f) + print(f"已加载天天基金数据: {len(eastmoney_data)} 个标的") + + # 按基准类型分组获取(减少重复请求) + benchmark_cache = {} # benchmark_key -> Series + + results = {} + + for key, info in POOL_CONFIG.items(): + index_name = info['name'] + current_etf = info['current_etf'] + btype = info['benchmark_type'] + bcode = info['benchmark_code'] + + print(f"\n{'='*60}") + print(f"=== {index_name} ({key}) | 基准类型: {btype} ===") + print(f"{'='*60}") + + # Step 1: 获取基准数据(带缓存) + bench_key = f"{btype}:{bcode}" + if bench_key not in benchmark_cache: + print(f" 获取基准数据: {bcode} ({btype})") + if btype == 'tushare_index': + benchmark = get_benchmark_tushare_index(pro, bcode, start_date, end_date) + elif btype == 'tushare_futures': + benchmark = get_benchmark_tushare_futures(pro, bcode, start_date, end_date) + elif btype == 'flask_api': + benchmark = get_benchmark_flask_api(flask_source, bcode, start_date, end_date) + elif btype == 'earliest_etf': + benchmark = get_etf_close_tushare(pro, bcode, start_date, end_date) + else: + benchmark = None + + if benchmark is not None: + benchmark_cache[bench_key] = benchmark + print(f" ✓ 基准数据: {len(benchmark)} 天") + else: + print(f" ✗ 基准数据获取失败") + benchmark_cache[bench_key] = None + else: + benchmark = benchmark_cache[bench_key] + print(f" (缓存) 基准数据: {len(benchmark)} 天") + + if benchmark is None: + print(f" 跳过(无基准数据)") + continue + + # Step 2: 获取该标的下所有ETF + etf_list = [] + if key in eastmoney_data: + for etf in eastmoney_data[key]['etfs']: + etf_list.append({ + 'code': etf['ts_code'], + 'name': etf['name'], + 'eastmoney_te': etf.get('annual_tracking_error', 'N/A'), + }) + + print(f" 共 {len(etf_list)} 只ETF需要计算") + + # Step 3: 逐只计算跟踪误差 + etf_results = [] + + for etf_info in etf_list: + etf_code = etf_info['code'] + etf_name = etf_info['name'] + + # 获取ETF NAV(或收盘价) + if btype == 'earliest_etf': + # 原油:用收盘价对比收盘价 + etf_data = get_etf_close_tushare(pro, etf_code, start_date, end_date) + else: + etf_data = get_etf_nav_tushare(pro, etf_code, start_date, end_date) + + if etf_data is None or len(etf_data) < 20: + continue + + tracking = calculate_tracking_error(etf_data, benchmark) + if tracking is None: + continue + + result = { + 'ts_code': etf_code, + 'name': etf_name, + 'tushare_te': tracking['annual_tracking_error'], + 'tushare_r2': tracking['r_squared'], + 'tushare_correlation': tracking['correlation'], + 'tushare_excess_return': tracking['excess_return'], + 'tushare_common_days': tracking['common_days'], + 'eastmoney_te': etf_info['eastmoney_te'], + 'is_current': etf_code == current_etf, + } + + etf_results.append(result) + time.sleep(0.05) + + # 按跟踪误差排序 + etf_results.sort(key=lambda x: x['tushare_te']) + + results[key] = { + 'index_name': index_name, + 'current_etf': current_etf, + 'benchmark_type': btype, + 'benchmark_code': bcode, + 'group': info['group'], + 'etf_count': len(etf_results), + 'etfs': etf_results, + } + + # 打印结果 + print(f"\n 计算完成: {len(etf_results)} 只ETF") + print(f" {'代码':<12} {'名称':<20} {'TE':<10} {'天天基金TE':<12} {'R²':<8}") + print(f" {'-'*70}") + + for etf in etf_results[:10]: + te_str = f"{etf['tushare_te']:.4f}%" + em_te = etf['eastmoney_te'] + marker = " ★" if etf['is_current'] else "" + print(f" {etf['ts_code']:<12} {etf['name'][:20]:<20} {te_str:<10} {em_te:<12} {etf['tushare_r2']:<8}{marker}") + + if len(etf_results) > 10: + print(f" ... 还有 {len(etf_results) - 10} 只") + + # ============================================================ + # 保存结果 + # ============================================================ + output_dir = PROJECT_ROOT / 'rotation' / 'results' + output_dir.mkdir(exist_ok=True) + output_path = output_dir / 'tracking_error_full.json' + + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2, default=str) + + print(f"\n{'='*80}") + print(f"结果已保存: {output_path}") + print(f"{'='*80}") + + # ============================================================ + # 汇总校验 + # ============================================================ + print(f"\n{'='*80}") + print("全量校验汇总") + print(f"{'='*80}") + + for key, data in results.items(): + matched = [e for e in data['etfs'] + if e['eastmoney_te'] and e['eastmoney_te'] not in ['N/A', '--']] + + print(f"\n--- {data['index_name']} ({data['benchmark_type']}) ---") + print(f" ETF总数: {data['etf_count']} | 天天基金有数据: {len(matched)}") + + if matched: + diffs = [] + for etf in matched: + try: + em_te = float(etf['eastmoney_te'].replace('%', '')) + diffs.append(etf['tushare_te'] - em_te) + except: + pass + if diffs: + print(f" 平均差异: {np.mean(diffs):+.4f}% | 最大差异: {max(diffs, key=abs):+.4f}%") + + # 打印前3名 + top3 = data['etfs'][:3] + print(f" Top3 (TE最低):") + for i, etf in enumerate(top3, 1): + marker = " ★当前" if etf['is_current'] else "" + print(f" {i}. {etf['ts_code']} {etf['name']} TE={etf['tushare_te']:.4f}%{marker}") + + +if __name__ == '__main__': + main() diff --git a/rotation/tracking_error_validation.py b/rotation/tracking_error_validation.py new file mode 100644 index 0000000..816a78d --- /dev/null +++ b/rotation/tracking_error_validation.py @@ -0,0 +1,289 @@ +""" +ETF跟踪误差计算与校验 +- 使用Tushare数据计算ETF跟踪误差(基于NAV) +- 与天天基金数据对比校验 +""" +import os +import sys +import time +import json +import pandas as pd +import numpy as np +from pathlib import Path +from datetime import datetime, timedelta + +PROJECT_ROOT = Path(__file__).parent.parent +sys.path.insert(0, str(PROJECT_ROOT)) +from dotenv import load_dotenv +load_dotenv(PROJECT_ROOT / '.env') + +import tushare as ts + + +# 轮动策略标的池 +POOL_INDEX_MAP = { + '399006.SZ': { + 'name': '创业板指', 'current_etf': '159915.SZ', 'group': 'A', + 'index_code': '399006.SZ', + }, + 'H30269.CSI': { + 'name': '红利低波', 'current_etf': '512890.SH', 'group': 'A', + 'index_code': 'H30269.CSI', + }, +} + + +def get_etf_nav_data(pro, etf_code, start_date, end_date): + """ + 获取ETF净值数据(使用fund_nav接口) + 注意:ETF应使用accum_nav(累计净值)而非unit_nav(单位净值) + """ + try: + df = pro.fund_nav( + ts_code=etf_code, + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', '') + ) + if df is not None and len(df) > 0: + df['date'] = pd.to_datetime(df['nav_date']) + df = df.set_index('date').sort_index() + # 使用累计净值 + return df['accum_nav'].astype(float) + except Exception as e: + print(f" 获取 {etf_code} NAV失败: {e}") + return None + + +def get_index_data(pro, index_code, start_date, end_date): + """获取指数日线数据""" + try: + df = pro.index_daily( + ts_code=index_code, + start_date=start_date.replace('-', ''), + end_date=end_date.replace('-', '') + ) + if df is not None and len(df) > 0: + df['date'] = pd.to_datetime(df['trade_date']) + df = df.set_index('date').sort_index() + return df['close'].astype(float) + except Exception as e: + print(f" 获取指数 {index_code} 失败: {e}") + return None + + +def calculate_tracking_error(etf_nav, index_close): + """ + 计算跟踪误差 + 公式:STDEV(每日偏离度) × √252 + 每日偏离度 = ETF净值收益率 - 指数收益率 + """ + if etf_nav is None or index_close is None: + return None + + # 计算收益率 + etf_ret = etf_nav.pct_change().dropna() + idx_ret = index_close.pct_change().dropna() + + # 对齐日期 + common = etf_ret.index.intersection(idx_ret.index) + if len(common) < 20: + return None + + e = etf_ret.loc[common] + i = idx_ret.loc[common] + + # 每日偏离度 + daily_deviation = e - i + + # 跟踪误差 = 标准差 × √252 + tracking_error = daily_deviation.std() * np.sqrt(252) + + # 其他指标 + correlation = e.corr(i) + r_squared = correlation ** 2 + + # 累计收益 + etf_cum = (1 + e).prod() - 1 + idx_cum = (1 + i).prod() - 1 + excess = etf_cum - idx_cum + + return { + 'annual_tracking_error': round(tracking_error * 100, 4), # % + 'correlation': round(correlation, 6), + 'r_squared': round(r_squared, 6), + 'etf_cum_return': round(etf_cum * 100, 2), # % + 'index_cum_return': round(idx_cum * 100, 2), # % + 'excess_return': round(excess * 100, 2), # % + 'common_days': len(common), + } + + +def main(): + print("=" * 80) + print("ETF跟踪误差计算与校验") + print(f"分析日期: {datetime.now().strftime('%Y-%m-%d')}") + print("=" * 80) + + # 初始化 + pro = ts.pro_api(os.getenv('TUSHARE_TOKEN')) + + # 分析时间范围:最近1年 + end_date = datetime.now().strftime('%Y-%m-%d') + start_date = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d') + print(f"计算区间: {start_date} ~ {end_date}") + + # 加载天天基金数据 + eastmoney_path = PROJECT_ROOT / 'rotation' / 'results' / 'etf_competitor_analysis.json' + eastmoney_data = {} + if eastmoney_path.exists(): + with open(eastmoney_path, 'r', encoding='utf-8') as f: + eastmoney_data = json.load(f) + print(f"已加载天天基金数据: {len(eastmoney_data)} 个指数") + + # 对每个指数计算跟踪误差 + print(f"\n开始计算跟踪误差...") + + results = {} + + for key, info in POOL_INDEX_MAP.items(): + index_name = info['name'] + index_code = info['index_code'] + current_etf = info['current_etf'] + + print(f"\n{'='*60}") + print(f"=== {index_name} ({key}) ===") + print(f"{'='*60}") + + # 获取指数数据 + print(f" 获取指数数据: {index_code}") + index_data = get_index_data(pro, index_code, start_date, end_date) + if index_data is None: + print(f" ✗ 指数数据获取失败") + continue + print(f" ✓ 指数数据: {len(index_data)} 天") + + # 获取该指数下所有ETF的NAV + etf_list = [] + if key in eastmoney_data: + for etf in eastmoney_data[key]['etfs']: + etf_list.append({ + 'code': etf['ts_code'], + 'name': etf['name'], + 'eastmoney_te': etf.get('annual_tracking_error', 'N/A'), + }) + + print(f" 共 {len(etf_list)} 只ETF需要计算") + + etf_results = [] + + for etf_info in etf_list: + etf_code = etf_info['code'] + etf_name = etf_info['name'] + + # 获取ETF NAV + etf_nav = get_etf_nav_data(pro, etf_code, start_date, end_date) + if etf_nav is None or len(etf_nav) < 20: + continue + + # 计算跟踪误差 + tracking = calculate_tracking_error(etf_nav, index_data) + if tracking is None: + continue + + result = { + 'ts_code': etf_code, + 'name': etf_name, + 'tushare_te': tracking['annual_tracking_error'], + 'tushare_r2': tracking['r_squared'], + 'tushare_correlation': tracking['correlation'], + 'tushare_excess_return': tracking['excess_return'], + 'tushare_common_days': tracking['common_days'], + 'eastmoney_te': etf_info['eastmoney_te'], + 'is_current': etf_code == current_etf, + } + + etf_results.append(result) + time.sleep(0.1) + + # 按跟踪误差排序 + etf_results.sort(key=lambda x: x['tushare_te']) + + results[key] = { + 'index_name': index_name, + 'index_code': index_code, + 'current_etf': current_etf, + 'etf_count': len(etf_results), + 'etfs': etf_results, + } + + # 打印结果 + print(f"\n 计算完成: {len(etf_results)} 只ETF") + print(f" {'代码':<12} {'名称':<20} {'Tushare TE':<12} {'天天基金 TE':<12} {'差异':<10} {'R²':<8}") + print(f" {'-'*80}") + + for etf in etf_results[:10]: + tushare_te = f"{etf['tushare_te']:.4f}%" + eastmoney_te = etf['eastmoney_te'] + + # 计算差异 + diff = 'N/A' + if eastmoney_te and eastmoney_te != 'N/A' and eastmoney_te != '--': + try: + em_te = float(eastmoney_te.replace('%', '')) + diff_val = etf['tushare_te'] - em_te + diff = f"{diff_val:+.4f}%" + except: + pass + + marker = " ★" if etf['is_current'] else "" + print(f" {etf['ts_code']:<12} {etf['name'][:20]:<20} {tushare_te:<12} {eastmoney_te:<12} {diff:<10} {etf['tushare_r2']:<8}{marker}") + + if len(etf_results) > 10: + print(f" ... 还有 {len(etf_results) - 10} 只") + + # 保存结果 + output_dir = PROJECT_ROOT / 'rotation' / 'results' + output_dir.mkdir(exist_ok=True) + output_path = output_dir / 'tracking_error_validation.json' + + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2, default=str) + + print(f"\n{'='*80}") + print(f"结果已保存: {output_path}") + print(f"{'='*80}") + + # 汇总统计 + print(f"\n{'='*80}") + print("校验汇总") + print(f"{'='*80}") + + for key, data in results.items(): + print(f"\n--- {data['index_name']} ---") + print(f" 指数代码: {data['index_code']}") + print(f" 计算ETF数: {data['etf_count']}") + + # 统计有天天基金数据的ETF + matched = [e for e in data['etfs'] if e['eastmoney_te'] and e['eastmoney_te'] not in ['N/A', '--']] + print(f" 天天基金有数据: {len(matched)}") + + if matched: + # 计算平均差异 + diffs = [] + for etf in matched: + try: + em_te = float(etf['eastmoney_te'].replace('%', '')) + diff = etf['tushare_te'] - em_te + diffs.append(diff) + except: + pass + + if diffs: + avg_diff = np.mean(diffs) + max_diff = max(diffs, key=abs) + print(f" 平均差异: {avg_diff:+.4f}%") + print(f" 最大差异: {max_diff:+.4f}%") + + +if __name__ == '__main__': + main()