# etf/rotation/experiments/task3_rebalance_analysis.py

"""
Task 3: 调仓逻辑问题诊断

分析维度：
3.1 最小持仓期模拟 - 对比 3/5/10 天最小持仓期的效果
3.2 等权 vs 波动率加权 - 评估风险贡献偏斜
3.3 分组竞争机制 - 对比"取消分组"vs"当前分组"的收益差异
"""

import ast
import sys
from pathlib import Path
from collections import defaultdict
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd

sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from rotation.experiments.common import (
    load_nav, load_signals, load_detail_days, load_detail_meta,
    print_section, compute_drawdown, compute_sharpe,
    compute_annual_return,
)


def simulate_min_hold(days: List[dict], min_hold: int,
                      trade_cost: float = 0.001) -> dict:
    """Replay the daily records while enforcing a minimum holding period.

    Simplified model: a rebalance flagged in the original run is executed
    only when the day counter since the last executed rebalance has reached
    ``min_hold``; otherwise the rebalance is suppressed and that day's return
    is approximated by adding ``trade_cost`` back onto the recorded return.
    Recorded returns of all other days are used unchanged, even after a
    rebalance was suppressed.

    NOTE(review): the day counter starts at 0, so a rebalance flagged within
    the first ``min_hold`` records is suppressed too, and ``min_hold=1``
    still blocks rebalances on consecutive days -- confirm this is intended.

    Args:
        days: Daily records. Only the keys ``daily_return`` (default 0) and
            ``is_rebalance`` (default False) are read.
        min_hold: Minimum counter value required before a flagged rebalance
            is executed.
        trade_cost: Amount added back to the recorded return on a suppressed
            rebalance day. Presumably the recorded return on a rebalance day
            is already net of this cost -- verify against the backtest.

    Returns:
        ``{}`` when ``days`` is empty; otherwise a dict with the keys
        ``min_hold``, ``total_return``, ``annual_return``, ``max_drawdown``,
        ``sharpe`` and ``rebalance_count``.
    """
    if not days:
        return {}

    nav = 1.0
    days_since_rebalance = 0
    rebalance_count = 0
    simulated_returns = []  # fresh list; never aliases data inside ``days``

    for day in days:
        applied_return = day.get('daily_return', 0)

        if day.get('is_rebalance', False):
            if days_since_rebalance >= min_hold:
                # Executed rebalance: keep the recorded return as-is.
                rebalance_count += 1
                days_since_rebalance = 0
            else:
                # Suppressed by the holding period: undo the cost deduction.
                applied_return = applied_return + trade_cost
                days_since_rebalance += 1
        else:
            days_since_rebalance += 1

        nav *= (1 + applied_return)
        simulated_returns.append(applied_return)

    # ``days`` is non-empty here, so the annualisation exponent is defined.
    n = len(days)
    total_return = nav - 1
    annual_return = (1 + total_return) ** (252 / n) - 1

    ret_series = pd.Series(simulated_returns)
    # Approximate NAV path, used only for the drawdown computation.
    nav_series = ret_series.add(1).cumprod()
    max_dd = compute_drawdown(nav_series).min()
    sharpe = compute_sharpe(ret_series)

    return {
        'min_hold': min_hold,
        'total_return': total_return,
        'annual_return': annual_return,
        'max_drawdown': max_dd,
        'sharpe': sharpe,
        'rebalance_count': rebalance_count,
    }


def analyze_min_hold_days(days: List[dict]):
    """3.1 Minimum-holding-period simulation.

    Runs ``simulate_min_hold`` for holding periods of 1, 3, 5 and 10 days and
    prints one summary line per setting.

    Args:
        days: Daily records passed through to ``simulate_min_hold``.

    Returns:
        List of the result dicts returned by ``simulate_min_hold``, in the
        order the holding periods were simulated; an empty list when ``days``
        is empty.
    """
    print_section("3.1 最小持仓期模拟")

    results = []
    if not days:
        # simulate_min_hold returns {} for empty input, which would make the
        # summary formatting below fail with KeyError.
        print("  无有效数据")
        return results

    for min_hold in [1, 3, 5, 10]:
        r = simulate_min_hold(days, min_hold)
        results.append(r)
        print(f"  最小持仓期={min_hold}天: 累计={r['total_return']:+.2%}, "
              f"年化={r['annual_return']:+.2%}, 最大回撤={r['max_drawdown']:.2%}, "
              f"夏普={r['sharpe']:.2f}, 调仓={r['rebalance_count']}次")

    return results


def analyze_volatility_weighting(days: List[dict]):
    """3.2 Equal weight vs. volatility weighting -- risk contribution report.

    Prints, per asset, the annualised volatility and return of the daily
    returns recorded while the asset was held, the five most frequent holding
    combinations, and a simplified risk-contribution comparison between
    equal weights and inverse-volatility weights.

    Args:
        days: Daily records; the keys read are ``assets`` (per-code dicts
            with ``is_held`` and ``etf_return_ctc``) and ``holdings``.

    Returns:
        ``{'volatilities': {code: annualised volatility}}`` for every asset
        with at least 10 held-day observations.
    """
    print_section("3.2 风险贡献分析 (等权 vs 波动率加权)")

    # Gather, per asset code, the daily returns recorded on held days.
    held_returns = defaultdict(list)
    for record in days:
        for code, info in record.get('assets', {}).items():
            if not info.get('is_held'):
                continue
            if info.get('etf_return_ctc') is None:
                continue
            held_returns[code].append(info['etf_return_ctc'])

    print("  各资产持有期间日收益波动率:")
    volatilities = {}
    for code in sorted(held_returns):
        series = held_returns[code]
        sample_size = len(series)
        if sample_size < 10:
            # Too few observations for a usable estimate.
            continue
        annual_vol = np.std(series) * np.sqrt(252)
        annual_ret = np.mean(series) * 252
        volatilities[code] = annual_vol
        if annual_vol > 0:
            print(f"    {code}: 年化波动率={annual_vol:.2%}, 年化收益={annual_ret:+.2%}, "
                  f"持有天数={sample_size}, Sharpe={annual_ret/annual_vol:.2f}")
        else:
            # Sharpe is undefined without volatility, so it is left out.
            print(f"    {code}: 年化波动率={annual_vol:.2%}, 持有天数={sample_size}")

    print(f"\n  等权组合风险贡献估算 (假设持有 Top3 等权):")
    # Tally how many days each (order-insensitive) holding set occurred.
    combo_days = defaultdict(int)
    for record in days:
        combo = tuple(sorted(record.get('holdings', [])))
        if combo:
            combo_days[combo] += 1

    ranked = sorted(combo_days.items(), key=lambda item: item[1], reverse=True)
    print("  最常见的持仓组合:")
    for combo, occurrences in ranked[:5]:
        print(f"    {combo}: {occurrences} 天 ({occurrences/len(days)*100:.1f}%)")

    # Theoretical risk share: equal weights vs. inverse-volatility weights,
    # over every asset with positive volatility except code 931862.CSI.
    risky = {c: v for c, v in volatilities.items() if v > 0 and c != '931862.CSI'}
    if len(risky) >= 3:
        codes_list = list(risky)
        vols = np.array([risky[c] for c in codes_list])
        n = len(codes_list)

        # Equal weights; contribution simplified to weight * volatility.
        eq_weights = np.ones(n) / n
        eq_risk_contrib = eq_weights * vols
        eq_risk_pct = eq_risk_contrib / eq_risk_contrib.sum() * 100

        # Weights proportional to 1 / volatility.
        inv_vol = 1.0 / vols
        iv_weights = inv_vol / inv_vol.sum()
        iv_risk_contrib = iv_weights * vols
        iv_risk_pct = iv_risk_contrib / iv_risk_contrib.sum() * 100

        print(f"\n  风险贡献对比 (全部非债券资产):")
        print(f"    {'资产':<15} {'波动率':>8} {'等权风险%':>10} {'反波动率风险%':>14}")
        for code, vol, eq_pct, iv_pct in zip(codes_list, vols, eq_risk_pct, iv_risk_pct):
            print(f"    {code:<15} {vol:>7.2%} {eq_pct:>9.1f}% {iv_pct:>13.1f}%")

    return {'volatilities': volatilities}


def _count_pair_wins(days: List[dict], first: str, second: str) -> Tuple[int, int, int]:
    """Count days where both assets clear the threshold, and who leads.

    The threshold is read from the ``first`` asset's record (default 0) and
    applied to both momentum values. Ties are credited to ``second``.

    Returns:
        ``(both_above, first_wins, second_wins)``.
    """
    both_above = first_wins = second_wins = 0
    for day in days:
        assets = day.get('assets', {})
        first_asset = assets.get(first, {})
        first_m = first_asset.get('momentum')
        second_m = assets.get(second, {}).get('momentum')
        if first_m is None or second_m is None:
            continue
        threshold = first_asset.get('threshold', 0)
        if first_m >= threshold and second_m >= threshold:
            both_above += 1
            if first_m > second_m:
                first_wins += 1
            else:
                second_wins += 1
    return both_above, first_wins, second_wins


def analyze_group_mechanism(days: List[dict], meta: dict):
    """3.3 Group competition mechanism analysis.

    Prints how many days each group was represented in the holdings, the
    head-to-head momentum comparison inside the A and HK groups, and which
    commodity had the highest above-threshold momentum each day.

    Args:
        days: Daily records; the keys read are ``holdings`` and ``assets``
            (per-code dicts with ``momentum`` and ``threshold``).
        meta: Not used by this function; kept for interface compatibility.

    Returns:
        ``{'group_hold_count': {group: days held}}``. Codes missing from the
        group table are counted under ``'UNKNOWN'``; the bond group is not
        counted.
    """
    print_section("3.3 分组竞争机制分析")

    # Group table is hard-coded here rather than read from configuration.
    group_map = {
        '399006.SZ': 'A', 'H30269.CSI': 'A',
        'NDX': 'US', 'N225': 'JP', 'GDAXI': 'EU',
        'HSI': 'HK', 'HSTECH.HK': 'HK',
        'GC=F': 'COMMODITY', 'CL=F': 'COMMODITY', 'HG=F': 'COMMODITY',
        '931862.CSI': 'BOND',
    }

    # Count each group at most once per day so the figure is really "days
    # held" even if two codes of the same group appear in one day's holdings.
    group_hold_count = defaultdict(int)
    total_days = len(days)
    for day in days:
        groups_held = {group_map.get(code, 'UNKNOWN') for code in day.get('holdings', [])}
        groups_held.discard('BOND')
        for g in groups_held:
            group_hold_count[g] += 1

    print("  各组被选中天数 (每次调仓选3个):")
    for g in ['A', 'US', 'JP', 'EU', 'HK', 'COMMODITY']:
        count = group_hold_count.get(g, 0)
        # Guard the percentage against an empty record list.
        pct = count / total_days * 100 if total_days else 0.0
        print(f"    {g}: {count} 天 ({pct:.1f}%)")

    # Cases where both members of a group are strong but only one is picked.
    print(f"\n  A 组内部竞争分析 (399006.SZ vs H30269.CSI):")
    both_above, a_wins, h_wins = _count_pair_wins(days, '399006.SZ', 'H30269.CSI')
    print(f"    两标的动量都超过阈值的天数: {both_above}")
    if both_above > 0:
        print(f"    399006.SZ 胜出: {a_wins} ({a_wins/both_above*100:.1f}%)")
        print(f"    H30269.CSI 胜出: {h_wins} ({h_wins/both_above*100:.1f}%)")

    print(f"\n  HK 组内部竞争分析 (HSI vs HSTECH.HK):")
    both_above_hk, hsi_wins, hstech_wins = _count_pair_wins(days, 'HSI', 'HSTECH.HK')
    print(f"    两标的动量都超过阈值的天数: {both_above_hk}")
    if both_above_hk > 0:
        print(f"    HSI 胜出: {hsi_wins} ({hsi_wins/both_above_hk*100:.1f}%)")
        print(f"    HSTECH 胜出: {hstech_wins} ({hstech_wins/both_above_hk*100:.1f}%)")

    # Commodity group has three members; each is tested against its own
    # recorded threshold and the highest qualifying momentum wins the day.
    print(f"\n  COMMODITY 组分析 (GC=F vs CL=F vs HG=F):")
    commodity_codes = ['GC=F', 'CL=F', 'HG=F']
    commodity_counts = defaultdict(int)
    for day in days:
        assets = day.get('assets', {})
        valid = {}
        for c in commodity_codes:
            a = assets.get(c, {})
            m = a.get('momentum')
            if m is not None and m >= a.get('threshold', 0):
                valid[c] = m
        if valid:
            commodity_counts[max(valid, key=valid.get)] += 1

    total_valid = sum(commodity_counts.values())
    for c in commodity_codes:
        count = commodity_counts.get(c, 0)
        if total_valid > 0:
            print(f"    {c} 胜出: {count} 天 ({count/total_valid*100:.1f}%)")
        else:
            print(f"    {c}: 无有效数据")

    return {'group_hold_count': dict(group_hold_count)}


def main():
    """Run the three Task 3 diagnostics and print a short summary.

    Returns:
        Dict with the keys ``min_hold``, ``vol_weight`` and ``group`` holding
        the return value of the corresponding analysis function.
    """
    print_section("Task 3: 调仓逻辑问题诊断")

    # The NAV and signal loaders are still invoked, although nothing below
    # consumes what they return.
    load_nav()
    load_signals()
    detail_days = load_detail_days()
    detail_meta = load_detail_meta()

    print(f"  数据期间: {detail_meta['start_date']} ~ {detail_meta['end_date']}")

    # Dict literal values are evaluated top to bottom, so the analyses run
    # in the order 3.1, 3.2, 3.3.
    results = {
        'min_hold': analyze_min_hold_days(detail_days),
        'vol_weight': analyze_volatility_weighting(detail_days),
        'group': analyze_group_mechanism(detail_days, detail_meta),
    }

    print_section("Task 3 总结")
    summary_lines = (
        "  1. 最小持仓期增加可减少无效调仓，但可能错过趋势转换",
        "  2. 等权配置导致高波动资产主导组合风险，波动率加权可平衡",
        "  3. 分组机制确保地域分散，但可能牺牲集中优势",
    )
    for line in summary_lines:
        print(line)

    return results


# Run the full Task 3 diagnosis only when this file is executed as a script.
if __name__ == '__main__':
    main()