fix: generate_report now uses actual position_weights from daily_records

Previously, generate_report hardcoded an equal weight (1/select_num) and ignored the configured weight type. It now reads position_weights from the last daily_record, so rank-based weights are shown correctly.
2026-06-07 23:29:27 +08:00
parent d898ba0fd5
commit 6a5ae8efbf
4 changed files with 468 additions and 3 deletions
--- a/rotation/experiments/debug_clf_2022.py
+++ b/rotation/experiments/debug_clf_2022.py
@@ -0,0 +1,128 @@
 """分析 2022年4月底~5月初 CL=F 入选原因"""
 import os, sys, math
 from pathlib import Path
 import numpy as np
 import pandas as pd
 PROJECT_ROOT = Path(__file__).parent.parent.parent
 sys.path.insert(0, str(PROJECT_ROOT))
 from rotation.simple_rotation import SimpleRotationStrategy, slope_r2_score
 # Use the default API endpoint only when the caller has not provided one.
 os.environ.setdefault('FLASK_API_URL', 'https://k3s.tokenpluse.xyz')
 strategy = SimpleRotationStrategy()
 strategy._preload_data()
 # Date range under analysis and the look-back length from the strategy config.
 start, end = pd.Timestamp('2022-04-15'), pd.Timestamp('2022-05-10')
 n_days = strategy.config.factor.n_days
 banner = "=" * 80
 print(banner)
 print(f"  分析 CL=F 动量信号 ({start.date()} ~ {end.date()})")
 print(f"  窗口长度: {n_days} 天")
 print(banner)
 # Codes to score, and the business days on which to score them.
 signal_codes = strategy.signal_codes
 date_range = pd.bdate_range(start, end)
 # For every business day in the window, score each signal code on its last
 # n_days closes up to that day, rank the codes by score (descending) and
 # report where CL=F lands together with the day's top five.
 for date in date_range:
    scores = {}
    for code in signal_codes:
        if code not in strategy.index_data:
            continue
        df = strategy.index_data[code]
        recent = df.loc[df.index <= date]
        if len(recent) < n_days:
            continue
        prices = recent['close'].values[-n_days:]
        scores[code] = (slope_r2_score(prices), prices[-1])
    if not scores:
        continue
    ranked = sorted(scores.items(), key=lambda x: x[1][0], reverse=True)
    day_str = date.strftime('%Y-%m-%d')
    if 'CL=F' in scores:
        cl_score, cl_price = scores['CL=F']
        cl_rank = next(
            pos for pos, (code, _) in enumerate(ranked, start=1) if code == 'CL=F'
        )
        print(f"\n{day_str} | CL=F score={cl_score:.4f}, price={cl_price:.2f}, rank={cl_rank}/{len(ranked)}")
    else:
        # CL=F was skipped above (no data or too little history). Formatting
        # None with ':.4f' would raise TypeError, so report it explicitly.
        print(f"\n{day_str} | CL=F score=N/A, price=N/A, rank=N/A/{len(ranked)}")
    print(f"  Top 5:")
    for pos, (code, (score, price)) in enumerate(ranked[:5], start=1):
        marker = " <<<" if code == 'CL=F' else ""
        print(f"    #{pos} {code:<15} score={score:>10.4f}  price={price:.2f}{marker}")
 # Walk CL=F day by day from 2022-03-01 to 2022-05-15 and print, for each day
 # with enough history, the strategy score next to a slope and R² recomputed
 # here from the normalised n_days window.
 section_rule = '=' * 80
 print(f"\n{section_rule}")
 print(f"  CL=F 价格走势 (2022年3月~5月)")
 print(f"{section_rule}")
 df_cl = strategy.index_data['CL=F']
 in_range = (df_cl.index >= '2022-03-01') & (df_cl.index <= '2022-05-15')
 cl_prices = df_cl.loc[in_range, 'close']
 entry_dates = ('2022-04-29', '2022-05-05')
 for date, price in cl_prices.items():
    history = df_cl.loc[df_cl.index <= date]
    if len(history) < n_days:
        continue
    prices = history['close'].values[-n_days:]
    score = slope_r2_score(prices)
    # Least-squares line through the window, normalised to its first price.
    normalized = prices / prices[0]
    x = np.arange(len(normalized))
    slope, intercept = np.polyfit(x, normalized, 1)
    residuals = normalized - (slope * x + intercept)
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((normalized - np.mean(normalized)) ** 2)
    if ss_tot > 0:
        r2 = 1 - ss_res / ss_tot
    else:
        r2 = 0
    day_str = date.strftime('%Y-%m-%d')
    flag = " <<< 入选日" if day_str in entry_dates else ""
    print(f"  {day_str}  price={price:>8.2f}  score={score:>10.4f}  "
          f"slope={slope:>8.5f}  R²={r2:.4f}{flag}")
 # Locate the asset group that contains CL=F and, for each of the two entry
 # dates, print every member's score and flag the member(s) with the top score.
 print(f"\n{'='*80}")
 print(f"  CL=F 所在组: 查看组内竞争")
 print(f"{'='*80}")
 groups = strategy.config.asset_pools.by_group
 for group_name, assets in groups.items():
    group_codes = [a.signal_source for a in assets.values()]
    if 'CL=F' not in group_codes:
        continue
    print(f"  组名: {group_name}")
    print(f"  组成员: {group_codes}")
    for target_date_str in ['2022-04-29', '2022-05-05']:
        target_date = pd.Timestamp(target_date_str)
        print(f"\n  {target_date_str} 组内得分:")
        # Score each member exactly once. Previously the group maximum was
        # recomputed (re-slicing and re-scoring every member) for every
        # printed line. None marks a member with too little history.
        member_scores = {}
        for code in group_codes:
            if code not in strategy.index_data:
                continue
            df = strategy.index_data[code]
            recent = df.loc[df.index <= target_date]
            if len(recent) < n_days:
                member_scores[code] = None
            else:
                member_scores[code] = slope_r2_score(recent['close'].values[-n_days:])
        valid_scores = [s for s in member_scores.values() if s is not None]
        best_score = max(valid_scores) if valid_scores else None
        for code in group_codes:
            if code not in member_scores:
                continue
            score = member_scores[code]
            if score is None:
                print(f"    {code:<15} 数据不足")
                continue
            marker = " <<< TOP1" if score == best_score else ""
            print(f"    {code:<15} score={score:>10.4f}{marker}")
--- a/rotation/experiments/r2_distribution.py
+++ b/rotation/experiments/r2_distribution.py
@@ -0,0 +1,175 @@
 """分析短债(931862.CSI)和CL=F的R²分布"""
 import os, sys
 from pathlib import Path
 import numpy as np
 import pandas as pd
 PROJECT_ROOT = Path(__file__).parent.parent.parent
 sys.path.insert(0, str(PROJECT_ROOT))
 from rotation.simple_rotation import SimpleRotationStrategy
 # Use the default API endpoint only when the caller has not provided one.
 os.environ.setdefault('FLASK_API_URL', 'https://k3s.tokenpluse.xyz')
 # Build the strategy and load its price data up front.
 strategy = SimpleRotationStrategy()
 strategy._preload_data()
 # Rolling-window length shared by every fit in this script.
 n_days = strategy.config.factor.n_days
 def compute_r2(prices):
    """Fit a straight line to a normalised price window.

    Prices are floored at 0.01 and divided by the first (floored) value,
    then a degree-1 least-squares polynomial is fitted against the
    positions 0, 1, ..., len(prices) - 1.

    Returns:
        A ``(r2, slope)`` tuple. ``r2`` is the coefficient of determination
        of the fit, or 0 when the normalised series has zero variance.
    """
    floored = np.clip(prices, 0.01, None)
    series = floored / floored[0]
    steps = np.arange(len(series))
    slope, intercept = np.polyfit(steps, series, 1)
    residual = series - (slope * steps + intercept)
    total_var = np.sum((series - np.mean(series)) ** 2)
    if total_var > 0:
        r2 = 1 - np.sum(residual ** 2) / total_var
    else:
        r2 = 0
    return r2, slope
 # Rolling R² / slope statistics for every signal code. The per-code arrays
 # are stored in all_r2_data for the comparison section that follows.
 print("=" * 80)
 print(f"  各资产 R² 分布统计 (2020-01-10 ~ 2026-06-05, 窗口={n_days}天)")
 print("=" * 80)
 all_r2_data = {}
 for code in strategy.signal_codes:
    if code not in strategy.index_data:
        continue
    df = strategy.index_data[code]
    closes = df['close'].values  # hoisted: identical for every window
    # Window i covers closes[i - n_days:i], i.e. it ends on bar i - 1.
    # NOTE(review): the most recent bar is never part of a window — confirm
    # this T-1 alignment is intended.
    fits = [compute_r2(closes[i - n_days:i]) for i in range(n_days, len(closes))]
    r2_arr = np.array([fit[0] for fit in fits])
    slope_arr = np.array([fit[1] for fit in fits])
    all_r2_data[code] = (r2_arr, slope_arr)
    print(f"\n  {code} (n={len(r2_arr)})")
    if r2_arr.size == 0:
        # Not enough history for a single window; mean/percentile of an
        # empty array would warn or fail.
        print(f"    数据不足")
        continue
    p10, p25, p75, p90 = np.percentile(r2_arr, [10, 25, 75, 90])
    print(f"    全部 R²:  mean={r2_arr.mean():.4f}  median={np.median(r2_arr):.4f}  "
          f"p10={p10:.4f}  p25={p25:.4f}  "
          f"p75={p75:.4f}  p90={p90:.4f}")
    # R² split by the sign of the fitted slope.
    for label, subset in (
        ("slope>0: ", r2_arr[slope_arr > 0]),
        ("slope<=0:", r2_arr[slope_arr <= 0]),
    ):
        if subset.size == 0:
            print(f"    {label} count=0")
            continue
        sub_p10, sub_p25 = np.percentile(subset, [10, 25])
        print(f"    {label} mean={subset.mean():.4f}  "
              f"p10={sub_p10:.4f}  p25={sub_p25:.4f}  "
              f"count={subset.size}")
 # Side-by-side percentile tables of R² (positive-slope windows only) for the
 # short-bond index and crude oil.
 print(f"\n{'=' * 80}")
 print(f"  重点对比: 931862.CSI (短债) vs CL=F (原油)")
 print(f"{'=' * 80}")
 quantiles = [1, 5, 10, 15, 20, 25, 30, 50, 75, 90, 95, 99]
 for code in ['931862.CSI', 'CL=F']:
    if code not in all_r2_data:
        # The statistics loop skips codes without loaded data, so a plain
        # all_r2_data[code] lookup would raise KeyError here.
        print(f"\n  {code}: 无数据")
        continue
    r2_arr, slope_arr = all_r2_data[code]
    r2_pos = r2_arr[slope_arr > 0]
    print(f"\n  {code} (正趋势 R², n={len(r2_pos)})")
    if r2_pos.size == 0:
        continue  # no positive-slope windows: nothing to take percentiles of
    for p, val in zip(quantiles, np.percentile(r2_pos, quantiles)):
        print(f"    p{p:>2}: R²={val:.4f}")
 # Load the backtest detail JSON and collect every day on which CL=F was held.
 print(f"\n{'=' * 80}")
 print(f"  CL=F 入选持仓时的 R² (从 detail JSON 中提取)")
 print(f"{'=' * 80}")
 import json
 detail_path = PROJECT_ROOT / "rotation" / "results" / "simple_rotation_detail.json"
 # Decode explicitly as UTF-8 rather than with the platform's locale codec.
 with open(detail_path, encoding="utf-8") as f:
    detail = json.load(f)
 # One (date, momentum, rank) tuple per held day. R² is not stored in the
 # detail file; later sections recompute it from the price data.
 clf_held_r2 = []
 for day in detail['days']:
    clf = day.get('assets', {}).get('CL=F', {})
    if clf.get('is_held'):
        clf_held_r2.append((day['date'], clf.get('momentum', 0), clf.get('rank')))
 # Report the total number of held days, then for each day CL=F was added
 # print its momentum with the R² / slope fitted on data up to the day before.
 print(f"  CL=F 总持仓天数: {len(clf_held_r2)}")
 print(f"\n  入场日(added)的动量和R²:")
 entry_days = []
 for day in detail['days']:
    if 'CL=F' not in day.get('added', []):
        continue
    date = day['date']
    clf = day['assets']['CL=F']
    entry_days.append((date, clf['momentum'], clf.get('rank')))
    # The signal is taken one calendar day before the entry date.
    sig_date = pd.Timestamp(date) - pd.Timedelta(days=1)
    cl_frame = strategy.index_data['CL=F']
    history = cl_frame.loc[cl_frame.index <= sig_date]
    if len(history) < n_days:
        continue
    r2, slope = compute_r2(history['close'].values[-n_days:])
    print(f"    {date}: momentum={clf['momentum']:>10.4f}  R²={r2:.4f}  slope={slope:.5f}  "
          f"entry_price={clf.get('entry_price_idx', '?')}")
 # For each day CL=F was removed, print the R² / slope fitted on data up to
 # the day before, plus that day's recorded momentum and index close.
 print(f"\n  离场日(removed)的前一天动量和R²:")
 exit_days = []
 for day in detail['days']:
    if 'CL=F' not in day.get('removed', []):
        continue
    date = day['date']
    # The signal is taken one calendar day before the exit date.
    sig_date = pd.Timestamp(date) - pd.Timedelta(days=1)
    cl_frame = strategy.index_data['CL=F']
    history = cl_frame.loc[cl_frame.index <= sig_date]
    if len(history) >= n_days:
        r2, slope = compute_r2(history['close'].values[-n_days:])
    else:
        # Too little history for a fit: report zeros.
        r2, slope = 0, 0
    clf = day['assets'].get('CL=F', {})
    print(f"    {date}: 信号日R²={r2:.4f}  slope={slope:.5f}  "
          f"momentum(当日)={clf.get('momentum', '?')}  "
          f"price={clf.get('index_close', '?')}")
 # Show which CL=F entries an R² floor would have filtered out, for a range
 # of candidate thresholds.
 print(f"\n{'=' * 80}")
 print(f"  R² 阈值对 CL=F 信号的过滤效果")
 print(f"{'=' * 80}")
 # The signal-day fit of an entry does not depend on the threshold, so it is
 # computed once here instead of once per threshold.
 entry_fits = []
 for day in detail['days']:
    if 'CL=F' not in day.get('added', []):
        continue
    sig_date = pd.Timestamp(day['date']) - pd.Timedelta(days=1)
    df = strategy.index_data['CL=F']
    recent = df.loc[df.index <= sig_date]
    if len(recent) < n_days:
        continue
    r2, slope = compute_r2(recent['close'].values[-n_days:])
    entry_fits.append((day, r2, slope))
 for threshold in [0.05, 0.10, 0.15, 0.20, 0.25, 0.30]:
    # Momentum is looked up only for entries that fall below the threshold,
    # as before.
    filtered_entries = [
        (day['date'], r2, slope, day['assets']['CL=F']['momentum'])
        for day, r2, slope in entry_fits
        if r2 < threshold
    ]
    print(f"  R²<{threshold:.2f} 过滤掉的入场: {len(filtered_entries)} 次")
    for date, r2, slope, mom in filtered_entries:
        print(f"    {date}: R²={r2:.4f}  slope={slope:.5f}  momentum={mom:.4f}")
--- a/rotation/simple_rotation.py
+++ b/rotation/simple_rotation.py
@@ -1203,7 +1203,7 @@ class SimpleRotationStrategy:
        # Build positions info for table
        # Sort holdings by momentum score descending
-        weight = 1.0 / self.select_num if self.select_num > 0 else 1.0
+        position_weights = last_rec.get('position_weights', {})
        sorted_holdings = sorted(holdings, key=lambda c: factors.get(c, 0) or 0, reverse=True)
        # Determine previous holdings to distinguish "调入" vs "维持"
@@ -1249,7 +1249,7 @@ class SimpleRotationStrategy:
            positions_info.append({
                'name': name, 'code': code, 'etf': etf_code,
-                'weight': weight, 'score': score,
+                'weight': position_weights.get(code, 1.0 / len(holdings)), 'score': score,
                'idx_close': idx_close, 'etf_close': etf_close,
                'premium': premium, 'action': action,
                'entry_date': entry_date, 'entry_price': entry_price,
@@ -1276,7 +1276,7 @@ class SimpleRotationStrategy:
                    premium = self._get_latest_premium(trade_code, last_date)
                    exit_positions.append({
                        'name': name, 'code': code, 'etf': etf_code,
-                        'weight': weight, 'score': None,
+                        'weight': 0, 'score': None,
                        'idx_close': idx_close, 'etf_close': etf_close,
                        'premium': premium, 'action': '调出',
                        'entry_date': None, 'entry_price': None,
--- a/scripts/diag_ic_calibration.py
+++ b/scripts/diag_ic_calibration.py
@@ -0,0 +1,162 @@
 """IC Calibration diagnostic analysis script"""
 import os, sys, numpy as np, pandas as pd
 from pathlib import Path
 from collections import Counter
 # Respect a FLASK_API_URL supplied by the caller and fall back to the default
 # endpoint only when it is unset, as the other experiment scripts do.
 os.environ.setdefault('FLASK_API_URL', 'https://k3s.tokenpluse.xyz')
 # Make the current working directory importable so `rotation` resolves.
 sys.path.insert(0, str(Path.cwd()))
 from rotation.simple_rotation import SimpleRotationStrategy
 from rotation.ic_calibration import ICCalibrator
 def run_and_analyze(select_num, ic_enabled):
    """Run one backtest configuration and return the strategy and its records.

    Args:
        select_num: Value written to both ``config.rotation.select_num`` and
            the strategy's ``select_num`` attribute.
        ic_enabled: When true, IC calibration is switched on in the config
            and an ``ICCalibrator(fwd_days=5, min_samples=120,
            ic_method='pearson')`` is attached to the strategy.

    Returns:
        ``(strategy, strategy.daily_records)`` after ``run()`` has finished.
    """
    s = SimpleRotationStrategy()
    s.config.rotation.select_num = select_num
    s.select_num = select_num
    if ic_enabled:
        s.config.rotation.ic_calibration.enabled = True
        s.ic_calibrator = ICCalibrator(fwd_days=5, min_samples=120, ic_method='pearson')
    # The return value of run() is not needed; results are read from
    # daily_records.
    s.run()
    return s, s.daily_records
 print("=" * 90)
 print("  Running 4 experiments...")
 print("=" * 90)
 # (select_num, ic_enabled) pairs, run in this order.
 experiment_grid = [(3, False), (3, True), (1, False), (1, True)]
 (s1, r1), (s2, r2), (s3, r3), (s4, r4) = [
    run_and_analyze(num, use_ic) for num, use_ic in experiment_grid
 ]
 # === 1. Holding-frequency comparison ===
 # For each run, count on how many days every asset appears in the holdings
 # and print the counts in descending order with their share of all days.
 print("\n\n" + "=" * 90)
 print("  1. 持仓频率对比")
 print("=" * 90)
 frequency_runs = [
    ("原始 select=3", r1, s1),
    ("IC校准 select=3", r2, s2),
    ("原始 select=1", r3, s3),
    ("IC校准 select=1", r4, s4),
 ]
 for label, recs, s_obj in frequency_runs:
    held_names = []
    for rec in recs:
        for held_code in rec['holdings']:
            # Show the configured asset name; fall back to the raw code.
            meta = s_obj.config.asset_pools.assets.get(held_code)
            held_names.append(meta.name if meta else held_code)
    hold_counter = Counter(held_names)
    print(f"\n  [{label}] 持仓天数分布:")
    total_days = len(recs)
    for name, cnt in hold_counter.most_common():
        pct = cnt / total_days * 100
        print(f"    {name:<12}: {cnt:>5} 天 ({pct:>5.1f}%)")
 # === 2. Change in how often key assets are selected (select=3 runs) ===
 print("\n\n" + "=" * 90)
 print("  2. IC校准 select=3: 关键资产被选中变化")
 print("=" * 90)


 def _days_holding(records, strat, wanted):
    """Count the records whose holdings include an asset shown as *wanted*.

    A holding is shown under its configured asset name, or under its raw
    code when the asset pool has no entry for it.
    """
    total = 0
    for rec in records:
        for held_code in rec['holdings']:
            meta = strat.config.asset_pools.assets.get(held_code)
            if wanted == (meta.name if meta else held_code):
                total += 1
                break
    return total


 targets = ['日经225', '德国DAX', '纳指100', '恒生指数', '黄金', '有色金属', '中证红利低波', '原油', '创业板指']
 for name_key in targets:
    orig = _days_holding(r1, s1, name_key)
    calib = _days_holding(r2, s2, name_key)
    print(f"  {name_key:<12}: 原始={orig:>5}天 -> IC校准={calib:>5}天 (差={calib-orig:+d})")
 # === 3. Return quality over the holding period ===
 # Per run: mean daily return, win rate, average gain / loss, their ratio,
 # and the five worst and five best daily returns.
 print("\n\n" + "=" * 90)
 print("  3. 持有期收益质量")
 print("=" * 90)
 quality_runs = [
    ("原始 select=3", r1), ("IC校准 select=3", r2),
    ("原始 select=1", r3), ("IC校准 select=1", r4),
 ]
 for label, recs in quality_runs:
    daily = np.array([rec['daily_return'] for rec in recs])
    gains = daily[daily > 0]
    losses = daily[daily < 0]
    print(f"\n  [{label}]:")
    print(f"    日均: {daily.mean()*100:+.4f}%  胜率: {(daily>0).mean()*100:.1f}%")
    avg_gain = gains.mean()
    avg_loss = losses.mean()
    print(f"    盈利均: {avg_gain*100:+.4f}%  亏损均: {avg_loss*100:+.4f}%  盈亏比: {abs(avg_gain/avg_loss):.3f}")
    # Records ordered from the worst to the best daily return.
    by_return = sorted(recs, key=lambda x: x['daily_return'])
    worst5 = [rec['daily_return'] * 100 for rec in by_return[:5]]
    best5 = [rec['daily_return'] * 100 for rec in by_return[-5:]]
    print(f"    最差5天: {['%.2f%%' % v for v in worst5]}")
    print(f"    最好5天: {['%.2f%%' % v for v in best5]}")
 # === 4. Segment performance: first 170 records vs. the remainder ===
 print("\n\n" + "=" * 90)
 print("  4. 分段绩效: 冷启动期(前170天) vs 校准生效期")
 print("=" * 90)


 def _segment_stats(records):
    """Summarise one slice of daily records.

    Returns:
        ``None`` for an empty slice (annualising would divide by zero and
        the drawdown minimum would be undefined), otherwise the tuple
        ``(cumulative_return, annualised_return, win_rate, max_drawdown)``
        computed from each record's ``daily_return``.
    """
    rets = np.array([rec['daily_return'] for rec in records])
    if rets.size == 0:
        return None
    cum = np.prod(1 + rets) - 1
    ann = (1 + cum) ** (252 / len(rets)) - 1
    nav = np.cumprod(1 + rets)
    peak = np.maximum.accumulate(nav)
    max_dd = ((nav - peak) / peak).min()
    return cum, ann, (rets > 0).mean(), max_dd


 for label, recs in [
    ("原始 select=1", r3), ("IC校准 select=1", r4),
    ("原始 select=3", r1), ("IC校准 select=3", r2),
 ]:
    print(f"\n  [{label}]:")
    for title, segment in (
        ("冷启动(1-170天):", recs[:170]),
        ("校准后(170天+): ", recs[170:]),
    ):
        stats = _segment_stats(segment)
        if stats is None:
            # Fewer records than the split point: nothing to report.
            print(f"    {title} 无数据")
            continue
        cum, ann, win_rate, max_dd = stats
        print(f"    {title} 累计={cum*100:+.2f}% 年化={ann*100:.2f}% 胜率={win_rate*100:.1f}% 最大回撤={max_dd*100:.2f}%")
 # === 5. Select=3: days on which calibrated and original holdings differ ===
 # Compares the daily *portfolio* return of both runs on those days.
 print("\n\n" + "=" * 90)
 print("  5. IC校准 select=3: 新增持仓收益分析")
 print("=" * 90)
 # Pair records by date instead of by position: with a positional zip, one
 # extra or missing record in either run misaligns and silently skips every
 # later day.
 calib_by_date = {rec['date']: rec for rec in r2}
 changed_days = 0
 calib_rets = []
 orig_rets = []
 for orig_rec in r1:
    calib_rec = calib_by_date.get(orig_rec['date'])
    if calib_rec is None:
        continue
    if set(orig_rec['holdings']) != set(calib_rec['holdings']):
        changed_days += 1
        calib_rets.append(calib_rec['daily_return'])
        orig_rets.append(orig_rec['daily_return'])
 print(f"\n  持仓变化天数: {changed_days} / {len(r1)}")
 if calib_rets:
    new_arr = np.array(calib_rets)
    old_arr = np.array(orig_rets)
    diff = new_arr - old_arr
    print(f"  变化日 - IC校准收益: {new_arr.mean()*100:+.4f}%  原始收益: {old_arr.mean()*100:+.4f}%  差: {diff.mean()*100:+.4f}%")
    print(f"  变化日胜率: IC校准={((new_arr>0).mean()*100):.1f}%  原始={((old_arr>0).mean()*100):.1f}%")
    print(f"  变化日累计: IC校准={new_arr.sum()*100:+.2f}%  原始={old_arr.sum()*100:+.2f}%  差={diff.sum()*100:+.2f}%")
 # === 6. Final IC-calibration state of the calibrated select=3 run ===
 print("\n\n" + "=" * 90)
 print("  6. IC校准最终状态")
 print("=" * 90)
 calibrator = s2.ic_calibrator
 for code in sorted(calibrator._history.keys()):
    stats = calibrator.get_stats(code)
    # Show the configured asset name; fall back to the raw code.
    meta = s2.config.asset_pools.assets.get(code)
    if meta:
        name = meta.name
    else:
        name = code
    print(f"  {name:<12} ({code:<14}): n={stats.n_samples:>5}  IC={stats.ic_value:+.4f}  sign={stats.ic_sign:+d}")