From 6a5ae8efbf2b64b26233d21efac7fcb5ef22f3c9 Mon Sep 17 00:00:00 2001 From: aszerW Date: Sun, 7 Jun 2026 23:29:27 +0800 Subject: [PATCH] fix: generate_report now uses actual position_weights from daily_records Previously hardcoded equal weight (1/select_num), ignoring config weight type. Now reads position_weights from last daily_record, correctly showing rank-based weights. --- rotation/experiments/debug_clf_2022.py | 128 +++++++++++++++++ rotation/experiments/r2_distribution.py | 175 ++++++++++++++++++++++++ rotation/simple_rotation.py | 6 +- scripts/diag_ic_calibration.py | 162 ++++++++++++++++++++++ 4 files changed, 468 insertions(+), 3 deletions(-) create mode 100644 rotation/experiments/debug_clf_2022.py create mode 100644 rotation/experiments/r2_distribution.py create mode 100644 scripts/diag_ic_calibration.py diff --git a/rotation/experiments/debug_clf_2022.py b/rotation/experiments/debug_clf_2022.py new file mode 100644 index 0000000..140f4c3 --- /dev/null +++ b/rotation/experiments/debug_clf_2022.py @@ -0,0 +1,128 @@ +"""分析 2022年4月底~5月初 CL=F 入选原因""" +import os, sys, math +from pathlib import Path +import numpy as np +import pandas as pd + +PROJECT_ROOT = Path(__file__).parent.parent.parent +sys.path.insert(0, str(PROJECT_ROOT)) + +from rotation.simple_rotation import SimpleRotationStrategy, slope_r2_score + +if 'FLASK_API_URL' not in os.environ: + os.environ['FLASK_API_URL'] = 'https://k3s.tokenpluse.xyz' + +strategy = SimpleRotationStrategy() +strategy._preload_data() + +# 分析日期范围 +start = pd.Timestamp('2022-04-15') +end = pd.Timestamp('2022-05-10') +n_days = strategy.config.factor.n_days # 25 + +print("=" * 80) +print(f" 分析 CL=F 动量信号 ({start.date()} ~ {end.date()})") +print(f" 窗口长度: {n_days} 天") +print("=" * 80) + +# 获取所有 signal_codes 的 score +signal_codes = strategy.signal_codes +date_range = pd.bdate_range(start, end) + +for date in date_range: + scores = {} + for code in signal_codes: + if code not in strategy.index_data: + continue + df = strategy.index_data[code] + mask = df.index <= date + recent = df.loc[mask] + if len(recent) < n_days: + continue + prices = recent['close'].values[-n_days:] + score = slope_r2_score(prices) + scores[code] = (score, prices[-1]) + + if not scores: + continue + + # 排序 + ranked = sorted(scores.items(), key=lambda x: x[1][0], reverse=True) + cl_rank = None + for i, (code, (score, price)) in enumerate(ranked): + if code == 'CL=F': + cl_rank = i + 1 + break + + cl_score = scores.get('CL=F', (None, None))[0] + cl_price = scores.get('CL=F', (None, None))[1] + + print(f"\n{date.strftime('%Y-%m-%d')} | CL=F score={cl_score:.4f}, price={cl_price:.2f}, rank={cl_rank}/{len(ranked)}") + print(f" Top 5:") + for i, (code, (score, price)) in enumerate(ranked[:5]): + marker = " <<<" if code == 'CL=F' else "" + print(f" #{i+1} {code:<15} score={score:>10.4f} price={price:.2f}{marker}") + +# 详细分析 CL=F 价格走势 +print(f"\n{'='*80}") +print(f" CL=F 价格走势 (2022年3月~5月)") +print(f"{'='*80}") + +df_cl = strategy.index_data['CL=F'] +mask = (df_cl.index >= '2022-03-01') & (df_cl.index <= '2022-05-15') +cl_prices = df_cl.loc[mask, 'close'] + +for date, price in cl_prices.items(): + # 计算25天窗口的score + mask2 = df_cl.index <= date + recent = df_cl.loc[mask2] + if len(recent) < n_days: + continue + prices = recent['close'].values[-n_days:] + score = slope_r2_score(prices) + normalized = prices / prices[0] + slope, intercept = np.polyfit(np.arange(len(normalized)), normalized, 1) + y_pred = slope * np.arange(len(normalized)) + intercept + ss_res = np.sum((normalized - y_pred) ** 2) + ss_tot = np.sum((normalized - np.mean(normalized)) ** 2) + r2 = 1 - ss_res / ss_tot if ss_tot > 0 else 0 + + flag = "" + if date.strftime('%Y-%m-%d') in ('2022-04-29', '2022-05-05'): + flag = " <<< 入选日" + + print(f" {date.strftime('%Y-%m-%d')} price={price:>8.2f} score={score:>10.4f} " + f"slope={slope:>8.5f} R²={r2:.4f}{flag}") + +# 分析 CL=F 的组内竞争 +print(f"\n{'='*80}") +print(f" CL=F 所在组: 查看组内竞争") +print(f"{'='*80}") + +groups = strategy.config.asset_pools.by_group +for group_name, assets in groups.items(): + group_codes = [a.signal_source for a in assets.values()] + if 'CL=F' in group_codes: + print(f" 组名: {group_name}") + print(f" 组成员: {group_codes}") + + # 4/29 和 5/5 的组内得分 + for target_date_str in ['2022-04-29', '2022-05-05']: + target_date = pd.Timestamp(target_date_str) + print(f"\n {target_date_str} 组内得分:") + for code in group_codes: + if code not in strategy.index_data: + continue + df = strategy.index_data[code] + mask = df.index <= target_date + recent = df.loc[mask] + if len(recent) < n_days: + print(f" {code:<15} 数据不足") + continue + prices = recent['close'].values[-n_days:] + score = slope_r2_score(prices) + marker = " <<< TOP1" if score == max( + slope_r2_score(strategy.index_data[c].loc[strategy.index_data[c].index <= target_date]['close'].values[-n_days:]) + for c in group_codes if c in strategy.index_data and len(strategy.index_data[c].loc[strategy.index_data[c].index <= target_date]) >= n_days + ) else "" + print(f" {code:<15} score={score:>10.4f}{marker}") diff --git a/rotation/experiments/r2_distribution.py b/rotation/experiments/r2_distribution.py new file mode 100644 index 0000000..27fcc27 --- /dev/null +++ b/rotation/experiments/r2_distribution.py @@ -0,0 +1,175 @@ +"""分析短债(931862.CSI)和CL=F的R²分布""" +import os, sys +from pathlib import Path +import numpy as np +import pandas as pd + +PROJECT_ROOT = Path(__file__).parent.parent.parent +sys.path.insert(0, str(PROJECT_ROOT)) + +from rotation.simple_rotation import SimpleRotationStrategy + +if 'FLASK_API_URL' not in os.environ: + os.environ['FLASK_API_URL'] = 'https://k3s.tokenpluse.xyz' + +strategy = SimpleRotationStrategy() +strategy._preload_data() + +n_days = strategy.config.factor.n_days # 25 + +def compute_r2(prices): + prices = np.clip(prices, 0.01, None) + y = prices / prices[0] + x = np.arange(len(y)) + slope, intercept = np.polyfit(x, y, 1) + y_pred = slope * x + intercept + ss_res = np.sum((y - y_pred) ** 2) + ss_tot = np.sum((y - np.mean(y)) ** 2) + r2 = 1 - ss_res / ss_tot if ss_tot > 0 else 0 + return r2, slope + +# 分析所有 signal_codes +print("=" * 80) +print(f" 各资产 R² 分布统计 (2020-01-10 ~ 2026-06-05, 窗口={n_days}天)") +print("=" * 80) + +all_r2_data = {} + +for code in strategy.signal_codes: + if code not in strategy.index_data: + continue + df = strategy.index_data[code] + r2_list = [] + slope_list = [] + for i in range(n_days, len(df)): + prices = df['close'].values[i-n_days:i] + r2, slope = compute_r2(prices) + r2_list.append(r2) + slope_list.append(slope) + + r2_arr = np.array(r2_list) + slope_arr = np.array(slope_list) + all_r2_data[code] = (r2_arr, slope_arr) + + # 区分正slope和负slope时的R² + pos_mask = slope_arr > 0 + neg_mask = slope_arr <= 0 + + print(f"\n {code} (n={len(r2_arr)})") + print(f" 全部 R²: mean={r2_arr.mean():.4f} median={np.median(r2_arr):.4f} " + f"p10={np.percentile(r2_arr, 10):.4f} p25={np.percentile(r2_arr, 25):.4f} " + f"p75={np.percentile(r2_arr, 75):.4f} p90={np.percentile(r2_arr, 90):.4f}") + print(f" slope>0: mean={r2_arr[pos_mask].mean():.4f} " + f"p10={np.percentile(r2_arr[pos_mask], 10):.4f} " + f"p25={np.percentile(r2_arr[pos_mask], 25):.4f} " + f"count={pos_mask.sum()}") + print(f" slope<=0: mean={r2_arr[neg_mask].mean():.4f} " + f"p10={np.percentile(r2_arr[neg_mask], 10):.4f} " + f"p25={np.percentile(r2_arr[neg_mask], 25):.4f} " + f"count={neg_mask.sum()}") + +# 重点对比:短债 vs CL=F +print(f"\n{'=' * 80}") +print(f" 重点对比: 931862.CSI (短债) vs CL=F (原油)") +print(f"{'=' * 80}") + +for code in ['931862.CSI', 'CL=F']: + r2_arr, slope_arr = all_r2_data[code] + pos_mask = slope_arr > 0 + r2_pos = r2_arr[pos_mask] + + print(f"\n {code} (正趋势 R², n={len(r2_pos)})") + # 分位数 + for p in [1, 5, 10, 15, 20, 25, 30, 50, 75, 90, 95, 99]: + val = np.percentile(r2_pos, p) + print(f" p{p:>2}: R²={val:.4f}") + +# CL=F 入选时的 R² 统计 +print(f"\n{'=' * 80}") +print(f" CL=F 入选持仓时的 R² (从 detail JSON 中提取)") +print(f"{'=' * 80}") + +import json +detail_path = PROJECT_ROOT / "rotation" / "results" / "simple_rotation_detail.json" +with open(detail_path) as f: + detail = json.load(f) + +clf_held_r2 = [] +for day in detail['days']: + assets = day.get('assets', {}) + clf = assets.get('CL=F', {}) + if clf.get('is_held'): + date = day['date'] + mom = clf.get('momentum', 0) + rank = clf.get('rank') + # 需要重新算这个日期的R² (用T-1数据) + clf_held_r2.append((date, mom, rank)) + +# 取前20个和后20个持仓日 +print(f" CL=F 总持仓天数: {len(clf_held_r2)}") +print(f"\n 入场日(added)的动量和R²:") + +entry_days = [] +for day in detail['days']: + if 'CL=F' in day.get('added', []): + date = day['date'] + clf = day['assets']['CL=F'] + entry_days.append((date, clf['momentum'], clf.get('rank'))) + # 计算信号日(T-1)的R² + sig_date = pd.Timestamp(date) - pd.Timedelta(days=1) + df = strategy.index_data['CL=F'] + mask = df.index <= sig_date + recent = df.loc[mask] + if len(recent) >= n_days: + prices = recent['close'].values[-n_days:] + r2, slope = compute_r2(prices) + print(f" {date}: momentum={clf['momentum']:>10.4f} R²={r2:.4f} slope={slope:.5f} " + f"entry_price={clf.get('entry_price_idx', '?')}") + +print(f"\n 离场日(removed)的前一天动量和R²:") +exit_days = [] +for day in detail['days']: + if 'CL=F' in day.get('removed', []): + date = day['date'] + # 信号日是T-1 + sig_date = pd.Timestamp(date) - pd.Timedelta(days=1) + df = strategy.index_data['CL=F'] + mask = df.index <= sig_date + recent = df.loc[mask] + if len(recent) >= n_days: + prices = recent['close'].values[-n_days:] + r2, slope = compute_r2(prices) + else: + r2, slope = 0, 0 + # 找前一天的momentum + clf = day['assets'].get('CL=F', {}) + print(f" {date}: 信号日R²={r2:.4f} slope={slope:.5f} " + f"momentum(当日)={clf.get('momentum', '?')} " + f"price={clf.get('index_close', '?')}") + +# 不同R²阈值下的回测影响估算 +print(f"\n{'=' * 80}") +print(f" R² 阈值对 CL=F 信号的过滤效果") +print(f"{'=' * 80}") + +for threshold in [0.05, 0.10, 0.15, 0.20, 0.25, 0.30]: + filtered_entries = [] + for day in detail['days']: + if 'CL=F' in day.get('added', []): + date = day['date'] + sig_date = pd.Timestamp(date) - pd.Timedelta(days=1) + df = strategy.index_data['CL=F'] + mask = df.index <= sig_date + recent = df.loc[mask] + if len(recent) >= n_days: + prices = recent['close'].values[-n_days:] + r2, slope = compute_r2(prices) + if r2 < threshold: + clf = day['assets']['CL=F'] + # 查这个入场最终盈亏 + entry_price = clf.get('entry_price_etf') + filtered_entries.append((date, r2, slope, clf['momentum'])) + + print(f" R²<{threshold:.2f} 过滤掉的入场: {len(filtered_entries)} 次") + for date, r2, slope, mom in filtered_entries: + print(f" {date}: R²={r2:.4f} slope={slope:.5f} momentum={mom:.4f}") diff --git a/rotation/simple_rotation.py b/rotation/simple_rotation.py index 81d500d..16e6d91 100644 --- a/rotation/simple_rotation.py +++ b/rotation/simple_rotation.py @@ -1203,7 +1203,7 @@ class SimpleRotationStrategy: # Build positions info for table # Sort holdings by momentum score descending - weight = 1.0 / self.select_num if self.select_num > 0 else 1.0 + position_weights = last_rec.get('position_weights', {}) sorted_holdings = sorted(holdings, key=lambda c: factors.get(c, 0) or 0, reverse=True) # Determine previous holdings to distinguish "调入" vs "维持" @@ -1249,7 +1249,7 @@ class SimpleRotationStrategy: positions_info.append({ 'name': name, 'code': code, 'etf': etf_code, - 'weight': weight, 'score': score, + 'weight': position_weights.get(code, 1.0 / len(holdings)), 'score': score, 'idx_close': idx_close, 'etf_close': etf_close, 'premium': premium, 'action': action, 'entry_date': entry_date, 'entry_price': entry_price, @@ -1276,7 +1276,7 @@ class SimpleRotationStrategy: premium = self._get_latest_premium(trade_code, last_date) exit_positions.append({ 'name': name, 'code': code, 'etf': etf_code, - 'weight': weight, 'score': None, + 'weight': 0, 'score': None, 'idx_close': idx_close, 'etf_close': etf_close, 'premium': premium, 'action': '调出', 'entry_date': None, 'entry_price': None, diff --git a/scripts/diag_ic_calibration.py b/scripts/diag_ic_calibration.py new file mode 100644 index 0000000..658240d --- /dev/null +++ b/scripts/diag_ic_calibration.py @@ -0,0 +1,162 @@ +"""IC Calibration diagnostic analysis script""" +import os, sys, numpy as np, pandas as pd +from pathlib import Path +from collections import Counter +os.environ['FLASK_API_URL'] = 'https://k3s.tokenpluse.xyz' +sys.path.insert(0, str(Path.cwd())) +from rotation.simple_rotation import SimpleRotationStrategy +from rotation.ic_calibration import ICCalibrator + +def run_and_analyze(select_num, ic_enabled): + s = SimpleRotationStrategy() + s.config.rotation.select_num = select_num + s.select_num = select_num + if ic_enabled: + s.config.rotation.ic_calibration.enabled = True + s.ic_calibrator = ICCalibrator(fwd_days=5, min_samples=120, ic_method='pearson') + r = s.run() + return s, s.daily_records + +print("=" * 90) +print(" Running 4 experiments...") +print("=" * 90) + +s1, r1 = run_and_analyze(3, False) +s2, r2 = run_and_analyze(3, True) +s3, r3 = run_and_analyze(1, False) +s4, r4 = run_and_analyze(1, True) + +# === 1. 持仓频率分析 === +print("\n\n" + "=" * 90) +print(" 1. 持仓频率对比") +print("=" * 90) + +for label, recs, s_obj in [ + ("原始 select=3", r1, s1), + ("IC校准 select=3", r2, s2), + ("原始 select=1", r3, s3), + ("IC校准 select=1", r4, s4), +]: + hold_counter = Counter() + for rec in recs: + for h in rec['holdings']: + nm = s_obj.config.asset_pools.assets.get(h) + name = nm.name if nm else h + hold_counter[name] += 1 + + print(f"\n [{label}] 持仓天数分布:") + for name, cnt in hold_counter.most_common(): + pct = cnt / len(recs) * 100 + print(f" {name:<12}: {cnt:>5} 天 ({pct:>5.1f}%)") + +# === 2. 关键资产选中变化 === +print("\n\n" + "=" * 90) +print(" 2. IC校准 select=3: 关键资产被选中变化") +print("=" * 90) + +targets = ['日经225', '德国DAX', '纳指100', '恒生指数', '黄金', '有色金属', '中证红利低波', '原油', '创业板指'] +for name_key in targets: + orig = sum(1 for r in r1 if any( + name_key == (s1.config.asset_pools.assets.get(h).name if s1.config.asset_pools.assets.get(h) else h) + for h in r['holdings'])) + calib = sum(1 for r in r2 if any( + name_key == (s2.config.asset_pools.assets.get(h).name if s2.config.asset_pools.assets.get(h) else h) + for h in r['holdings'])) + print(f" {name_key:<12}: 原始={orig:>5}天 -> IC校准={calib:>5}天 (差={calib-orig:+d})") + +# === 3. 收益质量分析 === +print("\n\n" + "=" * 90) +print(" 3. 持有期收益质量") +print("=" * 90) + +for label, recs in [ + ("原始 select=3", r1), ("IC校准 select=3", r2), + ("原始 select=1", r3), ("IC校准 select=1", r4), +]: + arr = np.array([r['daily_return'] for r in recs]) + win = arr[arr > 0] + lose = arr[arr < 0] + print(f"\n [{label}]:") + print(f" 日均: {arr.mean()*100:+.4f}% 胜率: {(arr>0).mean()*100:.1f}%") + print(f" 盈利均: {win.mean()*100:+.4f}% 亏损均: {lose.mean()*100:+.4f}% 盈亏比: {abs(win.mean()/lose.mean()):.3f}") + + # Worst 5 days + sorted_rets = sorted(recs, key=lambda x: x['daily_return']) + worst5 = [r['daily_return'] * 100 for r in sorted_rets[:5]] + best5 = [r['daily_return'] * 100 for r in sorted_rets[-5:]] + print(f" 最差5天: {['%.2f%%' % v for v in worst5]}") + print(f" 最好5天: {['%.2f%%' % v for v in best5]}") + +# === 4. 分段绩效 === +print("\n\n" + "=" * 90) +print(" 4. 分段绩效: 冷启动期(前170天) vs 校准生效期") +print("=" * 90) + +for label, recs in [ + ("原始 select=1", r3), ("IC校准 select=1", r4), + ("原始 select=3", r1), ("IC校准 select=3", r2), +]: + cold = recs[:170] + warm = recs[170:] + + cold_rets = np.array([r['daily_return'] for r in cold]) + warm_rets = np.array([r['daily_return'] for r in warm]) + + cold_cum = np.prod(1 + cold_rets) - 1 + warm_cum = np.prod(1 + warm_rets) - 1 + cold_ann = (1 + cold_cum) ** (252 / len(cold_rets)) - 1 + warm_ann = (1 + warm_cum) ** (252 / len(warm_rets)) - 1 + + warm_nav = np.cumprod(1 + warm_rets) + warm_peak = np.maximum.accumulate(warm_nav) + warm_dd = (warm_nav - warm_peak) / warm_peak + warm_maxdd = warm_dd.min() + + cold_nav = np.cumprod(1 + cold_rets) + cold_peak = np.maximum.accumulate(cold_nav) + cold_dd = (cold_nav - cold_peak) / cold_peak + cold_maxdd = cold_dd.min() + + print(f"\n [{label}]:") + print(f" 冷启动(1-170天): 累计={cold_cum*100:+.2f}% 年化={cold_ann*100:.2f}% 胜率={(cold_rets>0).mean()*100:.1f}% 最大回撤={cold_maxdd*100:.2f}%") + print(f" 校准后(170天+): 累计={warm_cum*100:+.2f}% 年化={warm_ann*100:.2f}% 胜率={(warm_rets>0).mean()*100:.1f}% 最大回撤={warm_maxdd*100:.2f}%") + +# === 5. IC校准 select=3 退化的根因:新增持仓的收益质量 === +print("\n\n" + "=" * 90) +print(" 5. IC校准 select=3: 新增持仓收益分析") +print("=" * 90) + +# For each day, check if holdings changed between original and calibrated +changed_days = 0 +new_asset_rets = [] +removed_asset_rets = [] +for orig_rec, calib_rec in zip(r1, r2): + if orig_rec['date'] != calib_rec['date']: + continue + orig_set = set(orig_rec['holdings']) + calib_set = set(calib_rec['holdings']) + if orig_set != calib_set: + changed_days += 1 + # The return of the calibrated portfolio on that day + new_asset_rets.append(calib_rec['daily_return']) + removed_asset_rets.append(orig_rec['daily_return']) + +print(f"\n 持仓变化天数: {changed_days} / {len(r1)}") +if new_asset_rets: + new_arr = np.array(new_asset_rets) + old_arr = np.array(removed_asset_rets) + diff = new_arr - old_arr + print(f" 变化日 - IC校准收益: {new_arr.mean()*100:+.4f}% 原始收益: {old_arr.mean()*100:+.4f}% 差: {diff.mean()*100:+.4f}%") + print(f" 变化日胜率: IC校准={((new_arr>0).mean()*100):.1f}% 原始={((old_arr>0).mean()*100):.1f}%") + print(f" 变化日累计: IC校准={new_arr.sum()*100:+.2f}% 原始={old_arr.sum()*100:+.2f}% 差={diff.sum()*100:+.2f}%") + +# === 6. IC stats === +print("\n\n" + "=" * 90) +print(" 6. IC校准最终状态") +print("=" * 90) +calibrator = s2.ic_calibrator +for code in sorted(calibrator._history.keys()): + stats = calibrator.get_stats(code) + nm = s2.config.asset_pools.assets.get(code) + name = nm.name if nm else code + print(f" {name:<12} ({code:<14}): n={stats.n_samples:>5} IC={stats.ic_value:+.4f} sign={stats.ic_sign:+d}")