""" 分析历史 Top 3 标的中存在负分的情况 (正式版) """ import sys import yaml import pandas as pd import numpy as np from pathlib import Path from datetime import datetime from dotenv import load_dotenv # 加载环境变量 load_dotenv() # 添加项目根目录 sys.path.insert(0, str(Path(__file__).parent.parent)) from strategies.rotation.engine import RotationStrategy from core.factors.momentum import compute_factors def load_config(config_path: str) -> dict: with open(config_path, "r", encoding="utf-8") as f: return yaml.safe_load(f) def analyze_negative_scores(): config_path = "config/strategies/rotation.yaml" config = load_config(config_path) # 强制不使用过滤,以获取完整数据 config['diversified'] = True config['select_num'] = 3 strategy = RotationStrategy(config) # 使用策略内部方法获取数据 with strategy.data_source: index_data, etf_data, etf_nav_data, benchmark_data, valid_codes, index_ohlcv_data = strategy.data_source.fetch_all( config['code_list'], config['benchmark']['code'], config["start_date"], datetime.now().strftime('%Y-%m-%d') ) # 手动计算因子 (不带过滤) # 注意:为了分析原始得分,我们将 compute_factors 内部调用的过滤函数暂时跳过或分析结果 factor_data, valid_codes = compute_factors( index_data, valid_codes, n=config["n_days"], factor_type=config["factor_type"], auto_day=config.get("auto_day", False), index_ohlcv_data=index_ohlcv_data ) score_cols = [c for c in factor_data.columns if c.startswith("得分_")] code_config = config['code_list'] total_days = len(factor_data) results = [] last_top_3 = set() rebalance_count = 0 for date, row in factor_data.iterrows(): scores = row[score_cols].dropna() if scores.empty: continue # 模拟 diversified 逻辑下的 Top 3 (不带 >0 过滤) cat_best = {} for col_name, s in scores.items(): code = col_name.replace("得分_", "") cat = code_config.get(code, {}).get("market", "未知") if cat not in cat_best or s > cat_best[cat][1]: cat_best[cat] = (code, s) sorted_cats = sorted(cat_best.values(), key=lambda x: x[1], reverse=True) top_3_raw = sorted_cats[:3] current_top_3_codes = set(code for code, s in top_3_raw) # 判断是否发生调仓(目标持仓集合发生变化) if current_top_3_codes != last_top_3: rebalance_count += 1 # 统计调仓日这 3 只中得分 <= 0 的数量 neg_count = sum(1 for code, s in top_3_raw if s <= 0) results.append({ "date": date, "neg_count": neg_count, "top_1_score": top_3_raw[0][1], "top_2_score": top_3_raw[1][1] if len(top_3_raw)>1 else np.nan, "top_3_score": top_3_raw[2][1] if len(top_3_raw)>2 else np.nan, "top_1_name": code_config.get(top_3_raw[0][0], {}).get('name') }) last_top_3 = current_top_3_codes neg_df = pd.DataFrame(results) print(f"\n{'='*60}") print(f"调仓日 (Rebalance Day) Top 3 标的出现负分情况分析") print(f"{'='*60}") print(f"总调仓次数: {rebalance_count}") print(f"涉及负分(<=0)的调仓次数: {len(neg_df[neg_df['neg_count']>0])} ({len(neg_df[neg_df['neg_count']>0])/rebalance_count:.1%})") if not neg_df.empty: print(f"\n调仓日负分详细分布:") print(f" - 只有 1 只标的为负: {len(neg_df[neg_df['neg_count']==1])} 次") print(f" - 有 2 只标的为负: {len(neg_df[neg_df['neg_count']==2])} 次") print(f" - 全部 3 只标的均为负: {len(neg_df[neg_df['neg_count']==3])} 次") print(f"\n最近 10 次涉及负分的调仓详情:") neg_df['date'] = pd.to_datetime(neg_df['date']) print(neg_df[neg_df['neg_count']>0][['date', 'neg_count', 'top_1_score', 'top_1_name']].tail(10)) if __name__ == "__main__": analyze_negative_scores()