Files
etf/framework_v2/scripts/verify_etf_hfq_fix.py
aszerW 6a86a27108 test(scripts): 新增ETF数据获取验证脚本
新增脚本:
- verify_etf_hfq_fix.py: 验证指数使用raw、ETF使用hfq
- compare_index_vs_etf_returns.py: 对比指数收益vs ETF收益的KPI指标

验证内容:
- 指数数据完整性检查
- ETF数据完整性检查
- ETF是否正确使用hfq后复权价格(抽样对比raw和hfq)
- 验证510300.SH等ETF的hfq/raw比值(应>1.0)
2026-05-26 19:55:01 +08:00

134 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Verify the ETF data-fetching fix.

Checks:
1. Index data uses adj='raw'
2. ETF data uses adj='hfq'
3. The data dict contains both indices and ETFs
"""
import sys
from pathlib import Path
# Add the project root (three directory levels above this file) to sys.path;
# this must run before the framework_v2 imports below.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
from framework_v2.config import load_config
from framework_v2.strategies.rotation.rotation import GlobalRotationStrategy
# Minimum hfq/raw close ratio for a price to be treated as back-adjusted
# (a difference above 1% is taken as evidence that hfq prices are in use).
_HFQ_MIN_RATIO = 1.01
# Number of trade codes spot-checked against the raw price source.
_HFQ_SAMPLE_SIZE = 3


def _last_close(df):
    """Return the last ``close`` value of *df*, or ``None`` if there is none.

    Guards the ``.iloc[-1]`` lookups so that a missing or empty frame is
    reported as a failed check instead of aborting the script.
    """
    if df is None or df.empty or 'close' not in df.columns:
        return None
    return df['close'].iloc[-1]


def _verify_hfq_sample(data, sample_codes, fetcher, start, end):
    """Count sampled codes whose loaded close is clearly above the raw close.

    For every code in *sample_codes* the latest ``close`` found in *data* is
    compared with the latest ``close`` fetched with ``adj='raw'``. A code is
    counted only when the ratio exceeds ``_HFQ_MIN_RATIO``; every code that
    cannot be checked prints the reason and is not counted.

    NOTE(review): the two "latest" values are assumed to refer to the same
    trading day; the frames are not aligned by date here.
    """
    verified = 0
    for code in sample_codes:
        if code not in data:
            print(f"{code}: 数据缺失,无法验证")
            continue
        hfq_close = _last_close(data[code])
        # Fetch the unadjusted series for the same code as the baseline.
        raw_df = fetcher._source.fetch(code, start, end, adj='raw', asset_type='china_etf')
        raw_close = _last_close(raw_df)
        if hfq_close is None or raw_close is None:
            print(f"{code}: 无可用收盘价,无法验证")
            continue
        ratio = hfq_close / raw_close if raw_close > 0 else 1
        if ratio > _HFQ_MIN_RATIO:
            print(f"{code}: raw={raw_close:.4f}, hfq={hfq_close:.4f}, 倍数={ratio:.4f} (正确)")
            verified += 1
        else:
            print(f"{code}: raw={raw_close:.4f}, hfq={hfq_close:.4f}, 倍数={ratio:.4f} (错误)")
    return verified


def main():
    """Load the rotation config, fetch strategy data and print a verification report.

    Checks that every signal code and every trade code is present in the
    fetched data, then spot-checks a few trade codes against raw prices to
    confirm that the loaded close prices are back-adjusted.

    Returns:
        int: 0 when every check passed, 1 otherwise, so the value can be
        used directly as the process exit status.
    """
    print("=" * 70)
    print(" 验证 ETF 数据获取修复")
    print("=" * 70)

    # Load the configuration.
    config_path = project_root / 'framework_v2' / 'config' / 'rotation_global.yaml'
    print(f"\n加载配置: {config_path}")
    config = load_config(str(config_path))

    # Build the strategy and fetch its data.
    strategy = GlobalRotationStrategy(config)
    print("\n" + "=" * 70)
    print("获取数据...")
    print("=" * 70)
    data = strategy.get_data()

    # Describe what was fetched.
    print("\n" + "=" * 70)
    print("数据结构分析")
    print("=" * 70)
    signal_to_trade = config.asset_pools.get_signal_to_trade_mapping()
    signal_codes = config.asset_pools.get_signal_codes()
    trade_codes = set(signal_to_trade.values())
    # Sort once: gives a stable listing order and a reproducible sample below.
    ordered_trade_codes = sorted(trade_codes)

    print(f"\n信号标的(指数): {len(signal_codes)}")
    for code in sorted(signal_codes):
        if code not in data:
            print(f"{code}: 数据缺失")
            continue
        df = data[code]
        has_hfq = 'close' in df.columns and 'close_hfq' in df.columns
        print(f"{code}: {len(df)} 条, 有 close_hfq: {has_hfq}")

    print(f"\n交易标的ETF: {len(trade_codes)}")
    for code in ordered_trade_codes:
        if code not in data:
            print(f"{code}: 数据缺失")
            continue
        df = data[code]
        has_nav = 'nav' in df.attrs
        has_premium = 'premium_series' in df.attrs
        print(f"{code}: {len(df)}")
        latest_close = _last_close(df)
        if latest_close is None:
            # An empty frame has no latest close; report it instead of raising.
            print(" close (最新): 无数据")
        else:
            print(f" close (最新): {latest_close:.4f}")
        print(f" 有 nav: {has_nav}")
        print(f" 有 premium: {has_premium}")

    # Evaluate the checks.
    print("\n" + "=" * 70)
    print("验证结果")
    print("=" * 70)

    index_ok = all(code in data for code in signal_codes)
    print(f"\n指数数据完整性: {'✓ 全部获取' if index_ok else '✗ 部分缺失'}")

    etf_ok = all(code in data for code in trade_codes)
    print(f"ETF 数据完整性: {'✓ 全部获取' if etf_ok else '✗ 部分缺失'}")

    # Spot-check hfq usage by comparing the loaded prices with raw prices.
    print("\n验证 ETF 是否使用 hfq抽样检查...")
    from framework_v2.shared.data import FlaskAPIFetcher
    from datetime import date

    fetcher = FlaskAPIFetcher()
    # Slicing a set directly would pick different codes on different runs.
    sample_codes = ordered_trade_codes[:_HFQ_SAMPLE_SIZE]

    start = config.backtest.start_date
    end = config.backtest.end_date
    if end is None:
        end = date.today().strftime('%Y-%m-%d')

    etf_hfq_verified = _verify_hfq_sample(data, sample_codes, fetcher, start, end)
    # An empty sample proves nothing, so it must not count as a pass.
    hfq_ok = bool(sample_codes) and etf_hfq_verified == len(sample_codes)
    print(f"ETF 使用 hfq: {etf_hfq_verified}/{len(sample_codes)} {'✓ 正确' if hfq_ok else '✗ 错误'}")

    # Summary.
    passed = index_ok and etf_ok and hfq_ok
    print("\n" + "=" * 70)
    if passed:
        print("✓ 验证通过:数据获取逻辑正确")
        print(" - 指数使用 raw原始价格")
        print(" - ETF 使用 hfq后复权价格")
    else:
        print("✗ 验证失败:数据获取存在问题")
    print("=" * 70)
    return 0 if passed else 1


if __name__ == '__main__':
    # Propagate the verification result as the process exit status.
    sys.exit(main())