Files
etf/tests/verify_premium_calculation.py
aszerW 06fc62c51b test(premium): add ETF溢价率计算验证脚本及校验报告
新增验证脚本 tests/verify_premium_calculation.py,支持批量验证config.yaml中所有ETF

验证结果:
- 11只ETF全部验证通过,溢价率计算与集思录完全一致
- 动态匹配原则正确:优先当天净值,不存在时用T-1净值
- 净值日期规则验证:
  - A股/港股/商品/债券/日本QDII:当天净值
  - 美股QDII/欧洲QDII/原油QDII:T-1净值

相关文档:
- ETF溢价率官方定义调研报告.md
- ETF溢价率计算校验报告.md
2026-05-16 10:24:28 +08:00

477 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

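"""
ETF premium-rate verification script.

Checks whether the current code reproduces Jisilu's historical premium-rate data.

Usage:
1. Pass the address of the k3s-hosted Flask service with --api-url
   (the script reads no FLASK_API_URL variable; the flag is required)
2. Collect reference data from Jisilu (manually or with a crawler)
3. Run the script and compare the results

    python tests/verify_premium_calculation.py --api-url http://your-k3s-service:5000
"""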
import argparse
from datetime import datetime, timedelta
from typing import Optional

import pandas as pd
import requests
def fetch_api_premium(api_url: str, etf_code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
    """
    Fetch an ETF's price, NAV and premium history from the Flask API.

    Calls the ``/api/v1/ohlcv`` endpoint and reads three parts of its JSON
    payload: ``data`` (price records), ``nav.data`` (NAV records) and
    ``premium_series`` (premium records), each keyed by a ``date`` field.

    Args:
        api_url: Base URL of the Flask service; the endpoint path is
            appended to it directly.
        etf_code: ETF code, sent as the ``code`` query parameter.
        start_date: Sent as the ``start`` query parameter.
        end_date: Sent as the ``end`` query parameter.

    Returns:
        A DataFrame indexed by price date with columns ``price``, ``nav``,
        ``nav_date`` and ``premium_api``. All four columns are always
        present (NaN/NaT where the API supplied nothing) so callers can
        index them unconditionally. ``None`` is returned when the request
        fails, the API reports an error, or no usable price data exists.
    """
    endpoint = f"{api_url}/api/v1/ohlcv"
    params = {
        'code': etf_code,
        'start': start_date,
        'end': end_date,
    }
    try:
        response = requests.get(endpoint, params=params, timeout=30)
        data = response.json()
        if isinstance(data, dict) and 'error' in data:
            print(f"✗ API返回错误: {data['error']}")
            return None
        # A JSON body without an "error" key can still be an HTTP failure.
        response.raise_for_status()
        if not isinstance(data, dict):
            raise ValueError(f"响应格式异常: {type(data).__name__}")
        return _build_premium_frame(data)
    except (requests.RequestException, KeyError, TypeError, ValueError) as e:
        # Best-effort boundary of a CLI check: report the problem and let the
        # caller treat the ETF as "no data" instead of aborting the whole run.
        print(f"✗ 获取数据失败: {e}")
        return None


def _parse_dated_frame(records) -> pd.DataFrame:
    """Build a DataFrame from API records, indexed by the parsed ``date`` field when present."""
    frame = pd.DataFrame(records)
    if 'date' in frame.columns:
        frame['date'] = pd.to_datetime(frame['date'])
        frame = frame.set_index('date')
    return frame


def _build_premium_frame(data: dict) -> Optional[pd.DataFrame]:
    """Combine the price, NAV and premium parts of an ohlcv payload into one frame."""
    price_df = _parse_dated_frame(data.get('data', []))
    if len(price_df) == 0:
        print("✗ 无价格数据")
        return None
    if not isinstance(price_df.index, pd.DatetimeIndex):
        # Without dates the rows cannot be matched against NAV or premium.
        print("✗ 价格数据缺少 date 字段")
        return None

    result = price_df[['close']].rename(columns={'close': 'price'})
    result['nav'] = float('nan')
    result['nav_date'] = pd.NaT
    result['premium_api'] = float('nan')

    nav_df = _parse_dated_frame((data.get('nav') or {}).get('data', []))
    if len(nav_df) > 0 and isinstance(nav_df.index, pd.DatetimeIndex):
        # The API can return the same NAV date twice; keep the last record.
        nav_df = nav_df[~nav_df.index.duplicated(keep='last')]
        nav_series = nav_df['nav']
        nav_series = nav_series[nav_series.index.notna()].sort_index()
        if not nav_series.empty:
            # Prefer the same-day NAV; otherwise fall back to the most recent
            # earlier NAV record. Subtracting one calendar day (the previous
            # approach) finds nothing on Mondays or after holidays.
            # NOTE(review): assumes the API's own T-1 fallback also means
            # "previous published NAV" - confirm against the service code.
            nav_dates = pd.Series(nav_series.index, index=nav_series.index)
            result['nav'] = nav_series.reindex(result.index, method='ffill').to_numpy()
            result['nav_date'] = nav_dates.reindex(result.index, method='ffill').to_numpy()

    premium_df = _parse_dated_frame(data.get('premium_series', []))
    if len(premium_df) > 0:
        premium = premium_df['premium']
        premium = premium[~premium.index.duplicated(keep='last')]
        result['premium_api'] = premium.reindex(result.index).to_numpy()

    return result
def calculate_manual_premium(result_df: pd.DataFrame) -> pd.DataFrame:
    """
    Recompute the premium rate by hand to cross-check the API's value.

    premium = (price - nav) / nav

    Args:
        result_df: Frame with a ``price`` column and, optionally, a ``nav``
            column. It is modified in place.

    Returns:
        The same ``result_df`` object with a float ``premium_manual`` column
        added. A row is NaN when price or NAV is missing, non-numeric, or
        when NAV is not strictly positive.
    """
    # Columns may arrive as object dtype (None placeholders, JSON strings);
    # coerce once so the arithmetic below is plain float math.
    price = pd.to_numeric(result_df['price'], errors='coerce')
    if 'nav' in result_df.columns:
        nav = pd.to_numeric(result_df['nav'], errors='coerce')
    else:
        # No NAV data at all: every manual premium is undefined.
        nav = pd.Series(float('nan'), index=result_df.index)

    # Mask non-positive NAVs so they yield NaN instead of inf or a
    # meaningless negative ratio.
    usable_nav = nav.where(nav > 0)
    result_df['premium_manual'] = ((price - usable_nav) / usable_nav).to_numpy()
    return result_df
def verify_single_etf(api_url: str, etf_code: str, days: int = 30) -> Optional[pd.DataFrame]:
    """
    Verify the premium-rate calculation for a single ETF.

    Fetches the last ``days`` calendar days from the API, recomputes the
    premium from price and NAV, prints a comparison table for the 10 most
    recent rows, then prints match statistics over the whole window.

    Args:
        api_url: Base URL of the Flask service.
        etf_code: ETF code to verify.
        days: Look-back window in calendar days.

    Returns:
        The comparison DataFrame (including ``premium_manual``), or ``None``
        when no data could be fetched.
    """
    now = datetime.now()
    end_date = now.strftime('%Y-%m-%d')
    start_date = (now - timedelta(days=days)).strftime('%Y-%m-%d')
    print(f"\n{'='*60}")
    print(f"验证ETF: {etf_code}")
    print(f"时间范围: {start_date} ~ {end_date}")
    print(f"{'='*60}")

    # Fetch API data.
    result = fetch_api_premium(api_url, etf_code, start_date, end_date)
    if result is None or len(result) == 0:
        print("✗ 无法获取数据")
        return None

    # The API may omit NAV or premium data entirely; make sure the columns
    # read below exist so a missing series shows as blanks, not a KeyError.
    for column in ('nav', 'nav_date', 'premium_api'):
        if column not in result.columns:
            result[column] = None

    # Recompute the premium by hand.
    result = calculate_manual_premium(result)

    # Side-by-side comparison.
    print("\n溢价率对比最近10天:")
    print(f"{'日期':<12} {'价格':<8} {'净值':<8} {'净值日期':<12} {'API溢价率':<10} {'手动溢价率':<10} {'差异':<8}")
    print("-" * 70)
    # Only the 10 most recent rows are printed.
    for date, row in result.tail(10).iterrows():
        date_str = date.strftime('%Y-%m-%d')
        price_str = f"{row['price']:.3f}" if pd.notna(row['price']) else ""
        nav_str = f"{row['nav']:.4f}" if pd.notna(row['nav']) else ""
        nav_date_str = row['nav_date'].strftime('%Y-%m-%d') if pd.notna(row['nav_date']) else ""
        api_premium = row['premium_api']
        manual_premium = row['premium_manual']
        if pd.notna(api_premium) and pd.notna(manual_premium):
            api_str = f"{api_premium*100:.2f}%"
            manual_str = f"{manual_premium*100:.2f}%"
            diff = abs(api_premium - manual_premium)
            diff_str = f"{diff*100:.4f}%" if diff < 0.0001 else f"{diff*100:.2f}%"
            # Both branches used to be the same empty string, so the marker
            # never distinguished a match from a mismatch.
            match = "✓" if diff < 0.0001 else "✗"
        else:
            api_str = ""
            manual_str = ""
            diff_str = ""
            match = "?"
        print(f"{date_str:<12} {price_str:<8} {nav_str:<8} {nav_date_str:<12} {api_str:<10} {manual_str:<10} {diff_str:<8} {match}")

    # Match statistics over every row where both premiums are available.
    api_series = pd.to_numeric(result['premium_api'], errors='coerce')
    manual_series = pd.to_numeric(result['premium_manual'], errors='coerce')
    diffs = (api_series - manual_series).abs().dropna()
    if len(diffs) > 0:
        total = len(diffs)
        exact_match = int((diffs < 0.0001).sum())
        close_match = int((diffs < 0.001).sum())
        print(f"\n匹配统计:")
        print(f" 完全匹配(差异<0.0001: {exact_match}/{total} ({exact_match/total*100:.1f}%)")
        print(f" 接近匹配(差异<0.001: {close_match}/{total} ({close_match/total*100:.1f}%)")
        if exact_match == total:
            print(" ✓ API溢价率计算正确")
        else:
            print(" ⚠ 存在计算差异,需要检查")
    return result
def verify_vs_jisilu(api_url: str, etf_code: str, jisilu_data: dict) -> bool:
    """
    Compare the API's values for one trading day against Jisilu reference data.

    Args:
        api_url: Base URL of the Flask service.
        etf_code: ETF code to verify.
        jisilu_data: Reference record, for example::

            {
                'price_date': '2026-05-15',
                'price': 3.970,
                'nav_date': '2026-05-15',  # or '2026-05-14' (T-1)
                'nav': 3.9402,
                'premium': 0.0076,         # premium as a fraction
            }

    Returns:
        ``True`` only when the API premium is within 0.001 of the reference
        and both sides used the same NAV date; ``False`` otherwise, including
        when the API has no data or no premium for the price date.
    """
    price_date = jisilu_data['price_date']
    print(f"\n{'='*60}")
    print(f"对比集思录数据: {etf_code} @ {price_date}")
    print(f"{'='*60}")

    # Only a few days leading up to the price date are needed.
    start_date = (datetime.strptime(price_date, '%Y-%m-%d') - timedelta(days=5)).strftime('%Y-%m-%d')
    end_date = price_date
    result = fetch_api_premium(api_url, etf_code, start_date, end_date)
    if result is None:
        print("✗ 无法获取API数据")
        return False

    # Locate the row for the reference date.
    target_date = pd.to_datetime(price_date)
    if target_date not in result.index:
        print(f"✗ API数据中没有 {price_date}")
        return False
    row = result.loc[target_date]
    if isinstance(row, pd.DataFrame):
        # Duplicate dates in the price series: use the last record.
        row = row.iloc[-1]

    # .get() keeps this working when the API returned no NAV/premium columns.
    api_nav = row.get('nav')
    api_nav_date_raw = row.get('nav_date')
    api_premium = row.get('premium_api')
    api_nav_date = api_nav_date_raw.strftime('%Y-%m-%d') if pd.notna(api_nav_date_raw) else None

    print(f"\n集思录数据:")
    print(f" 价格日期: {jisilu_data['price_date']}")
    print(f" 收盘价: {jisilu_data['price']}")
    print(f" 净值日期: {jisilu_data['nav_date']}")
    print(f" 净值: {jisilu_data['nav']}")
    print(f" 溢价率: {jisilu_data['premium']*100:.2f}%")

    # Format optional values first. Putting the conditional inside the
    # f-string format spec (as before) raises "Invalid format specifier".
    api_nav_str = f"{api_nav:.4f}" if pd.notna(api_nav) else ''
    api_premium_str = f"{api_premium*100:.2f}%" if pd.notna(api_premium) else ''
    print(f"\nAPI数据:")
    print(f" 价格日期: {price_date}")
    print(f" 收盘价: {row['price']:.3f}")
    print(f" 净值日期: {api_nav_date or ''}")
    print(f" 净值: {api_nav_str}")
    print(f" 溢价率: {api_premium_str}")

    print(f"\n对比结果:")
    # 1. Price.
    price_diff = abs(row['price'] - jisilu_data['price'])
    price_match = price_diff < 0.01
    print(f" 价格差异: {price_diff:.3f} {'✓' if price_match else '✗'}")

    # 2. NAV date (the key check: same-day vs T-1 rule).
    nav_date_match = api_nav_date == jisilu_data['nav_date']
    print(f" 净值日期: API={api_nav_date}, 集思录={jisilu_data['nav_date']} {'✓' if nav_date_match else '⚠ 不匹配!'}")

    # 3. NAV value, only meaningful when both sides used the same NAV date.
    if pd.notna(api_nav) and nav_date_match:
        nav_diff = abs(api_nav - jisilu_data['nav'])
        nav_match = nav_diff < 0.01
        print(f" 净值差异: {nav_diff:.4f} {'✓' if nav_match else '✗'}")

    # 4. Premium (the core check).
    if pd.isna(api_premium):
        print(f" 溢价率: API无数据")
        return False
    premium_diff = abs(api_premium - jisilu_data['premium'])
    premium_match = premium_diff < 0.001
    print(f" 溢价率差异: {premium_diff*100:.2f}% {'✓' if premium_match else '⚠ 不匹配!'}")
    if premium_match and nav_date_match:
        print(f"\n✓✓✓ 完全匹配API溢价率计算正确")
        return True
    print(f"\n⚠⚠⚠ 存在差异,需要排查")
    return False
# Every ETF listed in config.yaml as (code, market) pairs, in config order.
# The code list and the market lookup below are both derived from this table
# so the two can never drift apart.
_ETF_UNIVERSE = (
    ('159915.SZ', 'A'),          # ChiNext ETF (A-share)
    ('512890.SH', 'A'),          # Dividend low-volatility ETF (A-share)
    ('513100.SH', 'US'),         # Nasdaq ETF (US QDII) - T-1 NAV rule
    ('513520.SH', 'JP'),         # Nikkei ETF (Japan QDII) - same-day NAV rule (ChinaAMC)
    ('513030.SH', 'EU'),         # Germany DAX ETF (Europe QDII) - T-1 NAV rule
    ('159920.SZ', 'HK'),         # Hang Seng ETF (Hong Kong)
    ('513130.SH', 'HK'),         # Hang Seng Tech ETF (Hong Kong)
    ('518880.SH', 'COMMODITY'),  # Gold ETF (commodity)
    ('160723.SZ', 'COMMODITY'),  # Crude oil ETF (commodity QDII) - T-1 NAV rule
    ('159980.SZ', 'COMMODITY'),  # Non-ferrous metals ETF (commodity)
    ('511090.SH', 'BOND'),       # Treasury bond ETF (bond)
)
ALL_CONFIG_ETFS = [code for code, _market in _ETF_UNIVERSE]
ETF_MARKET_MAP = dict(_ETF_UNIVERSE)

# Jisilu reference data (must be refreshed by hand with the latest figures).
# Sources: https://www.jisilu.cn/data/etf/ and https://www.jisilu.cn/data/qdii/
JISILU_REFERENCE_DATA = {
    # ChiNext ETF - same-day NAV
    '159915.SZ': dict(
        price_date='2026-05-15',
        price=3.970,
        nav_date='2026-05-15',
        nav=3.9402,
        premium=0.0076,
    ),
    # Nasdaq ETF - T-1 NAV (US QDII)
    '513100.SH': dict(
        price_date='2026-05-15',
        price=2.100,
        nav_date='2026-05-14',
        nav=2.0200,
        premium=0.0396,
    ),
    # Nikkei ETF - same-day NAV (ChinaAMC discloses on the day)
    '513520.SH': dict(
        price_date='2026-05-15',
        price=2.085,
        nav_date='2026-05-15',
        nav=2.0626,
        premium=0.0109,
    ),
}
def verify_all_etfs(api_url: str, days: int = 10):
    """
    Verify the premium-rate calculation for every ETF in ``ALL_CONFIG_ETFS``.

    For each ETF the latest row returned by the API is checked: the API's
    premium is compared with a manual recomputation, and the NAV date is
    classified as same-day or T-1. A summary table and pass/fail counts
    are printed so problems are easy to spot.

    Args:
        api_url: Base URL of the Flask service.
        days: Look-back window in calendar days.

    Returns:
        A list with one dict per ETF containing the keys ``code``,
        ``market``, ``status``, ``latest_premium`` (percent, or ``None``),
        ``nav_rule`` and ``date`` (``None`` when there was no data).
    """
    print(f"\n{'='*70}")
    print(f"批量验证所有ETF溢价率计算config.yaml")
    print(f"API地址: {api_url}")
    print(f"{'='*70}")
    now = datetime.now()
    end_date = now.strftime('%Y-%m-%d')
    start_date = (now - timedelta(days=days)).strftime('%Y-%m-%d')

    results = []
    for etf_code in ALL_CONFIG_ETFS:
        market = ETF_MARKET_MAP.get(etf_code, 'UNKNOWN')
        df = fetch_api_premium(api_url, etf_code, start_date, end_date)
        if df is None or len(df) == 0:
            # Same keys as the success record: the summary loop below reads
            # 'date' and 'nav_rule' for every row.
            results.append({
                'code': etf_code,
                'market': market,
                'status': '无数据',
                'latest_premium': None,
                'nav_rule': None,
                'date': None,
            })
            continue

        # The API may omit NAV or premium data entirely; create the columns
        # so the manual recomputation and the lookups below cannot KeyError.
        for column in ('nav', 'nav_date', 'premium_api'):
            if column not in df.columns:
                df[column] = None

        # Recompute the premium by hand and inspect the most recent row.
        df = calculate_manual_premium(df)
        latest = df.iloc[-1]
        latest_date = df.index[-1].strftime('%Y-%m-%d')
        api_premium = latest.get('premium_api')
        manual_premium = latest.get('premium_manual')
        nav_date = latest.get('nav_date')

        # Classify which NAV date the latest row was matched with.
        if pd.notna(nav_date):
            nav_date_str = nav_date.strftime('%Y-%m-%d')
            if nav_date_str == latest_date:
                nav_rule = '当天净值'
            else:
                nav_rule = f'T-1净值 ({nav_date_str})'
        else:
            nav_rule = '无净值'

        # Grade the API premium against the manual value.
        if pd.notna(api_premium) and pd.notna(manual_premium):
            diff = abs(api_premium - manual_premium)
            if diff < 0.0001:
                status = '✓ 正确'
            elif diff < 0.001:
                status = '⚠ 接近'
            else:
                status = '⚠ 错误'
            premium_pct = api_premium * 100
        else:
            status = '⚠ 无法验证'
            premium_pct = None

        results.append({
            'code': etf_code,
            'market': market,
            'status': status,
            'latest_premium': premium_pct,
            'nav_rule': nav_rule,
            'date': latest_date,
        })

    # Summary table.
    print(f"\n验证结果汇总:")
    print(f"{'ETF代码':<12} {'市场':<12} {'净值规则':<16} {'最新溢价率':<10} {'状态':<10} {'日期':<12}")
    print("-" * 70)
    for r in results:
        # Compare against None explicitly: a premium of exactly 0.0 is a
        # valid value and must still be printed.
        premium_str = f"{r['latest_premium']:.2f}%" if r['latest_premium'] is not None else ""
        # None cannot be formatted with a width spec, so blank it first.
        nav_rule_str = r['nav_rule'] or ""
        date_str = r['date'] or ""
        print(f"{r['code']:<12} {r['market']:<12} {nav_rule_str:<16} {premium_str:<10} {r['status']:<10} {date_str:<12}")

    # Totals. An ETF with no data counts as a failure; otherwise a run in
    # which nothing could be fetched would be reported as all-passed.
    failure_markers = ('错误', '无法', '无数据')
    correct_count = sum(1 for r in results if r['status'] == '✓ 正确')
    error_count = sum(1 for r in results if any(marker in r['status'] for marker in failure_markers))
    print(f"\n{'='*70}")
    print(f"统计: 正确={correct_count}, 错误={error_count}, 总数={len(results)}")
    if error_count == 0:
        print(f"✓✓✓ 所有ETF溢价率计算验证通过")
    else:
        print(f"⚠⚠⚠ 有 {error_count} 个ETF验证失败需要检查")
    print(f"{'='*70}")
    return results
def main():
    """
    Command-line entry point.

    Modes, in order of precedence:
      --all     verify every ETF in ALL_CONFIG_ETFS
      --jisilu  compare each entry of JISILU_REFERENCE_DATA with the API
      default   verify the single ETF given by --etf
    """
    cli = argparse.ArgumentParser(description='验证ETF溢价率计算')
    cli.add_argument('--api-url', required=True, help='Flask API URL (k3s服务地址)')
    cli.add_argument('--etf', default='159915.SZ', help='ETF代码')
    cli.add_argument('--days', type=int, default=30, help='回看天数')
    cli.add_argument('--jisilu', action='store_true', help='使用集思录对照数据验证')
    cli.add_argument('--all', action='store_true', help='验证config.yaml中所有ETF')
    opts = cli.parse_args()

    if opts.all:
        # Batch-verify every configured ETF.
        verify_all_etfs(opts.api_url, opts.days)
        return

    if not opts.jisilu:
        # Default mode: a single ETF.
        verify_single_etf(opts.api_url, opts.etf, opts.days)
        return

    # Jisilu mode: run every reference comparison, then summarise.
    print("\n批量验证集思录对照数据...")
    outcomes = [
        verify_vs_jisilu(opts.api_url, code, reference)
        for code, reference in JISILU_REFERENCE_DATA.items()
    ]
    banner = '=' * 60
    print(f"\n{banner}")
    if all(outcomes):
        print("✓✓✓ 所有ETF溢价率验证通过API计算逻辑正确")
    else:
        print("⚠⚠⚠ 部分ETF溢价率验证失败需要检查代码")
    print(f"{banner}")


if __name__ == "__main__":
    main()