## 核心功能 - get_trading_calendar(): 通过 API 获取准确交易日历 - 替换临时 pandas BDay 实现 - 调用 /api/v1/trading-calendar 端点 - 支持动态日期范围(start, end 参数) - 支持 A/US/HK 多市场 ## 端到端测试 - test_end_to_end.py: 完整流程测试(5 个阶段) - 阶段 1: 数据获取(纳指 502 天,创业板 484 天) - 阶段 2: 因子计算(MomentumFactor n_days=20) - 阶段 3: 数据对齐(CrossMarketAligner 到 A 股 484 天) - 阶段 4: 信号生成(Top-1,469 个信号) - 阶段 5: 收益计算(年化 51.71%,超额 96.37%) ## 测试验证 - 5/5 阶段通过 - API 日历: 484 个交易日(准确) - 纳指休市日: 18 天收益率 = 0% - 收益率 NaN: 0 - 跨市场对齐成功 ## 架构改进 - 从近似日历 → 准确 API 日历 - 无需手动维护节假日列表 - API 失败时抛出异常(不静默降级)
469 lines
15 KiB
Python
469 lines
15 KiB
Python
"""
|
||
端到端集成测试:数据获取 → 因子计算 → 数据对齐 → 信号生成
|
||
|
||
测试场景:
|
||
1. 获取纳指(美股)和创业板(A股)数据
|
||
2. 计算动量因子
|
||
3. 对齐到 A 股交易日历
|
||
4. 生成 Top-N 信号
|
||
5. 验证完整流程
|
||
|
||
目标:
|
||
- 验证 FlaskAPIFetcher 数据获取
|
||
- 验证 MomentumFactor 因子计算
|
||
- 验证 CrossMarketAligner 数据对齐
|
||
- 验证完整流程无数据泄漏
|
||
"""
|
||
|
||
import sys
|
||
from pathlib import Path
|
||
import pandas as pd
|
||
import numpy as np
|
||
from typing import Dict
|
||
|
||
# 添加项目根目录到路径
|
||
project_root = Path(__file__).parent.parent.parent
|
||
if str(project_root) not in sys.path:
|
||
sys.path.insert(0, str(project_root))
|
||
|
||
from framework_v2.shared.data import FlaskAPIFetcher, CrossMarketAligner
|
||
from framework_v2.shared.factors.momentum import MomentumFactor
|
||
|
||
|
||
def test_stage1_data_fetch():
|
||
"""
|
||
阶段 1: 数据获取
|
||
|
||
获取纳指(^IXIC)和创业板(399006.SZ)数据
|
||
"""
|
||
print("\n" + "=" * 70)
|
||
print(" 阶段 1: 数据获取")
|
||
print("=" * 70)
|
||
|
||
fetcher = FlaskAPIFetcher()
|
||
|
||
# 获取纳指数据(美股)
|
||
print("\n[1.1] 获取纳斯达克指数数据(美股)...")
|
||
us_data = fetcher.fetch_indices(
|
||
codes=["^IXIC"],
|
||
start="2023-01-01",
|
||
end="2024-12-31"
|
||
)
|
||
|
||
assert "^IXIC" in us_data, "纳指数据获取失败"
|
||
df_nasdaq = us_data["^IXIC"]
|
||
|
||
print(f"\n纳指数据:")
|
||
print(f" 数据量: {len(df_nasdaq)} 条")
|
||
print(f" 日期范围: {df_nasdaq.index[0]} ~ {df_nasdaq.index[-1]}")
|
||
print(f" 列: {list(df_nasdaq.columns)}")
|
||
print(f" 前 3 行:")
|
||
print(df_nasdaq.head(3).to_string())
|
||
|
||
# 获取创业板数据(A股)
|
||
print("\n[1.2] 获取创业板指数数据(A股)...")
|
||
cn_data = fetcher.fetch_indices(
|
||
codes=["399006.SZ"],
|
||
start="2023-01-01",
|
||
end="2024-12-31"
|
||
)
|
||
|
||
assert "399006.SZ" in cn_data, "创业板数据获取失败"
|
||
df_gem = cn_data["399006.SZ"]
|
||
|
||
print(f"\n创业板数据:")
|
||
print(f" 数据量: {len(df_gem)} 条")
|
||
print(f" 日期范围: {df_gem.index[0]} ~ {df_gem.index[-1]}")
|
||
print(f" 列: {list(df_gem.columns)}")
|
||
print(f" 前 3 行:")
|
||
print(df_gem.head(3).to_string())
|
||
|
||
# 对比日历差异
|
||
print(f"\n[1.3] 交易日历对比:")
|
||
nasdaq_dates = set(df_nasdaq.index)
|
||
gem_dates = set(df_gem.index)
|
||
|
||
common_dates = nasdaq_dates & gem_dates
|
||
only_nasdaq = nasdaq_dates - gem_dates
|
||
only_gem = gem_dates - nasdaq_dates
|
||
|
||
print(f" 纳指交易日: {len(nasdaq_dates)} 天")
|
||
print(f" 创业板交易日: {len(gem_dates)} 天")
|
||
print(f" 共同交易日: {len(common_dates)} 天")
|
||
print(f" 仅纳指交易: {len(only_nasdaq)} 天")
|
||
print(f" 仅创业板交易: {len(only_gem)} 天")
|
||
|
||
if len(only_nasdaq) > 0:
|
||
print(f" 纳指独有日期示例: {sorted(list(only_nasdaq))[:3]}")
|
||
if len(only_gem) > 0:
|
||
print(f" 创业板独有日期示例: {sorted(list(only_gem))[:3]}")
|
||
|
||
print("\n✓ 阶段 1 通过")
|
||
|
||
return {
|
||
"^IXIC": df_nasdaq,
|
||
"399006.SZ": df_gem
|
||
}
|
||
|
||
|
||
def test_stage2_factor_calculation(data_dict: Dict[str, pd.DataFrame]):
|
||
"""
|
||
阶段 2: 因子计算
|
||
|
||
计算动量因子(在原始日历上)
|
||
"""
|
||
print("\n" + "=" * 70)
|
||
print(" 阶段 2: 因子计算(原始日历)")
|
||
print("=" * 70)
|
||
|
||
factor_calc = MomentumFactor(n_days=20)
|
||
|
||
factors = {}
|
||
|
||
for code, df in data_dict.items():
|
||
print(f"\n[2.1] 计算 {code} 动量因子...")
|
||
|
||
# compute 方法接受 DataFrame
|
||
factor_series = factor_calc.compute(df)
|
||
|
||
# 转换为 DataFrame 格式
|
||
factor_result = pd.DataFrame({
|
||
'value': factor_series,
|
||
'is_filled': False
|
||
})
|
||
|
||
factors[code] = factor_result
|
||
|
||
print(f" 因子值数量: {len(factor_result)}")
|
||
print(f" 日期范围: {factor_result.index[0]} ~ {factor_result.index[-1]}")
|
||
print(f" 前 3 行:")
|
||
print(factor_result.head(3).to_string())
|
||
|
||
# 统计 NaN
|
||
nan_count = factor_result['value'].isna().sum()
|
||
print(f" NaN 数量: {nan_count} ({nan_count/len(factor_result):.1%})")
|
||
|
||
# 验证因子值合理
|
||
valid_factors = factor_result['value'].dropna()
|
||
if len(valid_factors) > 0:
|
||
print(f" 因子值范围: {valid_factors.min():.4f} ~ {valid_factors.max():.4f}")
|
||
|
||
print("\n✓ 阶段 2 通过")
|
||
|
||
return factors
|
||
|
||
|
||
def test_stage3_data_alignment(
|
||
factors: Dict[str, pd.DataFrame],
|
||
data_dict: Dict[str, pd.DataFrame]
|
||
):
|
||
"""
|
||
阶段 3: 数据对齐
|
||
|
||
将因子和收益率对齐到 A 股交易日历
|
||
"""
|
||
print("\n" + "=" * 70)
|
||
print(" 阶段 3: 数据对齐(到 A 股日历)")
|
||
print("=" * 70)
|
||
|
||
fetcher = FlaskAPIFetcher()
|
||
|
||
# 获取 A 股交易日历(通过 API)
|
||
print("\n[3.1] 获取 A 股交易日历(通过 API)...")
|
||
|
||
# 裁剪到数据日期范围
|
||
data_start = min(df.index[0] for df in data_dict.values())
|
||
data_end = max(df.index[-1] for df in data_dict.values())
|
||
|
||
# 使用 API 获取准确日历
|
||
a_share_calendar = fetcher.get_trading_calendar(
|
||
market='A',
|
||
start=data_start.strftime('%Y-%m-%d'),
|
||
end=data_end.strftime('%Y-%m-%d')
|
||
)
|
||
|
||
print(f" A 股交易日: {len(a_share_calendar)} 天")
|
||
print(f" 日期范围: {a_share_calendar[0]} ~ {a_share_calendar[-1]}")
|
||
|
||
# 创建对齐器
|
||
aligner = CrossMarketAligner(target_calendar=a_share_calendar)
|
||
|
||
# 对齐因子
|
||
print("\n[3.2] 对齐因子到 A 股日历...")
|
||
aligned_factors = {}
|
||
|
||
for code, factor_df in factors.items():
|
||
print(f"\n 对齐 {code} 因子...")
|
||
|
||
# 获取原始日历
|
||
original_calendar = factor_df.index
|
||
|
||
# 对齐因子
|
||
aligned = aligner.align_factor(
|
||
factor_series=factor_df['value'],
|
||
source_calendar=original_calendar,
|
||
code=code
|
||
)
|
||
|
||
aligned_factors[code] = aligned
|
||
|
||
# 统计
|
||
filled_count = aligned['is_filled'].sum()
|
||
print(f" 对齐后天数: {len(aligned)}")
|
||
print(f" 填充天数: {filled_count} ({filled_count/len(aligned):.1%})")
|
||
print(f" NaN 数量: {aligned['value'].isna().sum()}")
|
||
|
||
# 对齐收益率
|
||
print("\n[3.3] 对齐收益率到 A 股日历...")
|
||
aligned_returns = {}
|
||
|
||
for code, df in data_dict.items():
|
||
print(f"\n 对齐 {code} 收益率...")
|
||
|
||
returns = aligner.align_returns(
|
||
close_series=df['close'],
|
||
code=code
|
||
)
|
||
|
||
aligned_returns[code] = returns
|
||
|
||
# 统计
|
||
print(f" 对齐后天数: {len(returns)}")
|
||
print(f" 收益率范围: {returns.min():.4%} ~ {returns.max():.4%}")
|
||
print(f" NaN 数量: {returns.isna().sum()}")
|
||
print(f" 零收益率天数: {(returns == 0).sum()} (休市日)")
|
||
|
||
# 验证对齐结果
|
||
print("\n[3.4] 验证对齐结果...")
|
||
|
||
# 1. 所有 DataFrame 应该有相同的索引
|
||
indices = [df.index for df in aligned_factors.values()]
|
||
indices.extend([s.index for s in aligned_returns.values()])
|
||
|
||
for i, idx1 in enumerate(indices):
|
||
for j, idx2 in enumerate(indices):
|
||
if i != j:
|
||
assert idx1.equals(idx2), f"索引 {i} 和 {j} 不一致"
|
||
|
||
print(f" ✓ 所有数据对齐到同一日历: {len(indices[0])} 天")
|
||
print(f" ✓ 日期范围: {indices[0][0]} ~ {indices[0][-1]}")
|
||
|
||
# 2. 验证收益率无 NaN
|
||
for code, returns in aligned_returns.items():
|
||
assert returns.isna().sum() == 0, f"{code} 收益率包含 NaN"
|
||
print(f" ✓ 收益率无 NaN")
|
||
|
||
# 3. 验证休市日收益率 = 0
|
||
for code, returns in aligned_returns.items():
|
||
zero_days = (returns == 0).sum()
|
||
print(f" {code} 休市日收益率 = 0: {zero_days} 天")
|
||
|
||
print("\n✓ 阶段 3 通过")
|
||
|
||
return aligned_factors, aligned_returns
|
||
|
||
|
||
def test_stage4_signal_generation(
|
||
aligned_factors: Dict[str, pd.DataFrame],
|
||
aligned_returns: Dict[str, pd.Series]
|
||
):
|
||
"""
|
||
阶段 4: 信号生成
|
||
|
||
根据对齐后的因子生成 Top-N 信号
|
||
"""
|
||
print("\n" + "=" * 70)
|
||
print(" 阶段 4: 信号生成")
|
||
print("=" * 70)
|
||
|
||
# 合并因子值
|
||
print("\n[4.1] 合并因子值...")
|
||
|
||
factor_values = pd.DataFrame()
|
||
for code, factor_df in aligned_factors.items():
|
||
factor_values[code] = factor_df['value']
|
||
|
||
print(f" 合并后形状: {factor_values.shape}")
|
||
print(f" 列: {list(factor_values.columns)}")
|
||
print(f" 前 3 行:")
|
||
print(factor_values.head(3).to_string())
|
||
|
||
# 简单信号:选择因子值最高的标的
|
||
print("\n[4.2] 生成信号(Top-1)...")
|
||
|
||
# 跳过全为 NaN 的行
|
||
valid_rows = factor_values.dropna(how='all').index
|
||
factor_valid = factor_values.loc[valid_rows]
|
||
|
||
signals = pd.DataFrame()
|
||
signals['best'] = factor_valid.idxmax(axis=1)
|
||
signals['best_value'] = factor_valid.max(axis=1)
|
||
|
||
print(f" 信号数量: {len(signals)}")
|
||
print(f" 前 10 个信号:")
|
||
print(signals.head(10).to_string())
|
||
|
||
# 统计选择分布
|
||
print(f"\n[4.3] 标的选择分布:")
|
||
distribution = signals['best'].value_counts()
|
||
for code, count in distribution.items():
|
||
pct = count / len(signals)
|
||
print(f" {code}: {count} 天 ({pct:.1%})")
|
||
|
||
# 验证信号与收益率对齐
|
||
print("\n[4.4] 验证信号与收益率对齐...")
|
||
|
||
returns_df = pd.DataFrame(aligned_returns)
|
||
|
||
# 裁剪到共同日期
|
||
common_dates = signals.index.intersection(returns_df.index)
|
||
signals_aligned = signals.loc[common_dates]
|
||
returns_aligned = returns_df.loc[common_dates]
|
||
|
||
print(f" 信号日期: {len(signals)} → {len(signals_aligned)}")
|
||
print(f" 收益日期: {len(returns_df)} → {len(returns_aligned)}")
|
||
print(f" 共同日期: {len(common_dates)}")
|
||
|
||
assert signals_aligned.index.equals(returns_aligned.index), "信号与收益日期不一致"
|
||
print(f" ✓ 信号与收益率日期一致")
|
||
|
||
print("\n✓ 阶段 4 通过")
|
||
|
||
return signals_aligned, returns_aligned
|
||
|
||
|
||
def test_stage5_strategy_returns(signals: pd.DataFrame, returns: pd.DataFrame):
|
||
"""
|
||
阶段 5: 计算策略收益
|
||
|
||
根据信号计算策略净值曲线
|
||
"""
|
||
print("\n" + "=" * 70)
|
||
print(" 阶段 5: 计算策略收益")
|
||
print("=" * 70)
|
||
|
||
print("\n[5.1] 计算策略日收益...")
|
||
|
||
strategy_returns = pd.Series(index=returns.index, dtype=float)
|
||
|
||
for date in returns.index:
|
||
if date in signals.index:
|
||
best_code = signals.loc[date, 'best']
|
||
strategy_returns[date] = returns.loc[date, best_code]
|
||
else:
|
||
strategy_returns[date] = 0.0
|
||
|
||
# 填充 NaN
|
||
strategy_returns = strategy_returns.fillna(0.0)
|
||
|
||
print(f" 策略收益天数: {len(strategy_returns)}")
|
||
print(f" 收益范围: {strategy_returns.min():.4%} ~ {strategy_returns.max():.4%}")
|
||
|
||
print("\n[5.2] 计算累计收益...")
|
||
|
||
cumulative_returns = (1 + strategy_returns).cumprod() - 1
|
||
|
||
print(f" 最终累计收益: {cumulative_returns.iloc[-1]:.2%}")
|
||
print(f" 最大累计收益: {cumulative_returns.max():.2%}")
|
||
print(f" 最小累计收益: {cumulative_returns.min():.2%}")
|
||
|
||
print("\n[5.3] 计算年化收益和最大回撤...")
|
||
|
||
# 年化收益
|
||
total_days = len(strategy_returns)
|
||
annual_return = (1 + cumulative_returns.iloc[-1]) ** (252 / total_days) - 1
|
||
print(f" 年化收益: {annual_return:.2%}")
|
||
|
||
# 最大回撤
|
||
rolling_max = cumulative_returns.cummax()
|
||
drawdown = (cumulative_returns - rolling_max) / (1 + rolling_max)
|
||
max_drawdown = drawdown.min()
|
||
print(f" 最大回撤: {max_drawdown:.2%}")
|
||
|
||
print("\n[5.4] 策略收益 vs 基准对比...")
|
||
|
||
# 基准:等权持有
|
||
benchmark_returns = returns.mean(axis=1)
|
||
benchmark_cumulative = (1 + benchmark_returns).cumprod() - 1
|
||
|
||
print(f" 策略累计收益: {cumulative_returns.iloc[-1]:.2%}")
|
||
print(f" 基准累计收益: {benchmark_cumulative.iloc[-1]:.2%}")
|
||
print(f" 超额收益: {cumulative_returns.iloc[-1] - benchmark_cumulative.iloc[-1]:.2%}")
|
||
|
||
print("\n✓ 阶段 5 通过")
|
||
|
||
return strategy_returns, cumulative_returns
|
||
|
||
|
||
def run_full_pipeline():
|
||
"""
|
||
运行完整流程
|
||
"""
|
||
print("\n" + "=" * 70)
|
||
print(" 端到端集成测试:数据获取 → 因子计算 → 数据对齐 → 信号生成")
|
||
print("=" * 70)
|
||
print("\n测试标的:")
|
||
print(" - 纳斯达克指数 (^IXIC) - 美股")
|
||
print(" - 创业板指数 (399006.SZ) - A 股")
|
||
print("\n时间范围: 2023-01-01 ~ 2024-12-31")
|
||
print("\n" + "=" * 70)
|
||
|
||
try:
|
||
# 阶段 1: 数据获取
|
||
data_dict = test_stage1_data_fetch()
|
||
|
||
# 阶段 2: 因子计算
|
||
factors = test_stage2_factor_calculation(data_dict)
|
||
|
||
# 阶段 3: 数据对齐
|
||
aligned_factors, aligned_returns = test_stage3_data_alignment(
|
||
factors, data_dict
|
||
)
|
||
|
||
# 阶段 4: 信号生成
|
||
signals, returns = test_stage4_signal_generation(
|
||
aligned_factors, aligned_returns
|
||
)
|
||
|
||
# 阶段 5: 策略收益
|
||
strategy_returns, cumulative_returns = test_stage5_strategy_returns(
|
||
signals, returns
|
||
)
|
||
|
||
# 总结
|
||
print("\n" + "=" * 70)
|
||
print(" 测试总结")
|
||
print("=" * 70)
|
||
print("\n✅ 所有阶段通过!")
|
||
print("\n流程验证:")
|
||
print(" ✓ 数据获取: FlaskAPIFetcher 成功获取线上数据")
|
||
print(" ✓ 因子计算: MomentumFactor 在原始日历计算")
|
||
print(" ✓ 数据对齐: CrossMarketAligner 对齐到 A 股日历")
|
||
print(" ✓ 信号生成: Top-N 选择逻辑正确")
|
||
print(" ✓ 收益计算: 策略净值曲线生成成功")
|
||
print("\n关键验证:")
|
||
print(" ✓ 跨市场日历差异已处理")
|
||
print(" ✓ 休市日收益率 = 0% (无 ffill 陷阱)")
|
||
print(" ✓ 收益率无 NaN")
|
||
print(" ✓ 信号与收益日期一致")
|
||
print("\n" + "=" * 70 + "\n")
|
||
|
||
return True
|
||
|
||
except Exception as e:
|
||
print(f"\n✗ 测试失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
return False
|
||
|
||
|
||
if __name__ == "__main__":
|
||
success = run_full_pipeline()
|
||
|
||
if success:
|
||
print("🎉 端到端测试通过!")
|
||
sys.exit(0)
|
||
else:
|
||
print("❌ 端到端测试失败!")
|
||
sys.exit(1)
|