perf(http): 并行获取数据加速数据加载
使用 ThreadPoolExecutor 并行获取多个标的的数据:

- 信号源 (index): 11个标的并行获取
- 交易源 (ETF): 4个标的并行获取
- 溢价率数据: 4个标的并行获取

性能提升:5个标的从 ~15s 串行 → ~4.6s 并行(约 3x 加速)

修改:

- 增大 urllib3 连接池 maxsize=16 支持并行连接
- 使用 concurrent.futures.ThreadPoolExecutor
This commit is contained in:
@@ -21,6 +21,7 @@ import pandas as pd
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
PROJECT_ROOT = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
@@ -31,7 +32,10 @@ from rotation.config_loader import load_rotation_config, RotationStrategyConfig
|
||||
# HTTP client (urllib3 替代 requests,修复 SSL EOF 问题)
|
||||
# ============================================================
|
||||
|
||||
_http_pool = urllib3.PoolManager(timeout=urllib3.Timeout(connect=10, read=120))
|
||||
_http_pool = urllib3.PoolManager(
|
||||
maxsize=16, # 支持并行连接
|
||||
timeout=urllib3.Timeout(connect=10, read=120)
|
||||
)
|
||||
|
||||
class _HttpResponse:
|
||||
"""urllib3 响应包装,提供 requests 兼容接口"""
|
||||
@@ -364,32 +368,54 @@ class SimpleRotationStrategy:
|
||||
self.trading_calendar: Optional[pd.DatetimeIndex] = None
|
||||
|
||||
def _preload_data(self):
|
||||
"""Preload all historical data"""
|
||||
"""Preload all historical data (parallel fetching)"""
|
||||
start_date = self.config.backtest.start_date
|
||||
end_date = self.config.backtest.end_date or datetime.now().strftime('%Y-%m-%d')
|
||||
preload_start = (pd.Timestamp(start_date) - timedelta(days=self.n_days * 2)).strftime('%Y-%m-%d')
|
||||
|
||||
print("\n[1/4] Preloading signal sources (index raw)...")
|
||||
for code in self.signal_codes:
|
||||
df = self.data_cache.preload(code, preload_start, end_date, adj='raw')
|
||||
if df is not None:
|
||||
self.index_data[code] = df
|
||||
print(f"\n[1/4] Preloading signal sources (index raw) [{len(self.signal_codes)} codes, parallel]...")
|
||||
# Parallel fetch signal sources
|
||||
with ThreadPoolExecutor(max_workers=8) as executor:
|
||||
futures = {executor.submit(self.data_cache.preload, code, preload_start, end_date, 'raw'): code for code in self.signal_codes}
|
||||
for future in as_completed(futures):
|
||||
code = futures[future]
|
||||
try:
|
||||
df = future.result()
|
||||
if df is not None:
|
||||
self.index_data[code] = df
|
||||
except Exception as e:
|
||||
print(f" x {code}: {e}")
|
||||
print(f"\n Signal: {len(self.index_data)}/{len(self.signal_codes)} OK")
|
||||
|
||||
print("\n[2/4] Preloading trade sources (ETF hfq)...")
|
||||
print(f"\n[2/4] Preloading trade sources (ETF hfq) [{len(set(self.signal_to_trade.values()))} codes, parallel]...")
|
||||
trade_codes = set(self.signal_to_trade.values())
|
||||
# Determine adj for each trade code
|
||||
trade_adj_map = {}
|
||||
for code in trade_codes:
|
||||
is_bond = any(
|
||||
a.trade_source == code and a.group == 'BOND'
|
||||
for a in self.config.asset_pools.assets.values()
|
||||
)
|
||||
adj = 'raw' if is_bond else 'hfq'
|
||||
df = self.data_cache.preload(code, preload_start, end_date, adj=adj)
|
||||
if df is not None:
|
||||
self.etf_data[code] = df
|
||||
# Load premium data cache for all ETF trade codes
|
||||
for code in trade_codes:
|
||||
self.data_cache.preload_premium(code, end_date=end_date)
|
||||
is_bond = any(a.trade_source == code and a.group == 'BOND' for a in self.config.asset_pools.assets.values())
|
||||
trade_adj_map[code] = 'raw' if is_bond else 'hfq'
|
||||
|
||||
# Parallel fetch trade sources
|
||||
with ThreadPoolExecutor(max_workers=8) as executor:
|
||||
futures = {executor.submit(self.data_cache.preload, code, preload_start, end_date, trade_adj_map[code]): code for code in trade_codes}
|
||||
for future in as_completed(futures):
|
||||
code = futures[future]
|
||||
try:
|
||||
df = future.result()
|
||||
if df is not None:
|
||||
self.etf_data[code] = df
|
||||
except Exception as e:
|
||||
print(f" x {code}: {e}")
|
||||
|
||||
# Parallel fetch premium data
|
||||
with ThreadPoolExecutor(max_workers=8) as executor:
|
||||
futures = {executor.submit(self.data_cache.preload_premium, code, end_date): code for code in trade_codes}
|
||||
for future in as_completed(futures):
|
||||
code = futures[future]
|
||||
try:
|
||||
future.result()
|
||||
except Exception:
|
||||
pass
|
||||
print(f"\n Trade: {len(self.etf_data)}/{len(trade_codes)} OK, premium: {len(self.data_cache.premium_data)} loaded")
|
||||
|
||||
# Load benchmark
|
||||
|
||||
Reference in New Issue
Block a user