问题根因:
- Python OpenSSL 3.5.4 + requests 2.32.4 + urllib3 2.5.0 版本不兼容
- requests 2.32.4 内部使用 urllib3 的方式与 urllib3 2.5.0 API 不兼容
- curl(SecureTransport)正常工作,但 Python requests(OpenSSL)失败
- 服务器(Caddy)使用 TLS 1.3 + X25519MLKEM768(后量子密钥交换)

修复方案:
- 用 urllib3.PoolManager 直接发起 HTTP 请求(已验证可正常工作)
- 封装 _http_get() 函数替代 requests.get()
- 替换所有 requests 相关异常类型为 urllib3 异常

修改文件:
- datasource/flask_api_source.py: 核心数据源层
- rotation/simple_rotation.py: 简单轮动策略层
522 lines
19 KiB
Python
522 lines
19 KiB
Python
"""
Flask API data source.

Fetches OHLCV data through the deployed Flask API service.
Supports remote calls; no local SSH tunnel is required.
"""
|
||
|
||
import os
|
||
import json
|
||
import time
|
||
import urllib3
|
||
import urllib.parse
|
||
import pandas as pd
|
||
from typing import Optional, Dict, List
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from dotenv import load_dotenv
|
||
|
||
from .models import OHLCVResponse, validate_ohlcv_response
|
||
|
||
# Load variables from a local .env file (if any) into the environment so that
# os.getenv('FLASK_API_URL') in FlaskAPIDataSource.__init__ can see them.
load_dotenv()
|
||
|
||
# ============================================================
# HTTP client (urllib3 replaces requests to work around the SSL EOF problem)
# ============================================================
|
||
|
||
# Module-level urllib3 pool manager; every _http_get() call goes through it.
_http_pool = urllib3.PoolManager()
|
||
|
||
def _http_get(url: str, params: dict = None, timeout: int = 120) -> urllib3.HTTPResponse:
    """Issue a GET request through the module-level urllib3 pool.

    Used in place of ``requests.get``; per the original note this works
    around an SSL EOF failure seen with OpenSSL 3.5 against a Caddy server.

    Args:
        url: Target URL without a query string.
        params: Optional query parameters; when non-empty they are
            URL-encoded and appended after a ``?``.
        timeout: Read timeout in seconds. The connect timeout is fixed at 10.

    Returns:
        The urllib3 response object returned by the pool.
    """
    # An empty or missing params mapping leaves the URL untouched.
    target = f"{url}?{urllib.parse.urlencode(params)}" if params else url
    limits = urllib3.Timeout(connect=10, read=timeout)
    return _http_pool.request('GET', target, timeout=limits)
|
||
|
||
def _parse_json(resp: urllib3.HTTPResponse) -> dict:
    """Decode the response body as UTF-8 and parse it as JSON.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        json.JSONDecodeError: If the decoded text is not valid JSON.
    """
    body_text = resp.data.decode('utf-8')
    return json.loads(body_text)
|
||
|
||
|
||
class FlaskAPIDataSource:
    """
    Flask API data source.

    Fetches data over HTTP from the Flask API service, so no local SSH tunnel
    has to be configured. Intended for remote calls or production use.

    Usage:
        source = FlaskAPIDataSource(base_url="https://k3s.tokenpluse.xyz")
        df = source.fetch("000300.SH", "2024-01-01", "2024-12-31")
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        api_path: str = "/api/v1/ohlcv",
        timeout: int = 120,
        retries: int = 3
    ):
        """
        Initialise the data source.

        Args:
            base_url: Base address of the API service. When omitted, the
                FLASK_API_URL environment variable is used, falling back to
                'https://k3s.tokenpluse.xyz'.
            api_path: Path of the OHLCV endpoint.
            timeout: Request read timeout in seconds.
            retries: Number of attempts made by the retrying methods.
        """
        resolved = base_url or os.getenv(
            'FLASK_API_URL',
            'https://k3s.tokenpluse.xyz'
        )
        # Strip trailing slashes so that base_url + api_path joins cleanly.
        self.base_url = resolved.rstrip('/')
        self.api_path = api_path
        self.timeout = timeout
        self.retries = retries

    @staticmethod
    def _attach_etf_extras(df: pd.DataFrame, validated) -> None:
        """Copy ETF NAV and premium fields of a validated response into ``df.attrs``."""
        # NAV table, indexed by date when a 'date' column is present.
        if validated.nav and validated.nav.data:
            nav_df = pd.DataFrame(validated.nav.data)
            if 'date' in nav_df.columns:
                nav_df['date'] = pd.to_datetime(nav_df['date'])
                nav_df = nav_df.set_index('date')
            df.attrs['nav'] = nav_df

        # Premium series as a {date: premium} mapping.
        if validated.premium_series:
            premium_dict = {item.date: item.premium for item in validated.premium_series}
            df.attrs['premium_series'] = premium_dict

        # Latest premium together with the date it refers to.
        if validated.latest_premium is not None:
            df.attrs['latest_premium'] = validated.latest_premium
            df.attrs['premium_date'] = validated.premium_date

        # Premium statistics.
        if validated.premium_stats:
            df.attrs['premium_stats'] = validated.premium_stats.model_dump()

    def fetch(
        self,
        code: str,
        start_date: str,
        end_date: str,
        adj: str = 'raw',
        asset_type: Optional[str] = None,
        timeframe: str = '1d'
    ) -> Optional[pd.DataFrame]:
        """
        Fetch OHLCV data for a single instrument.

        Args:
            code: Instrument code.
            start_date: Start date, YYYY-MM-DD.
            end_date: End date, YYYY-MM-DD.
            adj: Price adjustment type sent to the API: 'raw' (unadjusted),
                'qfq' (forward-adjusted) or 'hfq' (backward-adjusted).
            asset_type: Optional asset type; sent to the API when given
                (per the original note, to override auto-detection).
            timeframe: Candle period; only sent when asset_type is 'crypto'
                or the code is BTC/ETH.

        Returns:
            DataFrame restricted to the columns open, high, low, close,
            volume (preceded by 'code' when the response has it), indexed by
            'date' when the response has a date column. Extra response
            fields (info, and for 'china_etf' the NAV/premium data) are
            stored in ``df.attrs``. Returns None on any failure or when the
            response contains no rows.

            NOTE(review): the original docstring said adj='hfq' additionally
            returns adj_factor and close_hfq, but the column selection below
            drops every non-standard column - confirm which is intended.

        Examples:
            # Unadjusted prices
            df = source.fetch("000300.SH", "2020-01-01", "2024-12-31")

            # Backward-adjusted prices
            df = source.fetch("000001.SZ", "2020-01-01", "2024-12-31", adj='hfq')
        """
        url = f"{self.base_url}{self.api_path}"

        params = {
            'code': code,
            'start': start_date,
            'end': end_date,
            'adj': adj,
        }

        # Crypto requests carry the candle period.
        if asset_type == 'crypto' or code.upper() in ['BTC', 'ETH']:
            params['timeframe'] = timeframe

        # Optional: force a specific asset type.
        if asset_type:
            params['asset_type'] = asset_type

        for attempt in range(self.retries):
            try:
                response = _http_get(url, params=params, timeout=self.timeout)

                if response.status != 200:
                    # Back off a little longer on each attempt.
                    if attempt < self.retries - 1:
                        time.sleep(1 + attempt)
                        continue
                    print(f"✗ API请求失败: {response.status} - {response.data.decode('utf-8', errors='replace')[:100]}")
                    return None

                data = _parse_json(response)

                # Application-level error reported in the JSON payload.
                if 'error' in data:
                    print(f"✗ API返回错误: {data['error']}")
                    return None

                # Validate the payload against the response model.
                try:
                    validated = validate_ohlcv_response(data)
                except Exception as e:
                    print(f"✗ {code}: 响应数据验证失败 - {e}")
                    return None

                if not validated.data:
                    print(f"⚠ {code}: 无数据返回")
                    return None

                df = pd.DataFrame(validated.data)

                if 'date' in df.columns:
                    df['date'] = pd.to_datetime(df['date'])
                    df = df.set_index('date')

                # Keep only the standard columns (plus 'code' when present).
                standard_cols = ['open', 'high', 'low', 'close', 'volume']
                if 'code' in df.columns:
                    standard_cols = ['code'] + standard_cols
                df = df[standard_cols]

                # Report the range the API actually returned, falling back to
                # the requested range when the response has no date_range.
                actual_start = validated.date_range.start if validated.date_range else start_date
                actual_end = validated.date_range.end if validated.date_range else end_date
                actual_count = validated.count

                if validated.info:
                    df.attrs['info'] = validated.info

                # ETF responses also carry NAV and premium information.
                if validated.asset_type == 'china_etf':
                    self._attach_etf_extras(df, validated)

                print(f"✓ {code}: {actual_count} 条数据 ({actual_start} ~ {actual_end})")
                return df

            except urllib3.exceptions.TimeoutError:
                if attempt < self.retries - 1:
                    print(f"⚠ {code}: 请求超时,重试 {attempt + 2}/{self.retries}")
                    time.sleep(1 + attempt)
                    continue
                print(f"✗ {code}: 请求超时")
                return None

            except (urllib3.exceptions.SSLError, urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError) as e:
                if attempt < self.retries - 1:
                    print(f"⚠ {code}: {type(e).__name__},重试 {attempt + 2}/{self.retries}")
                    time.sleep(1 + attempt)
                    continue
                print(f"✗ {code}: {type(e).__name__} after {self.retries} retries")
                return None

            except urllib3.exceptions.HTTPError as e:
                if attempt < self.retries - 1:
                    time.sleep(1 + attempt)
                    continue
                print(f"✗ {code}: 请求异常 - {e}")
                return None

            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # A body that is not UTF-8 is treated like malformed JSON
                # instead of escaping as an unhandled exception.
                print(f"✗ {code}: JSON解析失败 - {e}")
                return None

        return None

    def fetch_batch(
        self,
        codes: List[str],
        start_date: str,
        end_date: str,
        asset_types: Optional[Dict[str, str]] = None
    ) -> Dict[str, Optional[pd.DataFrame]]:
        """
        Fetch data for several instruments, one request per code.

        Args:
            codes: Instrument codes.
            start_date: Start date.
            end_date: End date.
            asset_types: Optional mapping {code: asset_type}.

        Returns:
            {code: DataFrame}, with None for codes whose fetch failed.
        """
        results = {}
        asset_types = asset_types or {}

        print(f"从 Flask API 获取 {len(codes)} 只标的...")

        for i, code in enumerate(codes, 1):
            asset_type = asset_types.get(code)
            # Pass asset_type by keyword: the 4th positional parameter of
            # fetch() is `adj`, so a positional call would send the asset
            # type as the adjustment mode.
            results[code] = self.fetch(code, start_date, end_date, asset_type=asset_type)

            # Progress line every 5 codes and after the last one.
            if i % 5 == 0 or i == len(codes):
                success = sum(1 for v in results.values() if v is not None)
                print(f"  进度: {i}/{len(codes)} (成功: {success})")

        return results

    def fetch_etf_nav(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """
        Fetch ETF net-asset-value data from /api/v1/etf/nav.

        Args:
            code: ETF code.
            start_date: Start date.
            end_date: End date.

        Returns:
            DataFrame built from the response's nav records (indexed by
            'date' when present), with premium fields copied into
            ``df.attrs`` when the response has them. None on a non-200
            status, an error payload, no records, or any exception.
        """
        url = f"{self.base_url}/api/v1/etf/nav"

        params = {
            'code': code,
            'start': start_date,
            'end': end_date
        }

        try:
            response = _http_get(url, params=params, timeout=self.timeout)

            if response.status != 200:
                return None

            data = _parse_json(response)

            if 'error' in data:
                return None

            # Per the original note the server returns
            # {'nav': {'data': [...], 'count': N}, 'premium_series': [...]}.
            nav_section = data.get('nav', {})
            records = nav_section.get('data', [])

            if not records:
                return None

            df = pd.DataFrame(records)
            if 'date' in df.columns:
                df['date'] = pd.to_datetime(df['date'])
                df = df.set_index('date')

            # Attach premium information when the response has it.
            if 'premium_series' in data:
                df.attrs['premium_series'] = data['premium_series']
            if 'latest_premium' in data:
                df.attrs['latest_premium'] = data['latest_premium']
            if 'premium_stats' in data:
                df.attrs['premium_stats'] = data['premium_stats']

            return df

        except Exception as e:
            print(f"✗ {code} 净值获取失败: {e}")
            return None

    def fetch_with_adj(
        self,
        code: str,
        start_date: str,
        end_date: str,
        adj: str = 'raw',
        asset_type: Optional[str] = None,
        timeframe: str = '1d'
    ) -> Optional[pd.DataFrame]:
        """
        Fetch OHLCV data with an adjustment mode; thin wrapper around fetch().

        Args:
            code: Instrument code.
            start_date: Start date, YYYY-MM-DD.
            end_date: End date, YYYY-MM-DD.
            adj: Adjustment mode (raw/qfq/hfq), default 'raw'.
            asset_type: Optional asset type.
            timeframe: Candle period (used for crypto).

        Returns:
            Whatever fetch() returns for the same arguments.

        Example:
            df = source.fetch_with_adj("000001.SZ", "2020-01-01", "2024-12-31", adj='hfq')
        """
        return self.fetch(
            code,
            start_date,
            end_date,
            adj=adj,
            asset_type=asset_type,
            timeframe=timeframe,
        )

    def get_health(self) -> Dict:
        """Probe the OHLCV endpoint with a fixed sample query and report availability."""
        url = f"{self.base_url}{self.api_path}"
        params = {'code': '000300.SH', 'start': '2024-01-01', 'end': '2024-01-05'}

        try:
            response = _http_get(url, params=params, timeout=self.timeout)
            if response.status == 200:
                # Parsed only to confirm the body is valid JSON; a parse
                # failure is reported through the except branch below.
                _parse_json(response)
                return {
                    'status': 'healthy',
                    'ssh_configured': True,
                    'available': True
                }
            else:
                return {'status': 'error', 'available': False}
        except Exception as e:
            return {'status': 'error', 'message': str(e), 'available': False}

    def get_calendar_info(self) -> Dict:
        """Return the JSON from /api/v1/calendar/info, or an {'error': ...} dict on failure."""
        url = f"{self.base_url}/api/v1/calendar/info"

        try:
            response = _http_get(url, timeout=10)
            if response.status == 200:
                return _parse_json(response)
            else:
                return {"error": f"HTTP {response.status}"}
        except Exception as e:
            return {"error": str(e)}

    def get_trading_calendar(
        self,
        market: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DatetimeIndex]:
        """
        Fetch the trading calendar from /api/v1/trading-calendar.

        Args:
            market: Market code, passed to the API unchanged. According to
                the original documentation:
                - 'A' or 'china': China A-shares
                - 'US' or 'us': US equities (NYSE)
                - 'HK' or 'hk': Hong Kong equities (HKEX)
            start_date: Start date, YYYY-MM-DD.
            end_date: End date, YYYY-MM-DD.

        Returns:
            DatetimeIndex of trading days (empty when the response lists
            none); None on failure.

        Examples:
            dates = source.get_trading_calendar('A', '2024-01-01', '2024-01-31')
            dates = source.get_trading_calendar('US', '2024-01-01', '2024-01-15')
        """
        url = f"{self.base_url}/api/v1/trading-calendar"

        params = {
            'market': market,
            'start': start_date,
            'end': end_date
        }

        for attempt in range(self.retries):
            try:
                response = _http_get(url, params=params, timeout=self.timeout)

                if response.status != 200:
                    if attempt < self.retries - 1:
                        print(f"⚠ 交易日历请求失败 (HTTP {response.status}),重试 {attempt + 2}/{self.retries}")
                        time.sleep(1 + attempt)
                        continue
                    print(f"✗ 交易日历请求失败: HTTP {response.status} - {response.data.decode('utf-8', errors='replace')[:100]}")
                    return None

                data = _parse_json(response)

                # Application-level error reported in the JSON payload.
                if 'error' in data:
                    print(f"✗ 交易日历获取失败: {data['error']}")
                    return None

                trading_dates = data.get('trading_dates', [])
                if not trading_dates:
                    print(f"⚠ 市场 {market} 在 {start_date} ~ {end_date} 期间无交易日")
                    return pd.DatetimeIndex([])

                dates = pd.DatetimeIndex(trading_dates)
                count = data.get('count', len(dates))
                exchange = data.get('exchange', '')

                print(f"✓ {market} ({exchange}): {count} 个交易日 ({start_date} ~ {end_date})")
                return dates

            except urllib3.exceptions.TimeoutError:
                if attempt < self.retries - 1:
                    print(f"⚠ 交易日历请求超时,重试 {attempt + 2}/{self.retries}")
                    time.sleep(1 + attempt)
                    continue
                print("✗ 交易日历请求超时")
                return None

            except (urllib3.exceptions.SSLError, urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError) as e:
                if attempt < self.retries - 1:
                    print(f"⚠ 交易日历: {type(e).__name__},重试 {attempt + 2}/{self.retries}")
                    time.sleep(1 + attempt)
                    continue
                print(f"✗ 交易日历: {type(e).__name__} after {self.retries} retries")
                return None

            except urllib3.exceptions.HTTPError as e:
                if attempt < self.retries - 1:
                    time.sleep(1 + attempt)
                    continue
                print(f"✗ 交易日历请求异常: {e}")
                return None

            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # A body that is not UTF-8 is treated like malformed JSON.
                print(f"✗ 交易日历 JSON 解析失败: {e}")
                return None

        return None

    def get_service_info(self) -> Dict:
        """Return the JSON served at the service root, or an {'error': ...} dict on failure."""
        url = f"{self.base_url}/"

        try:
            response = _http_get(url, timeout=10)
            return _parse_json(response)
        except Exception as e:
            return {"error": str(e)}
|
||
|
||
|
||
# 全局实例
|
||
# Module-level cache for the shared instance; stays None until
# get_flask_api_source() creates the instance on its first call.
_flask_api_source: Optional[FlaskAPIDataSource] = None
|
||
|
||
|
||
def get_flask_api_source(base_url: str = None) -> FlaskAPIDataSource:
    """Return the shared FlaskAPIDataSource, creating it on first use.

    Args:
        base_url: Base URL used only when the shared instance is created.
            NOTE(review): once an instance exists this argument is ignored,
            even if it differs from the cached instance's URL - confirm
            that this is intended.

    Returns:
        The module-level FlaskAPIDataSource instance.
    """
    global _flask_api_source

    # Reuse the cached instance when one has already been built.
    if _flask_api_source is not None:
        return _flask_api_source

    _flask_api_source = FlaskAPIDataSource(base_url=base_url)
    return _flask_api_source