Files
etf/datasource/flask_api_source.py
aszerW 7f2af6b470 refactor(flask_api): fetch添加adj参数,fetch_with_adj简化
FlaskAPIDataSource.fetch() 新增 adj 参数,fetch_with_adj() 简化

- FlaskAPIDataSource.fetch(adj='raw'): 请求参数包含 adj
- fetch_with_adj(): 简化为 return self.fetch(adj=adj)(减少 ~120行)
- flask_server.py: 缓存逻辑已支持 adj 参数,无需修改
2026-05-23 18:32:20 +08:00

384 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Flask API 数据源
通过部署后的 Flask API 服务获取 OHLCV 数据
支持远程调用,无需本地 SSH 隧道
"""
import os
import json
import requests
import pandas as pd
from typing import Optional, Dict, List
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process
# environment at import time, so settings such as FLASK_API_URL -- read via
# os.getenv in FlaskAPIDataSource.__init__ -- can be configured there.
load_dotenv()
class FlaskAPIDataSource:
    """
    Data source backed by a deployed Flask API service.

    OHLCV data is fetched over HTTP, so no local SSH tunnel has to be
    configured; intended for remote callers and production environments.

    Usage:
        source = FlaskAPIDataSource(base_url="https://k3s.tokenpluse.xyz")
        df = source.fetch("000300.SH", "2024-01-01", "2024-12-31")
    """

    # Columns every OHLCV response must provide.
    _OHLCV_COLUMNS = ['open', 'high', 'low', 'close', 'volume']
    # Extra columns documented for adjusted-price requests; kept when present.
    _ADJ_EXTRA_COLUMNS = ['adj_factor', 'close_hfq']

    def __init__(
        self,
        base_url: Optional[str] = None,
        api_path: str = "/api/v1/ohlcv",
        timeout: int = 120,
        retries: int = 3
    ):
        """
        Initialise the data source.

        Args:
            base_url: Base address of the API service. When omitted, the
                FLASK_API_URL environment variable is used, falling back to
                'https://k3s.tokenpluse.xyz'.
            api_path: Path of the OHLCV endpoint, appended to ``base_url``.
            timeout: Request timeout in seconds.
            retries: Maximum number of attempts made by ``fetch``.
        """
        resolved_url = base_url or os.getenv(
            'FLASK_API_URL',
            'https://k3s.tokenpluse.xyz'
        )
        # Strip trailing slashes so that base_url + api_path never yields "//".
        self.base_url = resolved_url.rstrip('/')
        self.api_path = api_path
        self.timeout = timeout
        self.retries = retries

    @staticmethod
    def _parse_json(response):
        """Decode the JSON body of ``response``.

        Falls back to ``json.loads(response.text)`` when ``response.json()``
        fails. The original code comment says this is meant to support zstd
        responses -- NOTE(review): confirm the fallback actually helps there.

        Raises:
            json.JSONDecodeError: If the fallback cannot parse the body either.
        """
        try:
            return response.json()
        except (json.JSONDecodeError, requests.exceptions.JSONDecodeError):
            return json.loads(response.text)

    @staticmethod
    def _records_to_frame(records) -> pd.DataFrame:
        """Build a DataFrame from ``records``, indexed by a parsed 'date' column if present."""
        frame = pd.DataFrame(records)
        if 'date' in frame.columns:
            frame['date'] = pd.to_datetime(frame['date'])
            frame = frame.set_index('date')
        return frame

    def _frame_from_payload(
        self,
        code: str,
        data: Dict,
        adj: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """Turn a decoded OHLCV payload into the DataFrame returned by ``fetch``.

        Prints a diagnostic and returns None when the payload carries an
        'error' key, has no records, or lacks one of the OHLCV columns.
        """
        if 'error' in data:
            print(f"✗ API返回错误: {data['error']}")
            return None

        records = data.get('data', [])
        if not records:
            print(f"{code}: 无数据返回")
            return None

        df = self._records_to_frame(records)

        # A response without the standard columns cannot be used; report it
        # like the other failure paths instead of raising KeyError, which
        # would abort a whole fetch_batch() run.
        missing = [col for col in self._OHLCV_COLUMNS if col not in df.columns]
        if missing:
            print(f"{code}: 返回数据缺少列 {missing}")
            return None

        # Normalise the column order. For adjusted requests keep the documented
        # extra columns when the server sent them; the default adj='raw' result
        # stays exactly the five OHLCV columns.
        selected = list(self._OHLCV_COLUMNS)
        if adj != 'raw':
            selected += [col for col in self._ADJ_EXTRA_COLUMNS if col in df.columns]
        df = df[selected]

        # Report the range the API actually returned, not the requested one.
        date_range = data.get('date_range', {})
        actual_start = date_range.get('start', start_date)
        actual_end = date_range.get('end', end_date)
        actual_count = data.get('count', len(df))

        # Keep the optional 'info' section alongside the frame.
        if 'info' in data:
            df.attrs['info'] = data['info']

        # ETF payloads may additionally carry NAV and premium information.
        if data.get('asset_type') == 'china_etf':
            nav_section = data.get('nav', {})
            if nav_section.get('data'):
                df.attrs['nav'] = self._records_to_frame(nav_section['data'])
            if 'premium_series' in data:
                df.attrs['premium_series'] = data['premium_series']
            if 'latest_premium' in data:
                df.attrs['latest_premium'] = data['latest_premium']
                df.attrs['premium_date'] = data.get('premium_date')
            if 'premium_stats' in data:
                df.attrs['premium_stats'] = data['premium_stats']

        print(f"{code}: {actual_count} 条数据 ({actual_start} ~ {actual_end})")
        return df

    def fetch(
        self,
        code: str,
        start_date: str,
        end_date: str,
        adj: str = 'raw',
        asset_type: Optional[str] = None,
        timeframe: str = '1d'
    ) -> Optional[pd.DataFrame]:
        """
        Fetch OHLCV data for a single instrument.

        Args:
            code: Instrument code.
            start_date: Start date, YYYY-MM-DD.
            end_date: End date, YYYY-MM-DD.
            adj: Price adjustment: 'raw' (unadjusted), 'qfq' (forward-adjusted)
                or 'hfq' (backward-adjusted). Defaults to 'raw'.
            asset_type: Optional asset type, sent to the API to override its
                automatic detection.
            timeframe: Candle period; only sent for crypto requests.

        Returns:
            DataFrame indexed by date with columns open, high, low, close,
            volume. When ``adj`` is not 'raw', the columns adj_factor and
            close_hfq are kept as well if the API returned them. Extra payload
            sections (info, nav, premium data) are stored in ``df.attrs``.
            None on any failure or when no data is returned.

        Examples:
            # Unadjusted prices
            df = source.fetch("000300.SH", "2020-01-01", "2024-12-31")
            # Backward-adjusted A-share stock
            df = source.fetch("000001.SZ", "2020-01-01", "2024-12-31", adj='hfq')
        """
        url = f"{self.base_url}{self.api_path}"
        params = {
            'code': code,
            'start': start_date,
            'end': end_date,
            'adj': adj,
        }
        # Crypto requests additionally need the candle period.
        if asset_type == 'crypto' or code.upper() in ['BTC', 'ETH']:
            params['timeframe'] = timeframe
        # Optional: force a specific asset type.
        if asset_type:
            params['asset_type'] = asset_type

        for attempt in range(self.retries):
            can_retry = attempt < self.retries - 1
            try:
                response = requests.get(
                    url,
                    params=params,
                    timeout=self.timeout
                )
                if response.status_code != 200:
                    if can_retry:
                        continue
                    print(f"✗ API请求失败: {response.status_code} - {response.text[:100]}")
                    return None

                data = self._parse_json(response)
                return self._frame_from_payload(
                    code, data, adj, start_date, end_date
                )
            except requests.exceptions.Timeout:
                if can_retry:
                    print(f"{code}: 请求超时,重试 {attempt + 2}/{self.retries}")
                    continue
                print(f"{code}: 请求超时")
                return None
            except requests.exceptions.RequestException as e:
                if can_retry:
                    continue
                print(f"{code}: 请求异常 - {e}")
                return None
            except json.JSONDecodeError as e:
                # An undecodable body is not retried.
                print(f"{code}: JSON解析失败 - {e}")
                return None
        return None

    def fetch_batch(
        self,
        codes: List[str],
        start_date: str,
        end_date: str,
        asset_types: Optional[Dict[str, str]] = None,
        adj: str = 'raw',
        timeframe: str = '1d'
    ) -> Dict[str, Optional[pd.DataFrame]]:
        """
        Fetch data for several instruments, one request per code.

        Args:
            codes: Instrument codes.
            start_date: Start date.
            end_date: End date.
            asset_types: Optional mapping {code: asset_type}.
            adj: Price adjustment forwarded to ``fetch`` for every code.
            timeframe: Candle period forwarded to ``fetch`` for every code.

        Returns:
            {code: DataFrame or None}; None marks a code whose fetch failed.
        """
        results = {}
        asset_types = asset_types or {}
        total = len(codes)
        print(f"从 Flask API 获取 {total} 只标的...")
        for i, code in enumerate(codes, 1):
            # asset_type must be passed by keyword: the fourth positional
            # parameter of fetch() is ``adj``, not ``asset_type``.
            results[code] = self.fetch(
                code,
                start_date,
                end_date,
                adj=adj,
                asset_type=asset_types.get(code),
                timeframe=timeframe
            )
            # Report progress every five codes and after the last one.
            if i % 5 == 0 or i == total:
                success = sum(1 for v in results.values() if v is not None)
                print(f" 进度: {i}/{total} (成功: {success})")
        return results

    def fetch_etf_nav(
        self,
        code: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """
        Fetch ETF net-asset-value data from the /api/v1/etf/nav endpoint.

        Args:
            code: ETF code.
            start_date: Start date.
            end_date: End date.

        Returns:
            DataFrame built from the payload's nav records, indexed by date
            when a 'date' column is present, with any premium information
            stored in ``df.attrs``. None on a non-200 status, an 'error'
            payload, an empty record list, or any exception.
        """
        url = f"{self.base_url}/api/v1/etf/nav"
        params = {
            'code': code,
            'start': start_date,
            'end': end_date
        }
        try:
            response = requests.get(url, params=params, timeout=self.timeout)
            if response.status_code != 200:
                return None

            data = self._parse_json(response)
            if 'error' in data:
                return None

            # Payload layout used here:
            # {'nav': {'data': [...], 'count': N}, 'premium_series': [...]}
            records = data.get('nav', {}).get('data', [])
            if not records:
                return None

            df = self._records_to_frame(records)
            # Attach premium information when the payload has it.
            for key in ('premium_series', 'latest_premium', 'premium_stats'):
                if key in data:
                    df.attrs[key] = data[key]
            return df
        except Exception as e:
            # Best-effort lookup: report the problem and signal "no data".
            print(f"{code} 净值获取失败: {e}")
            return None

    def fetch_with_adj(
        self,
        code: str,
        start_date: str,
        end_date: str,
        adj: str = 'raw',
        asset_type: Optional[str] = None,
        timeframe: str = '1d'
    ) -> Optional[pd.DataFrame]:
        """
        Fetch OHLCV data with an explicit price-adjustment mode.

        Thin wrapper that delegates to ``fetch`` with the same arguments.

        Args:
            code: Instrument code.
            start_date: Start date, YYYY-MM-DD.
            end_date: End date, YYYY-MM-DD.
            adj: Price adjustment, raw/qfq/hfq. Defaults to 'raw'.
            asset_type: Optional asset type.
            timeframe: Candle period; only sent for crypto requests.

        Returns:
            Whatever ``fetch`` returns for the same arguments.

        Example:
            # Backward-adjusted A-share stock
            df = source.fetch_with_adj("000001.SZ", "2020-01-01", "2024-12-31", adj='hfq')
        """
        return self.fetch(
            code,
            start_date,
            end_date,
            adj=adj,
            asset_type=asset_type,
            timeframe=timeframe
        )

    def get_health(self) -> Dict:
        """Probe the OHLCV endpoint with a fixed sample query and report availability.

        Returns:
            A status dict. 'available' is True only when the probe answers
            with HTTP 200 and a JSON-decodable body; on an exception the dict
            also carries the error text under 'message'.
        """
        url = f"{self.base_url}{self.api_path}"
        params = {'code': '000300.SH', 'start': '2024-01-01', 'end': '2024-01-05'}
        try:
            response = requests.get(url, params=params, timeout=self.timeout)
            if response.status_code != 200:
                return {'status': 'error', 'available': False}
            # The decoded body is not used; the call only ensures that a
            # non-JSON 200 response is reported as an error below.
            response.json()
            return {
                'status': 'healthy',
                'ssh_configured': True,
                'available': True
            }
        except Exception as e:
            return {'status': 'error', 'message': str(e), 'available': False}

    def get_service_info(self) -> Dict:
        """Return the JSON document served at the service root, or {"error": ...} on failure."""
        url = f"{self.base_url}/"
        try:
            # Uses a fixed 10-second timeout rather than self.timeout.
            response = requests.get(url, timeout=10)
            return response.json()
        except Exception as e:
            return {"error": str(e)}
# Module-level cached instance shared by get_flask_api_source().
_flask_api_source: Optional[FlaskAPIDataSource] = None


def get_flask_api_source(base_url: str = None) -> FlaskAPIDataSource:
    """Return the shared FlaskAPIDataSource instance, creating it on first use.

    Args:
        base_url: Optional base address of the API service. When omitted, the
            cached instance is returned as is (or a default one is created).
            When given and different from the cached instance's address, a new
            instance is created and cached, so an explicit URL is never
            silently ignored.

    Returns:
        The cached FlaskAPIDataSource instance.
    """
    global _flask_api_source
    # FlaskAPIDataSource stores base_url without trailing slashes, so compare
    # against the same normalised form.
    requested_url = base_url.rstrip('/') if base_url else None
    url_changed = (
        _flask_api_source is not None
        and requested_url is not None
        and requested_url != _flask_api_source.base_url
    )
    if _flask_api_source is None or url_changed:
        _flask_api_source = FlaskAPIDataSource(base_url=base_url)
    return _flask_api_source