架构改动: - 移除 HybridDataSource(功能被 UniversalDataFetcher 覆盖) - 新增分层接口设计:基础层 + 扩展层 基础层(统一接口): - fetch(): 统一 OHLCV 接口,自动识别资产类型 - fetch_batch(): 批量获取 扩展层(资产类型特有): - fetch_etf_adj(): A股 ETF 后复权价格 - fetch_us_adj(): 美股复权价格 - fetch_etf_with_nav(): ETF 价格 + 净值 + 溢价率 其他修改: - YFinanceSource: 新增 fetch_adj() 方法 - strategy.py: 改用 UniversalDataFetcher 替代 HybridDataSource - __init__.py: 移除 HybridDataSource 导出
188 lines
6.0 KiB
Python
188 lines
6.0 KiB
Python
"""
YFinance数据源

获取港股、美股数据(通过SSH隧道)
"""
|
||
|
||
import os
|
||
import time
|
||
from typing import Optional
|
||
from datetime import datetime, timedelta
|
||
import pandas as pd
|
||
import urllib3
|
||
|
||
# 禁用SSL警告
|
||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||
|
||
|
||
class YFinanceSource:
    """Data source that downloads daily OHLCV bars through ``yfinance``.

    Project-level codes are translated to Yahoo Finance tickers via
    ``CODE_MAP``; codes without a mapping are passed through unchanged.
    """

    # Project code -> Yahoo Finance ticker.
    CODE_MAP = {
        # Hong Kong
        "HSTECH.HK": "3033.HK",  # Hang Seng TECH index
        "HSI": "^HSI",           # Hang Seng index
        # US indices
        "NDX": "^NDX",           # Nasdaq 100
        "SPX": "^GSPC",          # S&P 500
        "DJI": "^DJI",           # Dow Jones
        # Japan / Europe
        "N225": "^N225",         # Nikkei 225
        "GDAXI": "^GDAXI",       # German DAX
        # Commodities
        "CL.NYM": "CL=F",        # WTI crude oil futures
    }

    # yfinance column name -> normalized lower-case column name.
    _COLUMN_MAP = {
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Volume": "volume",
    }

    # Columns (in order) of every frame returned by fetch()/fetch_adj().
    _OUTPUT_COLUMNS = ['code', 'open', 'high', 'low', 'close', 'volume']

    # Codes ending in one of these are NOT served by this source:
    # the first four are China A-share style suffixes, the rest futures.
    _NON_YF_SUFFIXES = (
        '.SH', '.SZ', '.SS', '.CSI',
        '.SHF', '.NYM', '.DCE', '.CZC',
    )

    def __init__(self, use_ssh_tunnel: bool = False):
        """Create the data source.

        Args:
            use_ssh_tunnel: Whether an SSH tunnel is in use (the tunnel must
                be started beforehand via SSHTunnelManager). The flag is only
                stored on the instance; no method of this class reads it.
        """
        self.use_ssh_tunnel = use_ssh_tunnel
        self._delay = 0.5  # seconds slept before each request (rate limiting)

    def fetch(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
        """Download unadjusted daily bars for ``code``.

        Args:
            code: Project code (e.g. 'NDX', 'N225', 'HSI').
            start_date: First day, 'YYYY-MM-DD'.
            end_date: Last day (inclusive), 'YYYY-MM-DD'.

        Returns:
            DataFrame indexed by ``date`` with columns code, open, high, low,
            close, volume; ticker metadata is stored in ``df.attrs['info']``
            and the code in ``df.attrs['code']``. ``None`` when nothing was
            returned or the download failed (the error is printed).
        """
        return self._fetch_history(code, start_date, end_date, adjusted=False)

    def fetch_adj(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
        """Download split/dividend-adjusted daily bars for ``code``.

        Uses ``auto_adjust=True`` so prices are adjusted for splits and
        dividends; intended for US stocks / ETFs.

        Args:
            code: Project code (e.g. 'AAPL', 'TSLA', 'QQQ').
            start_date: First day, 'YYYY-MM-DD'.
            end_date: Last day (inclusive), 'YYYY-MM-DD'.

        Returns:
            DataFrame indexed by ``date`` with columns code, open, high, low,
            close, volume (adjusted); ``df.attrs['adjusted']`` is True and
            ``df.attrs['code']`` holds the code. ``None`` when nothing was
            returned or the download failed (the error is printed).
        """
        return self._fetch_history(code, start_date, end_date, adjusted=True)

    def _fetch_history(self, code: str, start_date: str, end_date: str, *,
                       adjusted: bool) -> Optional[pd.DataFrame]:
        """Shared download path behind fetch() and fetch_adj()."""
        # Imported lazily so the module can be imported without yfinance.
        import yfinance as yf

        # Throttle to avoid being rate limited.
        time.sleep(self._delay)

        yf_code = self.CODE_MAP.get(code, code)
        # Runtime message prefixes are kept exactly as the two public
        # methods originally printed them.
        error_label = "YFinance下载复权数据" if adjusted else "YFinance下载"

        try:
            ticker = yf.Ticker(yf_code)

            stock_info = {}
            if not adjusted:
                # Metadata is best effort: it is meaningful for stocks/ETFs,
                # while indices may not provide it at all.
                try:
                    stock_info = ticker.info or {}
                except Exception:
                    pass

            # yfinance treats ``end`` as exclusive, so extend by one day to
            # make the caller's end_date inclusive.
            end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)

            raw = ticker.history(
                start=start_date,
                end=end_dt.strftime("%Y-%m-%d"),
                auto_adjust=adjusted,
            )
            if raw is None or len(raw) == 0:
                return None

            result = self._standardize_frame(raw, code)

            # Attach metadata to the frame that is actually returned, so it
            # does not depend on pandas propagating attrs through the
            # column selection.
            result.attrs['code'] = code
            if adjusted:
                result.attrs['adjusted'] = True
            else:
                result.attrs['info'] = stock_info
            return result

        except Exception as e:
            print(f"{error_label} {code} ({yf_code}) 失败: {e}")
            return None

    @staticmethod
    def _standardize_frame(raw: pd.DataFrame, code: str) -> pd.DataFrame:
        """Normalize a raw history frame to the project's OHLCV layout."""
        frame = raw.rename(columns=YFinanceSource._COLUMN_MAP)

        index = frame.index
        if not isinstance(index, pd.DatetimeIndex):
            index = pd.to_datetime(index, utc=True)
        if index.tz is not None:
            # Drop the timezone WITHOUT converting to UTC first: converting
            # would move local-midnight bars of markets east of UTC
            # (Hong Kong, Tokyo, Frankfurt) onto the previous calendar day.
            index = index.tz_localize(None)
        frame.index = index.normalize()
        frame.index.name = "date"

        frame["code"] = code
        return frame[YFinanceSource._OUTPUT_COLUMNS]

    def is_yfinance_code(self, code: str) -> bool:
        """Return True when ``code`` should be fetched through yfinance.

        A-share and futures style suffixes are routed elsewhere (Tushare,
        per the original note); every other code is treated as a yfinance
        code.
        """
        # NOTE(review): 'CL.NYM' has a CODE_MAP entry but ends with '.NYM',
        # so this returns False for it - confirm which routing is intended.
        return not code.endswith(self._NON_YF_SUFFIXES)