Files
etf/datasource/yfinance_source.py
aszerW 1148d3166c refactor(datasource): 分层接口设计,移除HybridDataSource
架构改动:
- 移除 HybridDataSource(功能被 UniversalDataFetcher 覆盖)
- 新增分层接口设计:基础层 + 扩展层

基础层(统一接口):
- fetch(): 统一 OHLCV 接口,自动识别资产类型
- fetch_batch(): 批量获取

扩展层(资产类型特有):
- fetch_etf_adj(): A股 ETF 后复权价格
- fetch_us_adj(): 美股复权价格
- fetch_etf_with_nav(): ETF 价格 + 净值 + 溢价率

其他修改:
- YFinanceSource: 新增 fetch_adj() 方法
- strategy.py: 改用 UniversalDataFetcher 替代 HybridDataSource
- __init__.py: 移除 HybridDataSource 导出
2026-05-23 12:46:48 +08:00

188 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
YFinance数据源
获取港股、美股数据通过SSH隧道
"""
import os
import time
from typing import Optional
from datetime import datetime, timedelta
import pandas as pd
import urllib3
# Disable urllib3's InsecureRequestWarning (SSL warning) when this module is imported.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class YFinanceSource:
    """Data source that downloads daily OHLCV history through the yfinance package.

    Project-level codes are translated to Yahoo Finance tickers with
    ``CODE_MAP``; codes that are not in the map are passed to yfinance as-is.
    Both fetch methods are best-effort: any exception is printed and turned
    into a ``None`` return value.
    """

    # Project code -> Yahoo Finance ticker.
    CODE_MAP = {
        # Hong Kong
        "HSTECH.HK": "3033.HK",  # Hang Seng TECH index
        "HSI": "^HSI",  # Hang Seng index
        # US indices
        "NDX": "^NDX",  # Nasdaq 100
        "SPX": "^GSPC",  # S&P 500
        "DJI": "^DJI",  # Dow Jones
        # Japan / Europe
        "N225": "^N225",  # Nikkei 225
        "GDAXI": "^GDAXI",  # German DAX
        # Commodities
        "CL.NYM": "CL=F",  # WTI crude oil futures
    }

    # yfinance column name -> standardized lower-case column name.
    _COLUMN_RENAMES = {
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Volume": "volume",
    }

    # Columns (and their order) of every frame returned by fetch()/fetch_adj().
    _OUTPUT_COLUMNS = ["code", "open", "high", "low", "close", "volume"]

    # Suffixes of codes that are NOT served by yfinance (see is_yfinance_code).
    _CHINA_SUFFIXES = (".SH", ".SZ", ".SS", ".CSI")
    _FUTURES_SUFFIXES = (".SHF", ".NYM", ".DCE", ".CZC")

    def __init__(self, use_ssh_tunnel: bool = False):
        """Initialize the YFinance data source.

        Args:
            use_ssh_tunnel: Whether an SSH tunnel is used. The flag is only
                stored on the instance; nothing in this class reads it. The
                original note says SSHTunnelManager must be started first.
        """
        self.use_ssh_tunnel = use_ssh_tunnel
        self._delay = 0.5  # seconds slept before each request (avoid rate limiting)

    @staticmethod
    def _exclusive_end(end_date: str) -> str:
        """Return the day after ``end_date`` ('YYYY-MM-DD').

        yfinance treats ``end`` as exclusive, so one day is added to make the
        caller's end date inclusive.
        """
        next_day = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)
        return next_day.strftime("%Y-%m-%d")

    @classmethod
    def _standardize_frame(cls, raw: pd.DataFrame, code: str, attrs: dict) -> pd.DataFrame:
        """Convert a raw yfinance history frame into the project's standard layout.

        Args:
            raw: Frame with yfinance column names (Open/High/Low/Close/Volume).
            code: Project code written into the ``code`` column.
            attrs: Key/value pairs stored in ``DataFrame.attrs`` of the result.

        Returns:
            A new frame indexed by ``date`` (timezone-naive, normalized to
            midnight) with columns code, open, high, low, close, volume.
        """
        frame = raw.rename(columns=cls._COLUMN_RENAMES)

        index = frame.index
        if isinstance(index, pd.DatetimeIndex):
            if index.tz is not None:
                # Drop the timezone but keep the exchange-local wall time.
                # Converting to UTC first would move midnight stamps of
                # exchanges east of UTC (e.g. +08:00) to the previous day.
                index = index.tz_localize(None)
        else:
            # Non-datetime index: keep the original UTC-based parsing.
            index = pd.to_datetime(index, utc=True).tz_localize(None)
        frame.index = index.normalize()
        frame.index.name = "date"

        frame["code"] = code
        result = frame[cls._OUTPUT_COLUMNS]
        # Set attrs on the frame that is actually returned instead of relying
        # on pandas propagating them through the column selection.
        for key, value in attrs.items():
            result.attrs[key] = value
        return result

    def fetch(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
        """Fetch unadjusted daily prices (``auto_adjust=False``).

        Args:
            code: Project code (e.g. 'NDX', 'N225', 'HSI').
            start_date: Start date, 'YYYY-MM-DD'.
            end_date: End date, 'YYYY-MM-DD' (inclusive).

        Returns:
            DataFrame indexed by ``date`` with columns code, open, high, low,
            close, volume. ``df.attrs['info']`` holds ``ticker.info`` (an empty
            dict when it is unavailable) and ``df.attrs['code']`` the requested
            code. Returns None when no rows come back or when any exception
            occurs (the error is printed).
        """
        import yfinance as yf

        # Sleep before the request to avoid rate limiting.
        time.sleep(self._delay)
        yf_code = self.CODE_MAP.get(code, code)
        try:
            ticker = yf.Ticker(yf_code)
            # Ticker metadata is best-effort: per the original note it is only
            # meaningful for stocks/ETFs and indices may not provide it.
            stock_info = {}
            try:
                stock_info = ticker.info or {}
            except Exception:
                pass
            raw = ticker.history(
                start=start_date,
                end=self._exclusive_end(end_date),
                auto_adjust=False,
            )
            if raw is None or len(raw) == 0:
                return None
            return self._standardize_frame(
                raw, code, {"info": stock_info, "code": code}
            )
        except Exception as e:
            print(f"YFinance下载 {code} ({yf_code}) 失败: {e}")
            return None

    def fetch_adj(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
        """Fetch adjusted daily prices (``auto_adjust=True``).

        Adjusted prices remove the effect of splits and dividends; the
        original note says this is intended for US stocks/ETFs.

        Args:
            code: Project code (e.g. 'AAPL', 'TSLA', 'QQQ').
            start_date: Start date, 'YYYY-MM-DD'.
            end_date: End date, 'YYYY-MM-DD' (inclusive).

        Returns:
            DataFrame indexed by ``date`` with adjusted columns code, open,
            high, low, close, volume. ``df.attrs['code']`` holds the requested
            code and ``df.attrs['adjusted']`` is True. Returns None when no
            rows come back or when any exception occurs (the error is printed).
        """
        import yfinance as yf

        # Sleep before the request to avoid rate limiting.
        time.sleep(self._delay)
        yf_code = self.CODE_MAP.get(code, code)
        try:
            ticker = yf.Ticker(yf_code)
            raw = ticker.history(
                start=start_date,
                end=self._exclusive_end(end_date),
                auto_adjust=True,
            )
            if raw is None or len(raw) == 0:
                return None
            return self._standardize_frame(
                raw, code, {"code": code, "adjusted": True}
            )
        except Exception as e:
            print(f"YFinance下载复权数据 {code} ({yf_code}) 失败: {e}")
            return None

    def is_yfinance_code(self, code: str) -> bool:
        """Return True when ``code`` should be fetched through yfinance.

        Codes ending in a China A-share suffix or a futures suffix return
        False (the original note says those go through Tushare); every other
        code returns True.

        NOTE(review): CODE_MAP contains "CL.NYM", yet the ".NYM" suffix makes
        this method return False for it -- confirm which one is intended.
        """
        return not code.endswith(self._CHINA_SUFFIXES + self._FUTURES_SUFFIXES)