新增方法: - fetch_etf_adj: 获取ETF后复权价格数据,消除份额拆分对收益率的影响 通过 fund_daily + fund_adj 手动计算后复权价格 fund_adj 单次限2000条,按5年分段请求 - fetch_trade_cal: 获取A股SSE官方交易日历 验证结果: - 纳指ETF后复权正确识别2022-01-14拆分(复权因子5.0) - 累计收益100.54%与纳指100指数一致
383 lines
13 KiB
Python
383 lines
13 KiB
Python
"""
|
||
Tushare数据源
|
||
|
||
获取A股指数、ETF、期货数据
|
||
"""
|
||
|
||
import os
|
||
from typing import Optional
|
||
from datetime import datetime
|
||
import pandas as pd
|
||
|
||
|
||
class TushareSource:
|
||
"""Tushare数据源"""
|
||
|
||
def __init__(self, token: Optional[str] = None):
|
||
"""
|
||
初始化Tushare数据源
|
||
|
||
Args:
|
||
token: Tushare Token(可选,默认从环境变量读取)
|
||
"""
|
||
self._token = token or os.getenv("TUSHARE_TOKEN")
|
||
if not self._token:
|
||
raise ValueError("请设置环境变量 TUSHARE_TOKEN")
|
||
|
||
def _get_pro_api(self):
|
||
"""获取Tushare Pro API"""
|
||
import tushare as ts
|
||
return ts.pro_api(self._token)
|
||
|
||
def _clear_proxy(self) -> dict:
|
||
"""清除代理环境变量(Tushare是国内服务,不需要代理)"""
|
||
original = {}
|
||
for key in ["HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"]:
|
||
original[key] = os.environ.pop(key, None)
|
||
return original
|
||
|
||
def _restore_proxy(self, original: dict):
|
||
"""恢复代理环境变量"""
|
||
for key, value in original.items():
|
||
if value is not None:
|
||
os.environ[key] = value
|
||
|
||
def fetch_index(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
|
||
"""
|
||
获取A股指数数据
|
||
|
||
Args:
|
||
code: 指数代码,如 '000300.SH', '399006.SZ', 'H30269.CSI'
|
||
start_date: 开始日期 'YYYY-MM-DD'
|
||
end_date: 结束日期 'YYYY-MM-DD'
|
||
|
||
Returns:
|
||
DataFrame with columns: date, open, high, low, close, volume
|
||
"""
|
||
original_proxy = self._clear_proxy()
|
||
|
||
try:
|
||
pro = self._get_pro_api()
|
||
|
||
# 转换代码格式 (.SS -> .SH)
|
||
ts_code = code.replace(".SS", ".SH")
|
||
|
||
df = pro.index_daily(
|
||
ts_code=ts_code,
|
||
start_date=start_date.replace("-", ""),
|
||
end_date=end_date.replace("-", "")
|
||
)
|
||
|
||
if df is None or len(df) == 0:
|
||
return None
|
||
|
||
# 标准化列名
|
||
df = df.rename(columns={
|
||
"trade_date": "date",
|
||
"vol": "volume",
|
||
})
|
||
|
||
# 转换日期格式
|
||
df["date"] = pd.to_datetime(df["date"])
|
||
df = df.set_index("date")
|
||
df = df.sort_index()
|
||
df["code"] = code
|
||
|
||
return df[['code', 'open', 'high', 'low', 'close', 'volume']]
|
||
|
||
except Exception as e:
|
||
print(f"Tushare下载指数 {code} 失败: {e}")
|
||
return None
|
||
|
||
finally:
|
||
self._restore_proxy(original_proxy)
|
||
|
||
def fetch_futures(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
|
||
"""
|
||
获取期货数据
|
||
|
||
Args:
|
||
code: 期货代码,如 'AU.SHF', 'CU.SHF'
|
||
start_date: 开始日期
|
||
end_date: 结束日期
|
||
"""
|
||
original_proxy = self._clear_proxy()
|
||
|
||
try:
|
||
import tushare as ts
|
||
pro = self._get_pro_api()
|
||
|
||
# 使用 fut_daily 接口
|
||
df = pro.fut_daily(
|
||
ts_code=code,
|
||
start_date=start_date.replace("-", ""),
|
||
end_date=end_date.replace("-", "")
|
||
)
|
||
|
||
if df is None or len(df) == 0:
|
||
return None
|
||
|
||
# 标准化列名
|
||
df = df.rename(columns={
|
||
"trade_date": "date",
|
||
"vol": "volume",
|
||
})
|
||
|
||
df["date"] = pd.to_datetime(df["date"])
|
||
df = df.set_index("date")
|
||
df = df.sort_index()
|
||
df["code"] = code
|
||
|
||
return df[['code', 'open', 'high', 'low', 'close', 'volume']]
|
||
|
||
except Exception as e:
|
||
print(f"Tushare下载期货 {code} 失败: {e}")
|
||
return None
|
||
|
||
finally:
|
||
self._restore_proxy(original_proxy)
|
||
|
||
def fetch_etf(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
|
||
"""
|
||
获取ETF价格数据
|
||
|
||
Args:
|
||
code: ETF代码,如 '159915.SZ', '518880.SH'
|
||
"""
|
||
original_proxy = self._clear_proxy()
|
||
|
||
try:
|
||
pro = self._get_pro_api()
|
||
|
||
ts_code = code.replace(".SS", ".SH")
|
||
|
||
df = pro.fund_daily(
|
||
ts_code=ts_code,
|
||
start_date=start_date.replace("-", ""),
|
||
end_date=end_date.replace("-", "")
|
||
)
|
||
|
||
if df is None or len(df) == 0:
|
||
return None
|
||
|
||
df = df.rename(columns={
|
||
"trade_date": "date",
|
||
"vol": "volume",
|
||
})
|
||
|
||
df["date"] = pd.to_datetime(df["date"])
|
||
df = df.set_index("date")
|
||
df = df.sort_index()
|
||
df["code"] = code
|
||
|
||
return df[['code', 'open', 'high', 'low', 'close', 'volume']]
|
||
|
||
except Exception as e:
|
||
print(f"Tushare下载ETF {code} 失败: {e}")
|
||
return None
|
||
|
||
finally:
|
||
self._restore_proxy(original_proxy)
|
||
|
||
def fetch_etf_nav(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
|
||
"""
|
||
获取ETF净值数据
|
||
|
||
Args:
|
||
code: ETF代码
|
||
"""
|
||
original_proxy = self._clear_proxy()
|
||
|
||
try:
|
||
pro = self._get_pro_api()
|
||
|
||
ts_code = code.replace(".SS", ".SH")
|
||
|
||
df = pro.fund_nav(
|
||
ts_code=ts_code,
|
||
start_date=start_date.replace("-", ""),
|
||
end_date=end_date.replace("-", "")
|
||
)
|
||
|
||
if df is None or len(df) == 0:
|
||
return None
|
||
|
||
df = df.rename(columns={
|
||
"nav_date": "date",
|
||
"unit_nav": "nav",
|
||
})
|
||
|
||
df["date"] = pd.to_datetime(df["date"])
|
||
df = df.set_index("date")
|
||
df = df.sort_index()
|
||
df["code"] = code
|
||
|
||
return df[['code', 'nav']]
|
||
|
||
except Exception as e:
|
||
print(f"Tushare下载ETF净值 {code} 失败: {e}")
|
||
return None
|
||
|
||
finally:
|
||
self._restore_proxy(original_proxy)
|
||
|
||
def is_china_index(self, code: str) -> bool:
|
||
"""判断是否为A股指数"""
|
||
return code.endswith(".SH") or code.endswith(".SZ") or code.endswith(".SS") or code.endswith(".CSI")
|
||
|
||
def is_futures(self, code: str) -> bool:
|
||
"""判断是否为中国期货(仅支持上期所、大商所、郑商所)"""
|
||
# 只支持中国交易所期货(.SHF上期所、.DCE大商所、.CZC郑商所)
|
||
# NYMEX (.NYM) 和 ICE (.ICE) 走 YFinance
|
||
return ".SHF" in code or ".DCE" in code or ".CZC" in code
|
||
|
||
def fetch(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
|
||
"""
|
||
通用数据获取(自动判断类型)
|
||
|
||
Args:
|
||
code: 代码
|
||
start_date: 开始日期
|
||
end_date: 结束日期
|
||
"""
|
||
if self.is_china_index(code):
|
||
return self.fetch_index(code, start_date, end_date)
|
||
elif self.is_futures(code):
|
||
return self.fetch_futures(code, start_date, end_date)
|
||
else:
|
||
return None
|
||
|
||
def fetch_etf_adj(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
|
||
"""
|
||
获取 ETF 后复权价格数据
|
||
|
||
通过 fund_daily + fund_adj 手动计算后复权价格,消除份额折算(拆分)对收益率的影响。
|
||
fund_adj 单次限 2000 条,按 5 年分段请求再拼接。
|
||
|
||
Args:
|
||
code: ETF代码,如 '159915.SZ', '518880.SH'
|
||
start_date: 开始日期 'YYYY-MM-DD'
|
||
end_date: 结束日期 'YYYY-MM-DD'
|
||
|
||
Returns:
|
||
DataFrame with columns: date, open, close, adj_factor, close_hfq
|
||
"""
|
||
import tushare as ts
|
||
from datetime import datetime
|
||
|
||
original_proxy = self._clear_proxy()
|
||
|
||
try:
|
||
pro = self._get_pro_api()
|
||
ts_code = code.replace('.SS', '.SH')
|
||
|
||
# 获取 fund_daily 数据
|
||
df_daily = pro.fund_daily(
|
||
ts_code=ts_code,
|
||
start_date=start_date.replace('-', ''),
|
||
end_date=end_date.replace('-', '')
|
||
)
|
||
|
||
if df_daily is None or len(df_daily) == 0:
|
||
return None
|
||
|
||
# 获取 fund_adj 数据(分段请求,单次限2000条)
|
||
# 按5年分段
|
||
start_dt = datetime.strptime(start_date, '%Y-%m-%d')
|
||
end_dt = datetime.strptime(end_date, '%Y-%m-%d')
|
||
|
||
adj_chunks = []
|
||
chunk_start = start_dt
|
||
while chunk_start < end_dt:
|
||
chunk_end = min(chunk_start.replace(year=chunk_start.year + 5), end_dt)
|
||
chunk_start_str = chunk_start.strftime('%Y%m%d')
|
||
chunk_end_str = chunk_end.strftime('%Y%m%d')
|
||
|
||
df_adj_chunk = pro.fund_adj(
|
||
ts_code=ts_code,
|
||
start_date=chunk_start_str,
|
||
end_date=chunk_end_str
|
||
)
|
||
|
||
if df_adj_chunk is not None and len(df_adj_chunk) > 0:
|
||
adj_chunks.append(df_adj_chunk)
|
||
|
||
chunk_start = chunk_end
|
||
|
||
if not adj_chunks:
|
||
# 无复权因子,返回原始数据
|
||
df = df_daily.rename(columns={'trade_date': 'date', 'vol': 'volume'})
|
||
df['date'] = pd.to_datetime(df['date'])
|
||
df = df.set_index('date').sort_index()
|
||
df['adj_factor'] = 1.0
|
||
df['close_hfq'] = df['close']
|
||
return df[['code', 'open', 'close', 'adj_factor', 'close_hfq']]
|
||
|
||
# 合并所有复权因子
|
||
df_adj = pd.concat(adj_chunks, ignore_index=True)
|
||
df_adj = df_adj.rename(columns={'trade_date': 'date'})
|
||
df_adj['date'] = pd.to_datetime(df_adj['date'])
|
||
df_adj = df_adj.set_index('date').sort_index()
|
||
|
||
# 合并 daily 和 adj
|
||
df_daily = df_daily.rename(columns={'trade_date': 'date', 'vol': 'volume'})
|
||
df_daily['date'] = pd.to_datetime(df_daily['date'])
|
||
df_daily = df_daily.set_index('date').sort_index()
|
||
|
||
# 复权因子对齐(用最新值)
|
||
df_adj_aligned = df_adj.reindex(df_daily.index, method='ffill')
|
||
df_adj_aligned['adj_factor'] = df_adj_aligned['adj_factor'].fillna(1.0)
|
||
|
||
# 计算后复权价格
|
||
df = df_daily.copy()
|
||
df['adj_factor'] = df_adj_aligned['adj_factor']
|
||
df['close_hfq'] = df['close'] * df['adj_factor']
|
||
df['code'] = code
|
||
|
||
return df[['code', 'open', 'close', 'adj_factor', 'close_hfq']]
|
||
|
||
except Exception as e:
|
||
print(f"Tushare下载ETF复权数据 {code} 失败: {e}")
|
||
return None
|
||
|
||
finally:
|
||
self._restore_proxy(original_proxy)
|
||
|
||
def fetch_trade_cal(self, start_date: str, end_date: str) -> pd.DatetimeIndex:
|
||
"""
|
||
获取 A 股(上交所 SSE)官方交易日历
|
||
|
||
Args:
|
||
start_date: 开始日期 'YYYY-MM-DD'
|
||
end_date: 结束日期 'YYYY-MM-DD'
|
||
|
||
Returns:
|
||
DatetimeIndex: A股交易日日期序列
|
||
"""
|
||
import tushare as ts
|
||
|
||
original_proxy = self._clear_proxy()
|
||
|
||
try:
|
||
pro = self._get_pro_api()
|
||
|
||
df = pro.trade_cal(
|
||
exchange='SSE',
|
||
start_date=start_date.replace('-', ''),
|
||
end_date=end_date.replace('-', ''),
|
||
is_open='1'
|
||
)
|
||
|
||
if df is None or len(df) == 0:
|
||
return pd.DatetimeIndex([])
|
||
|
||
# 提取交易日并转换为 DatetimeIndex
|
||
trade_dates = pd.to_datetime(df['cal_date'])
|
||
return pd.DatetimeIndex(trade_dates.sort_values())
|
||
|
||
except Exception as e:
|
||
print(f"Tushare下载交易日历失败: {e}")
|
||
return pd.DatetimeIndex([])
|
||
|
||
finally:
|
||
self._restore_proxy(original_proxy) |