feat(datasource): 股票info字段放到API响应最外层
- yfinance_source.py: stock_info 存储在 df.attrs['info'] 中
- flask_server.py: dataframe_to_json 从 df.attrs 提取 info 放到最外层
- flask_server.py: 缓存切片函数保留 info 字段
- Dockerfile: 启用 Flask 服务作为默认 CMD(端口80)
响应结构示例:
{
  "data": [{"date": "2024-01-01", "code": "AAPL", ...}],
  "info": {"sector": "Technology", "industry": "...", ...}
}
This commit is contained in:
@@ -21,7 +21,7 @@ ENV TZ=Asia/Shanghai
|
|||||||
EXPOSE 80
|
EXPOSE 80
|
||||||
|
|
||||||
# 启动Flask数据API服务(默认端口80)
|
# 启动Flask数据API服务(默认端口80)
|
||||||
# CMD ["python", "datasource/flask_server.py", "--host", "0.0.0.0"]
|
CMD ["python", "datasource/flask_server.py", "--host", "0.0.0.0"]
|
||||||
|
|
||||||
# 运行定时任务调度器(如需使用Flask服务,取消上面注释并注释掉下面)
|
# 运行定时任务调度器(如需使用Flask服务,取消上面注释并注释掉下面)
|
||||||
CMD ["python", "scripts/daily_scheduler.py", "--time", "09:00"]
|
# CMD ["python", "scripts/daily_scheduler.py", "--time", "09:00"]
|
||||||
@@ -180,8 +180,21 @@ def _slice_data_from_cache(cached_data: Dict, start: str, end: str) -> Dict:
|
|||||||
|
|
||||||
# 从缓存数据中重建 DataFrame
|
# 从缓存数据中重建 DataFrame
|
||||||
records = cached_data['df_json']['data']
|
records = cached_data['df_json']['data']
|
||||||
|
info_data = cached_data['df_json'].get('info', None) # 从缓存获取 info
|
||||||
|
|
||||||
if not records:
|
if not records:
|
||||||
return cached_data
|
result = {
|
||||||
|
'data': [],
|
||||||
|
'count': 0,
|
||||||
|
'code': cached_data['code'],
|
||||||
|
'asset_type': cached_data['asset_type'],
|
||||||
|
'requested_range': {'start': start, 'end': end},
|
||||||
|
'available_range': {'start': cached_data['data_start'], 'end': cached_data['data_end']},
|
||||||
|
}
|
||||||
|
# 保留 info(如果有)
|
||||||
|
if info_data:
|
||||||
|
result['info'] = info_data
|
||||||
|
return result
|
||||||
|
|
||||||
# 转换为 DataFrame
|
# 转换为 DataFrame
|
||||||
df = pd.DataFrame(records)
|
df = pd.DataFrame(records)
|
||||||
@@ -189,6 +202,10 @@ def _slice_data_from_cache(cached_data: Dict, start: str, end: str) -> Dict:
|
|||||||
df['date'] = pd.to_datetime(df['date'])
|
df['date'] = pd.to_datetime(df['date'])
|
||||||
df = df.set_index('date')
|
df = df.set_index('date')
|
||||||
|
|
||||||
|
# 恢复 attrs(如果有 info)
|
||||||
|
if info_data:
|
||||||
|
df.attrs['info'] = info_data
|
||||||
|
|
||||||
# 切片日期范围
|
# 切片日期范围
|
||||||
start_dt = pd.to_datetime(start)
|
start_dt = pd.to_datetime(start)
|
||||||
end_dt = pd.to_datetime(end)
|
end_dt = pd.to_datetime(end)
|
||||||
@@ -199,17 +216,7 @@ def _slice_data_from_cache(cached_data: Dict, start: str, end: str) -> Dict:
|
|||||||
# 切片(使用 loc 进行日期范围选择)
|
# 切片(使用 loc 进行日期范围选择)
|
||||||
sliced_df = df.loc[start_dt:end_dt]
|
sliced_df = df.loc[start_dt:end_dt]
|
||||||
|
|
||||||
if len(sliced_df) == 0:
|
# 转换为 JSON 格式(dataframe_to_json 会处理 df.attrs['info'])
|
||||||
return {
|
|
||||||
'data': [],
|
|
||||||
'count': 0,
|
|
||||||
'code': cached_data['code'],
|
|
||||||
'asset_type': cached_data['asset_type'],
|
|
||||||
'requested_range': {'start': start, 'end': end},
|
|
||||||
'available_range': {'start': cached_data['data_start'], 'end': cached_data['data_end']},
|
|
||||||
}
|
|
||||||
|
|
||||||
# 转换为 JSON 格式
|
|
||||||
result = dataframe_to_json(sliced_df)
|
result = dataframe_to_json(sliced_df)
|
||||||
result['code'] = cached_data['code']
|
result['code'] = cached_data['code']
|
||||||
result['asset_type'] = cached_data['asset_type']
|
result['asset_type'] = cached_data['asset_type']
|
||||||
@@ -337,10 +344,37 @@ def get_cache_info() -> Dict:
|
|||||||
# DataFrame 转换
|
# DataFrame 转换
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
||||||
|
class JSONEncoder(json.JSONEncoder):
|
||||||
|
"""自定义 JSON 编码器,处理特殊类型"""
|
||||||
|
def default(self, obj):
|
||||||
|
# 处理 pandas Timestamp
|
||||||
|
if hasattr(obj, 'isoformat'):
|
||||||
|
return obj.isoformat()
|
||||||
|
# 处理 numpy 类型
|
||||||
|
if hasattr(obj, 'item'):
|
||||||
|
return obj.item()
|
||||||
|
# 处理 NaN/Infinity
|
||||||
|
if isinstance(obj, float):
|
||||||
|
if obj != obj: # NaN
|
||||||
|
return None
|
||||||
|
if obj == float('inf'):
|
||||||
|
return None
|
||||||
|
if obj == float('-inf'):
|
||||||
|
return None
|
||||||
|
return super().default(obj)
|
||||||
|
|
||||||
|
|
||||||
def dataframe_to_json(df: pd.DataFrame) -> Dict:
|
def dataframe_to_json(df: pd.DataFrame) -> Dict:
|
||||||
"""将 DataFrame 转换为 JSON 可序列化的字典"""
|
"""将 DataFrame 转换为 JSON 可序列化的字典
|
||||||
|
|
||||||
|
如果 df.attrs 中有 info 字段,会放到最外层返回
|
||||||
|
"""
|
||||||
if df is None or len(df) == 0:
|
if df is None or len(df) == 0:
|
||||||
return {"data": [], "count": 0}
|
result = {"data": [], "count": 0}
|
||||||
|
# 即使空数据也返回 info(如果有)
|
||||||
|
if hasattr(df, 'attrs') and 'info' in df.attrs:
|
||||||
|
result['info'] = df.attrs['info']
|
||||||
|
return result
|
||||||
|
|
||||||
# 重置索引
|
# 重置索引
|
||||||
df_reset = df.reset_index()
|
df_reset = df.reset_index()
|
||||||
@@ -357,18 +391,32 @@ def dataframe_to_json(df: pd.DataFrame) -> Dict:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# 转换为字典列表
|
# 处理特殊值(NaN, Infinity)
|
||||||
records = df_reset.to_dict(orient='records')
|
df_clean = df_reset.copy()
|
||||||
|
for col in df_clean.columns:
|
||||||
|
if df_clean[col].dtype in ['float64', 'float32']:
|
||||||
|
df_clean[col] = df_clean[col].replace([float('inf'), float('-inf')], None)
|
||||||
|
df_clean[col] = df_clean[col].where(df_clean[col].notna(), None)
|
||||||
|
|
||||||
return {
|
# 转换为字典列表
|
||||||
|
records = df_clean.to_dict(orient='records')
|
||||||
|
|
||||||
|
# 构建返回结果
|
||||||
|
result = {
|
||||||
"data": records,
|
"data": records,
|
||||||
"count": len(records),
|
"count": len(records),
|
||||||
"columns": list(df_reset.columns),
|
"columns": list(df_clean.columns),
|
||||||
"date_range": {
|
"date_range": {
|
||||||
"start": df.index.min().strftime('%Y-%m-%d') if len(df) > 0 else None,
|
"start": df.index.min().strftime('%Y-%m-%d') if len(df) > 0 else None,
|
||||||
"end": df.index.max().strftime('%Y-%m-%d') if len(df) > 0 else None,
|
"end": df.index.max().strftime('%Y-%m-%d') if len(df) > 0 else None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# 将 info 从 df.attrs 放到最外层
|
||||||
|
if hasattr(df, 'attrs') and 'info' in df.attrs:
|
||||||
|
result['info'] = df.attrs['info']
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def validate_date(date_str: str) -> bool:
|
def validate_date(date_str: str) -> bool:
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ class YFinanceSource:
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
DataFrame with columns: date, open, high, low, close, volume
|
DataFrame with columns: date, open, high, low, close, volume
|
||||||
|
股票元信息存储在 df.attrs['info'] 中
|
||||||
"""
|
"""
|
||||||
import yfinance as yf
|
import yfinance as yf
|
||||||
|
|
||||||
@@ -67,6 +68,13 @@ class YFinanceSource:
|
|||||||
try:
|
try:
|
||||||
ticker = yf.Ticker(yf_code)
|
ticker = yf.Ticker(yf_code)
|
||||||
|
|
||||||
|
# 获取股票信息(仅对股票/ETF有效,指数可能没有)
|
||||||
|
stock_info = {}
|
||||||
|
try:
|
||||||
|
stock_info = ticker.info or {}
|
||||||
|
except Exception:
|
||||||
|
pass # 指数可能没有info
|
||||||
|
|
||||||
# end_date 需要加一天(yfinance的end是排他的)
|
# end_date 需要加一天(yfinance的end是排他的)
|
||||||
end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)
|
end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)
|
||||||
|
|
||||||
@@ -96,6 +104,10 @@ class YFinanceSource:
|
|||||||
# 添加代码列
|
# 添加代码列
|
||||||
df["code"] = code
|
df["code"] = code
|
||||||
|
|
||||||
|
# 将股票信息存储到 DataFrame.attrs 中(最外层结构)
|
||||||
|
df.attrs['info'] = stock_info
|
||||||
|
df.attrs['code'] = code
|
||||||
|
|
||||||
return df[['code', 'open', 'high', 'low', 'close', 'volume']]
|
return df[['code', 'open', 'high', 'low', 'close', 'volume']]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user