Serve the Flask data API by default and expose ticker metadata in responses.

- Dockerfile: run datasource/flask_server.py as the container CMD; the
  daily-scheduler CMD is commented out.
- datasource/flask_server.py: carry `info` from the cache / df.attrs to the
  top level of the JSON payload, add a JSONEncoder, and replace NaN/Infinity
  with None before serialization.
- datasource/yfinance_source.py: store ticker.info in df.attrs['info'].

Notes:
- Indentation and blank lines of this patch were reconstructed; check it
  with `git apply --check` (add --ignore-whitespace if context differs).
- The post-image blob id on the flask_server.py index line may be stale.

diff --git a/Dockerfile b/Dockerfile
index 23044d8..b69b913 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,7 +21,7 @@ ENV TZ=Asia/Shanghai
 EXPOSE 80
 
 # 启动Flask数据API服务(默认端口80)
-# CMD ["python", "datasource/flask_server.py", "--host", "0.0.0.0"]
+CMD ["python", "datasource/flask_server.py", "--host", "0.0.0.0"]
 
 # 运行定时任务调度器(如需使用Flask服务,取消上面注释并注释掉下面)
-CMD ["python", "scripts/daily_scheduler.py", "--time", "09:00"]
\ No newline at end of file
+# CMD ["python", "scripts/daily_scheduler.py", "--time", "09:00"]
\ No newline at end of file
diff --git a/datasource/flask_server.py b/datasource/flask_server.py
index 5b6a25e..85a1f27 100644
--- a/datasource/flask_server.py
+++ b/datasource/flask_server.py
@@ -180,8 +180,21 @@ def _slice_data_from_cache(cached_data: Dict, start: str, end: str) -> Dict:
 
     # 从缓存数据中重建 DataFrame
     records = cached_data['df_json']['data']
+    info_data = cached_data['df_json'].get('info', None)  # info stored in the cache entry, if any
+
     if not records:
-        return cached_data
+        result = {
+            'data': [],
+            'count': 0,
+            'code': cached_data['code'],
+            'asset_type': cached_data['asset_type'],
+            'requested_range': {'start': start, 'end': end},
+            'available_range': {'start': cached_data['data_start'], 'end': cached_data['data_end']},
+        }
+        # Preserve info (if present)
+        if info_data:
+            result['info'] = info_data
+        return result
 
     # 转换为 DataFrame
     df = pd.DataFrame(records)
@@ -189,6 +202,10 @@ def _slice_data_from_cache(cached_data: Dict, start: str, end: str) -> Dict:
     df['date'] = pd.to_datetime(df['date'])
     df = df.set_index('date')
 
+    # Restore attrs (when info is present)
+    if info_data:
+        df.attrs['info'] = info_data
+
     # 切片日期范围
     start_dt = pd.to_datetime(start)
     end_dt = pd.to_datetime(end)
@@ -199,17 +216,7 @@ def _slice_data_from_cache(cached_data: Dict, start: str, end: str) -> Dict:
     # 切片(使用 loc 进行日期范围选择)
     sliced_df = df.loc[start_dt:end_dt]
 
-    if len(sliced_df) == 0:
-        return {
-            'data': [],
-            'count': 0,
-            'code': cached_data['code'],
-            'asset_type': cached_data['asset_type'],
-            'requested_range': {'start': start, 'end': end},
-            'available_range': {'start': cached_data['data_start'], 'end': cached_data['data_end']},
-        }
-
-    # 转换为 JSON 格式
+    # Convert to JSON format (dataframe_to_json handles df.attrs['info'])
     result = dataframe_to_json(sliced_df)
     result['code'] = cached_data['code']
     result['asset_type'] = cached_data['asset_type']
@@ -337,10 +344,52 @@ def get_cache_info() -> Dict:
 # DataFrame 转换
 # ============================================================
 
+class JSONEncoder(json.JSONEncoder):
+    """JSON encoder for pandas/numpy values that emits strict JSON.
+
+    ``default`` is only invoked for objects the stock encoder cannot
+    serialize, so built-in float NaN/Infinity never reach it; they are
+    replaced with None in ``iterencode`` before encoding starts.
+    """
+
+    @classmethod
+    def _sanitize(cls, obj):
+        """Recursively replace NaN/Infinity floats with None."""
+        if isinstance(obj, float):
+            if obj != obj or obj in (float('inf'), float('-inf')):
+                return None
+            return obj
+        if isinstance(obj, dict):
+            return {k: cls._sanitize(v) for k, v in obj.items()}
+        if isinstance(obj, (list, tuple)):
+            return [cls._sanitize(v) for v in obj]
+        return obj
+
+    def default(self, obj):
+        # pandas Timestamp and other objects exposing isoformat()
+        if hasattr(obj, 'isoformat'):
+            return obj.isoformat()
+        # numpy scalars; the unwrapped value may itself be NaN/Infinity
+        if hasattr(obj, 'item'):
+            return self._sanitize(obj.item())
+        return super().default(obj)
+
+    def iterencode(self, o, _one_shot=False):
+        # encode() and json.dump() both route through iterencode()
+        return super().iterencode(self._sanitize(o), _one_shot=_one_shot)
+
+
 def dataframe_to_json(df: pd.DataFrame) -> Dict:
-    """将 DataFrame 转换为 JSON 可序列化的字典"""
+    """Convert a DataFrame into a JSON-serializable dict.
+
+    If df.attrs contains an 'info' entry, it is returned at the top level.
+    """
     if df is None or len(df) == 0:
-        return {"data": [], "count": 0}
+        result = {"data": [], "count": 0}
+        # Return info (if any) even when there is no data
+        if hasattr(df, 'attrs') and 'info' in df.attrs:
+            result['info'] = df.attrs['info']
+        return result
 
     # 重置索引
     df_reset = df.reset_index()
@@ -357,18 +406,35 @@ def dataframe_to_json(df: pd.DataFrame) -> Dict:
             except Exception:
                 pass
 
-    # 转换为字典列表
-    records = df_reset.to_dict(orient='records')
+    # Replace NaN/Infinity with None. The column is cast to object first:
+    # a float column cannot hold None and would turn it back into NaN.
+    non_finite = [float('inf'), float('-inf')]
+    df_clean = df_reset.copy()
+    for col in df_clean.columns:
+        column = df_clean[col]
+        if pd.api.types.is_float_dtype(column):
+            keep = column.notna() & ~column.isin(non_finite)
+            df_clean[col] = column.astype(object).where(keep, None)
 
-    return {
+    # Convert to a list of row dicts
+    records = df_clean.to_dict(orient='records')
+
+    # Build the response
+    result = {
         "data": records,
         "count": len(records),
-        "columns": list(df_reset.columns),
+        "columns": list(df_clean.columns),
         "date_range": {
             "start": df.index.min().strftime('%Y-%m-%d') if len(df) > 0 else None,
             "end": df.index.max().strftime('%Y-%m-%d') if len(df) > 0 else None,
         }
     }
+
+    # Lift info from df.attrs to the top level of the response
+    if hasattr(df, 'attrs') and 'info' in df.attrs:
+        result['info'] = df.attrs['info']
+
+    return result
 
 
 def validate_date(date_str: str) -> bool:
diff --git a/datasource/yfinance_source.py b/datasource/yfinance_source.py
index f0fa0f3..76d53b3 100644
--- a/datasource/yfinance_source.py
+++ b/datasource/yfinance_source.py
@@ -55,6 +55,7 @@ class YFinanceSource:
 
         Returns:
             DataFrame with columns: date, open, high, low, close, volume
+            Ticker metadata is stored in df.attrs['info']
         """
         import yfinance as yf
 
@@ -67,6 +68,13 @@ class YFinanceSource:
         try:
             ticker = yf.Ticker(yf_code)
 
+            # Fetch ticker metadata (only meaningful for stocks/ETFs; indices may have none)
+            stock_info = {}
+            try:
+                stock_info = ticker.info or {}
+            except Exception:
+                pass  # indices may not expose info
+
             # end_date 需要加一天(yfinance的end是排他的)
             end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)
 
@@ -96,6 +104,10 @@ class YFinanceSource:
             # 添加代码列
             df["code"] = code
 
+            # Store the ticker metadata in DataFrame.attrs (surfaced at the top level of the response)
+            df.attrs['info'] = stock_info
+            df.attrs['code'] = code
+
             return df[['code', 'open', 'high', 'low', 'close', 'volume']]
 
         except Exception as e: