From 1de792248456370897312ebecb26194805ed7f8f Mon Sep 17 00:00:00 2001
From: aszerW
Date: Tue, 28 Oct 2025 21:13:18 +0800
Subject: [PATCH] =?UTF-8?q?=E5=9C=A8=20fetch=5Fpaginated=5Fdata=20?=
 =?UTF-8?q?=E4=B8=AD=E8=AE=BE=E7=BD=AE=20sleep?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 index_downloader.py |  19 ++--
 index_spot.py       | 225 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 235 insertions(+), 9 deletions(-)
 create mode 100644 index_spot.py

diff --git a/index_downloader.py b/index_downloader.py
index 2b1452d..973f230 100644
--- a/index_downloader.py
+++ b/index_downloader.py
@@ -3,6 +3,7 @@ import re
 import akshare as ak
 import pandas as pd
 from loguru import logger
+from index_spot import stock_zh_index_spot_em
 
 # index_hist_df = ak.index_zh_a_hist(
 #     symbol="000001",  # 指数代码,如上证指数
@@ -17,7 +18,7 @@ def get_all_stock_index():
     index_df_list = []
     for source in index_choice:
         logger.info(f"正在获取 {source}...")
-        index_df = ak.stock_zh_index_spot_em(symbol=source)
+        index_df = stock_zh_index_spot_em(symbol=source)
         index_df["symbol"] = source
         index_df_list.append(index_df)
         logger.info(f"{source}: {index_df.shape[0]}")
@@ -70,7 +71,7 @@ def get_index_fund_info():
 
 
 if __name__ == "__main__":
-    # df = get_all_stock_index()
+    df = get_all_stock_index()
     # df.to_csv("index_all_stock.csv", index=False, encoding="utf-8-sig")
 
     # res = ak.fund_etf_spot_em()
@@ -79,11 +80,11 @@ if __name__ == "__main__":
     # df = get_index_fund_info()
     # df.to_csv("index_fund_info.csv", index=False, encoding="utf-8-sig")
 
-    import akshare as ak
-    import pandas as pd
+    # import akshare as ak
+    # import pandas as pd
 
-    for symbol in ["NVDA", "AAPL", "MSFT", "AMZN", "TSLA", "META", "GOOGL"]:
-        stock_us_daily_df = ak.stock_us_daily(symbol=symbol, adjust="qfq")
-        stock_us_daily_df.to_csv(
-            f"{symbol}_stock_us_daily.csv", index=False, encoding="utf-8-sig"
-        )
+    # for symbol in ["NVDA", "AAPL", "MSFT", "AMZN", "TSLA", "META", "GOOGL"]:
+    #     stock_us_daily_df = ak.stock_us_daily(symbol=symbol, adjust="qfq")
+    #     stock_us_daily_df.to_csv(
+    #         f"{symbol}_stock_us_daily.csv", index=False, encoding="utf-8-sig"
+    #     )
diff --git a/index_spot.py b/index_spot.py
new file mode 100644
index 0000000..f49a800
--- /dev/null
+++ b/index_spot.py
@@ -0,0 +1,225 @@
+import logging
+import pandas as pd
+import requests
+import math
+import time
+from typing import List, Dict
+from akshare.utils.tqdm import get_tqdm
+from loguru import logger
+
+
+def __stock_zh_main_spot_em() -> pd.DataFrame:
+    """
+    Eastmoney - Quote Center - key Shanghai/Shenzhen indices
+    https://quote.eastmoney.com/center/hszs.html
+    :return: real-time quotes for the indices
+    :rtype: pandas.DataFrame
+    """
+    url = "https://33.push2.eastmoney.com/api/qt/clist/get"
+    params = {
+        "pn": "1",
+        "pz": "100",
+        "po": "1",
+        "np": "1",
+        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
+        "fltt": "2",
+        "invt": "2",
+        "dect": "1",
+        "wbp2u": "|0|0|0|web",
+        "fid": "",
+        "fs": "b:MK0010",
+        "fields": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,"
+        "f23,f24,f25,f26,f22,f11,f62,f128,f136,f115,f152",
+    }
+    # Bound the request, as fetch_paginated_data does, so that a stalled
+    # connection cannot block the caller indefinitely.
+    r = requests.get(url, params=params, timeout=30)
+    data_json = r.json()
+    temp_df = pd.DataFrame(data_json["data"]["diff"])
+    temp_df.reset_index(inplace=True)
+    temp_df["index"] = temp_df["index"].astype(int) + 1
+    temp_df.rename(
+        columns={
+            "index": "序号",
+            "f2": "最新价",
+            "f3": "涨跌幅",
+            "f4": "涨跌额",
+            "f5": "成交量",
+            "f6": "成交额",
+            "f7": "振幅",
+            "f10": "量比",
+            "f12": "代码",
+            "f14": "名称",
+            "f15": "最高",
+            "f16": "最低",
+            "f17": "今开",
+            "f18": "昨收",
+        },
+        inplace=True,
+    )
+    temp_df = temp_df[
+        [
+            "序号",
+            "代码",
+            "名称",
+            "最新价",
+            "涨跌幅",
+            "涨跌额",
+            "成交量",
+            "成交额",
+            "振幅",
+            "最高",
+            "最低",
+            "今开",
+            "昨收",
+            "量比",
+        ]
+    ]
+    temp_df["最新价"] = pd.to_numeric(temp_df["最新价"], errors="coerce")
+    temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"], errors="coerce")
+    temp_df["涨跌额"] = pd.to_numeric(temp_df["涨跌额"], errors="coerce")
+    temp_df["成交量"] = pd.to_numeric(temp_df["成交量"], errors="coerce")
+    temp_df["成交额"] = pd.to_numeric(temp_df["成交额"], errors="coerce")
+    temp_df["振幅"] = pd.to_numeric(temp_df["振幅"], errors="coerce")
+    temp_df["最高"] = pd.to_numeric(temp_df["最高"], errors="coerce")
+    temp_df["最低"] = pd.to_numeric(temp_df["最低"], errors="coerce")
+    temp_df["今开"] = pd.to_numeric(temp_df["今开"], errors="coerce")
+    temp_df["昨收"] = pd.to_numeric(temp_df["昨收"], errors="coerce")
+    temp_df["量比"] = pd.to_numeric(temp_df["量比"], errors="coerce")
+    return temp_df
+
+
+def fetch_paginated_data(
+    url: str,
+    base_params: Dict,
+    timeout: int = 15*2,
+    page_delay: float = 300,
+):
+    """
+    Eastmoney - fetch a list endpoint page by page and merge the results
+    https://quote.eastmoney.com/f1.html?newcode=0.000001
+    :param url: URL of the list endpoint to query
+    :type url: str
+    :param base_params: base request parameters
+    :type base_params: dict
+    :param timeout: request timeout passed to requests.get
+    :type timeout: int
+    :param page_delay: seconds to sleep before each follow-up page request
+    :type page_delay: float
+    :return: merged data of all pages
+    :rtype: pandas.DataFrame
+    """
+    # Copy the parameters so the caller's dict is not modified
+    params = base_params.copy()
+    # Fetch the first page; it determines the paging information
+    r = requests.get(url, params=params, timeout=timeout)
+    data_json = r.json()
+    # Work out the paging information
+    per_page_num = len(data_json["data"]["diff"])
+    total_page = math.ceil(data_json["data"]["total"] / per_page_num)
+    # Collect every page, starting with the first one
+    temp_list = [pd.DataFrame(data_json["data"]["diff"])]
+    # Fetch the remaining pages
+    for page in range(2, total_page + 1):
+        # Throttle between requests. Sleeping after the fetch left pages
+        # 1 and 2 back-to-back and stalled once more after the last page.
+        # NOTE(review): the default is 300 seconds per page; confirm that
+        # a shorter delay (e.g. 300 ms) was not intended.
+        time.sleep(page_delay)
+        logger.info(f"获取第 {page}/{total_page} 页数据")
+        params.update({"pn": page})
+        r = requests.get(url, params=params, timeout=timeout)
+        data_json = r.json()
+        temp_list.append(pd.DataFrame(data_json["data"]["diff"]))
+    # Merge all pages
+    temp_df = pd.concat(temp_list, ignore_index=True)
+    temp_df["f3"] = pd.to_numeric(temp_df["f3"], errors="coerce")
+    temp_df.sort_values(by=["f3"], ascending=False, inplace=True, ignore_index=True)
+    temp_df.reset_index(inplace=True)
+    temp_df["index"] = temp_df["index"].astype(int) + 1
+    return temp_df
+
+
+def stock_zh_index_spot_em(symbol: str = "上证系列指数") -> pd.DataFrame:
+    """
+    Eastmoney - Quote Center - Shanghai/Shenzhen/Beijing indices
+    https://quote.eastmoney.com/center/gridlist.html#index_sz
+    :param symbol: "上证系列指数"; choice of {"沪深重要指数", "上证系列指数", "深证系列指数", "指数成份", "中证系列指数"}
+    :type symbol: str
+    :return: real-time quotes for the indices
+    :rtype: pandas.DataFrame
+    """
+    if symbol == "沪深重要指数":
+        return __stock_zh_main_spot_em()
+
+    url = "https://48.push2.eastmoney.com/api/qt/clist/get"
+    symbol_map = {
+        "上证系列指数": "m:1+t:1",
+        "深证系列指数": "m:0 t:5",
+        "指数成份": "m:1+s:3,m:0+t:5",
+        "中证系列指数": "m:2",
+    }
+    params = {
+        "pn": "1",
+        "pz": "100",
+        "po": "1",
+        "np": "1",
+        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
+        "fltt": "2",
+        "invt": "2",
+        "wbp2u": "|0|0|0|web",
+        "fid": "f12",
+        "fs": symbol_map[symbol],
+        "fields": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,"
+        "f26,f22,f33,f11,f62,f128,f136,f115,f152",
+    }
+    temp_df = fetch_paginated_data(url, params)
+    temp_df.rename(
+        columns={
+            "index": "序号",
+            "f2": "最新价",
+            "f3": "涨跌幅",
+            "f4": "涨跌额",
+            "f5": "成交量",
+            "f6": "成交额",
+            "f7": "振幅",
+            "f10": "量比",
+            "f12": "代码",
+            "f14": "名称",
+            "f15": "最高",
+            "f16": "最低",
+            "f17": "今开",
+            "f18": "昨收",
+        },
+        inplace=True,
+    )
+    temp_df = temp_df[
+        [
+            "序号",
+            "代码",
+            "名称",
+            "最新价",
+            "涨跌幅",
+            "涨跌额",
+            "成交量",
+            "成交额",
+            "振幅",
+            "最高",
+            "最低",
+            "今开",
+            "昨收",
+            "量比",
+        ]
+    ]
+    temp_df["最新价"] = pd.to_numeric(temp_df["最新价"], errors="coerce")
+    temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"], errors="coerce")
+    temp_df["涨跌额"] = pd.to_numeric(temp_df["涨跌额"], errors="coerce")
+    temp_df["成交量"] = pd.to_numeric(temp_df["成交量"], errors="coerce")
+    temp_df["成交额"] = pd.to_numeric(temp_df["成交额"], errors="coerce")
+    temp_df["振幅"] = pd.to_numeric(temp_df["振幅"], errors="coerce")
+    temp_df["最高"] = pd.to_numeric(temp_df["最高"], errors="coerce")
+    temp_df["最低"] = pd.to_numeric(temp_df["最低"], errors="coerce")
+    temp_df["今开"] = pd.to_numeric(temp_df["今开"], errors="coerce")
+    temp_df["昨收"] = pd.to_numeric(temp_df["昨收"], errors="coerce")
+    temp_df["量比"] = pd.to_numeric(temp_df["量比"], errors="coerce")
+    return temp_df