# Reconstructed from the whitespace-collapsed patch to index_downloader.py:
# the index/fund mapping routine plus the script entry point it added.
import pandas as pd

# Default input locations (unchanged from the original hard-coded paths).
_DEFAULT_INDEX_CSV = "/Users/aszer/Documents/vscode/etf/data/index_all_stock.csv"
_DEFAULT_FUND_CSV = "/Users/aszer/Documents/vscode/etf/data/fund_info.csv"

# Column order of the mapping table; also used when there are no matches so
# the result always has the same schema.
_INDEX_FUND_COLUMNS = ["指数代码", "指数名称", "基金代码", "基金名称", "手续费"]


def _as_code(value):
    """Return *value* as a string code, or None when the cell is missing."""
    return None if pd.isna(value) else str(value)


def _match_funds_to_indexes(index_df, fund_df):
    """Build one record per (index, fund) pair where the fund name contains the index name.

    Matching is a case-insensitive substring test, so a fund may map to
    several indexes and an index to several funds.
    """
    # Keep the first code seen for each distinct index name. Missing or empty
    # names are skipped: stringifying NaN yields "nan", which would otherwise
    # match any fund name containing that substring, and "" matches everything.
    named = index_df.dropna(subset=["名称"])
    code_by_name = {}
    for name, code in zip(named["名称"].astype(str), named["代码"]):
        if name:
            code_by_name.setdefault(name, _as_code(code))

    # Lower-case every index name once instead of once per fund.
    needles = [(name.lower(), name, code) for name, code in code_by_name.items()]

    records = []
    for fund_name, fund_code, fee in zip(
        fund_df["基金名称"], fund_df["基金代码"], fund_df["手续费"]
    ):
        if pd.isna(fund_name):
            continue
        fund_name = str(fund_name)
        haystack = fund_name.lower()
        for needle, index_name, index_code in needles:
            if needle in haystack:
                records.append(
                    {
                        "指数代码": index_code,
                        "指数名称": index_name,
                        "基金代码": _as_code(fund_code),
                        "基金名称": fund_name,
                        "手续费": fee,
                    }
                )
    return records


def get_index_fund_info(
    index_csv: str = _DEFAULT_INDEX_CSV,
    fund_csv: str = _DEFAULT_FUND_CSV,
) -> pd.DataFrame:
    """Map funds to the indexes whose names appear inside the fund names.

    Args:
        index_csv: Path to a UTF-8 (BOM-tolerant) CSV with columns
            ``代码`` (index code) and ``名称`` (index name).
        fund_csv: Path to a UTF-8 (BOM-tolerant) CSV with columns
            ``基金代码`` (fund code), ``基金名称`` (fund name) and
            ``手续费`` (fee).

    Returns:
        A DataFrame with columns ``指数代码``, ``指数名称``, ``基金代码``,
        ``基金名称``, ``手续费``, sorted by index code and then fund code,
        with a fresh 0..n-1 index. It is empty (but still has these columns)
        when no fund name contains any index name.
    """
    # Codes are identifiers, not numbers: read them as text so leading zeros
    # (e.g. "000300") are preserved.
    index_df = pd.read_csv(index_csv, encoding="utf-8-sig", dtype={"代码": str})
    fund_df = pd.read_csv(fund_csv, encoding="utf-8-sig", dtype={"基金代码": str})

    records = _match_funds_to_indexes(index_df, fund_df)

    # Explicit columns keep sort_values from failing on an empty result.
    index_fund_df = pd.DataFrame(records, columns=_INDEX_FUND_COLUMNS)
    # The secondary key makes the order of rows sharing an index code deterministic.
    return index_fund_df.sort_values(["指数代码", "基金代码"]).reset_index(drop=True)


if __name__ == "__main__":
    df = get_index_fund_info()
    df.to_csv("index_fund_info.csv", index=False, encoding="utf-8-sig")