处理指数和基金映射
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import akshare as ak
|
import akshare as ak
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
@@ -25,27 +25,56 @@ def get_all_stock_index():
|
|||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_INDEX_CSV = "/Users/aszer/Documents/vscode/etf/data/index_all_stock.csv"
_DEFAULT_FUND_CSV = "/Users/aszer/Documents/vscode/etf/data/fund_info.csv"
_MAPPING_COLUMNS = ["指数代码", "指数名称", "基金代码", "基金名称", "手续费"]


def get_index_fund_info(
    index_csv_path=_DEFAULT_INDEX_CSV,
    fund_csv_path=_DEFAULT_FUND_CSV,
):
    """Build a many-to-many mapping between indexes and the funds tracking them.

    A fund is linked to an index when the index name occurs, compared
    case-insensitively, as a substring of the fund name. One fund may match
    several indexes and one index may match several funds.

    Args:
        index_csv_path: CSV file with at least the columns ``代码`` (index
            code) and ``名称`` (index name). Defaults to the path that was
            previously hard-coded.
        fund_csv_path: CSV file with at least the columns ``基金代码``,
            ``基金名称`` and ``手续费``. Defaults to the path that was
            previously hard-coded.

    Returns:
        A DataFrame with the columns ``指数代码``, ``指数名称``, ``基金代码``,
        ``基金名称`` and ``手续费``, sorted by ``指数代码``. The columns are
        present even when nothing matched.
    """
    # Read codes as text: letting pandas infer integers would silently strip
    # leading zeros ("000300" -> 300).
    index_df = pd.read_csv(index_csv_path, encoding="utf-8-sig", dtype={"代码": str})
    fund_df = pd.read_csv(fund_csv_path, encoding="utf-8-sig", dtype={"基金代码": str})

    # Map each distinct index name to its code once, instead of filtering the
    # whole DataFrame for every match. The first occurrence of a name wins.
    index_code_by_name = {}
    for raw_name, raw_code in zip(index_df["名称"], index_df["代码"]):
        if pd.isna(raw_name):
            # A missing name would otherwise become the literal pattern "nan".
            continue
        index_name = str(raw_name)
        if not index_name.strip() or index_name in index_code_by_name:
            # A blank name is a substring of every fund name; skip it.
            continue
        index_code_by_name[index_name] = None if pd.isna(raw_code) else str(raw_code)

    # Lower-case the patterns once rather than once per fund.
    index_entries = [
        (index_name.lower(), index_name, index_code)
        for index_name, index_code in index_code_by_name.items()
    ]

    records = []
    for raw_fund_name, raw_fund_code, fund_fee in zip(
        fund_df["基金名称"], fund_df["基金代码"], fund_df["手续费"]
    ):
        fund_name = str(raw_fund_name)
        fund_code = str(raw_fund_code)
        fund_name_lower = fund_name.lower()
        for index_name_lower, index_name, index_code in index_entries:
            if index_name_lower in fund_name_lower:
                records.append(
                    {
                        "指数代码": index_code,
                        "指数名称": index_name,
                        "基金代码": fund_code,
                        "基金名称": fund_name,
                        "手续费": fund_fee,
                    }
                )

    # Explicit columns keep sort_values from raising KeyError when nothing
    # matched; a stable sort keeps the file order among rows that share a code.
    index_fund_df = pd.DataFrame(records, columns=_MAPPING_COLUMNS)
    index_fund_df = index_fund_df.sort_values("指数代码", kind="stable")
    return index_fund_df.reset_index(drop=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # One-off step kept for reference: regenerate the index list CSV.
    # df = get_all_stock_index()
    # df.to_csv("index_all_stock.csv", index=False, encoding="utf-8-sig")

    # Ad-hoc inspection of the ETF spot quotes, left disabled.
    # res = ak.fund_etf_spot_em()
    # print(res)

    # Build the index-to-fund mapping and write it to the working directory.
    mapping_df = get_index_fund_info()
    output_path = "index_fund_info.csv"
    mapping_df.to_csv(output_path, index=False, encoding="utf-8-sig")
|
||||||
|
|||||||
Reference in New Issue
Block a user