数据预处理

This commit is contained in:
2025-10-12 23:26:47 +08:00
parent ffec93a76e
commit b73a96d3d1
2 changed files with 220 additions and 0 deletions

33
oddsjam_processed_data.py Normal file
View File

@@ -0,0 +1,33 @@
import pandas as pd
def restore_table_from_feather(feather_path, ddl_columns):
    """Rebuild a multi-column table from a single-column feather file.

    The first column of the feather file is expected to hold one
    tab-separated record per row. Each record is split on tabs and the
    resulting fields are labelled with ``ddl_columns``.

    Args:
        feather_path: Path of the feather file to read.
        ddl_columns: Column names, in field order, for the restored table.

    Returns:
        A DataFrame with one column per name in ``ddl_columns``. Fields are
        left as strings; rows with fewer fields than the widest row are
        padded with missing values by ``str.split``. An empty input yields
        an empty DataFrame that still carries ``ddl_columns``.

    Raises:
        ValueError: If the widest record does not have exactly
            ``len(ddl_columns)`` fields.
    """
    df = pd.read_feather(feather_path)
    ddl_columns = list(ddl_columns)

    # With no rows (or no columns) there is nothing to split; return an empty
    # table with the expected header instead of failing on the lookups below.
    if df.empty:
        return pd.DataFrame(columns=ddl_columns)

    # Assumes the file holds a single column; any further columns are ignored.
    col = df.columns[0]
    sep = '\t'
    split_df = df[col].str.split(sep, expand=True)

    # Check the width explicitly so a schema mismatch is reported with both
    # counts rather than as a bare "Length mismatch" from pandas.
    if split_df.shape[1] != len(ddl_columns):
        raise ValueError(
            f"Expected {len(ddl_columns)} tab-separated fields per row, "
            f"but splitting column {col!r} produced {split_df.shape[1]}"
        )

    split_df.columns = ddl_columns
    return split_df
# Field names from the table DDL, in the order the fields appear in each
# tab-separated record.
ddl_columns = [
    'id', 'sportsbook', 'sport', 'league',
    'fixture_id', 'game_id', 'market', 'grouping_key', 'timestamp',
    'first_name', 'second_name',
    'first_selection', 'second_selection',
    'first_selection_line', 'second_selection_line',
    'first_selection_points', 'second_selection_points',
    'first_points', 'second_points',
    'first_deep_link', 'second_deep_link',
    'first_price', 'second_price',
    'first_novig_price', 'second_novig_price',
    'first_power_novig_price', 'second_power_novig_price',
    'market_width', 'sportsbook_count', 'time_diff',
    'pinnacle_novig_begin_first', 'pinnacle_novig_begin_second',
    'pinnacle_novig_realtime_first', 'pinnacle_novig_realtime_second',
    'pinnacle_power_novig_begin_first', 'pinnacle_power_novig_begin_second',
    'pinnacle_power_novig_realtime_first', 'pinnacle_power_novig_realtime_second',
    'max_price_realtime_first', 'max_price_realtime_second',
    'gmt_created', 'gmt_modified', 'bet_id',
    'max_price_sportsbook_realtime_first', 'max_price_sportsbook_realtime_second',
    'bet_status', 'res',
]
# Usage: restore the merged feather dump into a proper table, export it as
# CSV, and print the first rows as a quick sanity check.
# NOTE(review): the input path is absolute and machine-specific while the
# output path is relative to the working directory — confirm this is intended.
restored_df = restore_table_from_feather(
    feather_path='/Users/aszer/Documents/vscode/bet/data/api_signal_res/api_res_merged.feather',
    ddl_columns=ddl_columns,
)
restored_df.to_csv(
    'data/api_signal_res/api_res_merged_processed.csv',
    index=False,
    encoding='utf-8-sig',
)
print(restored_df.head())