数据预处理
This commit is contained in:
33
oddsjam_processed_data.py
Normal file
33
oddsjam_processed_data.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import pandas as pd
|
||||
|
||||
def restore_table_from_feather(feather_path, ddl_columns):
|
||||
df = pd.read_feather(feather_path)
|
||||
# 假设只有一列
|
||||
col = df.columns[0]
|
||||
sample = df[col].iloc[0]
|
||||
sep = '\t'
|
||||
split_df = df[col].str.split(sep, expand=True)
|
||||
split_df.columns = ddl_columns
|
||||
return split_df
|
||||
|
||||
# Field names from the table DDL. They are assigned to the split fields by
# position, so the order here must match the field order inside each row.
ddl_columns = [
    'id', 'sportsbook', 'sport', 'league', 'fixture_id', 'game_id', 'market', 'grouping_key', 'timestamp',
    'first_name', 'second_name', 'first_selection', 'second_selection', 'first_selection_line', 'second_selection_line',
    'first_selection_points', 'second_selection_points', 'first_points', 'second_points', 'first_deep_link',
    'second_deep_link', 'first_price', 'second_price', 'first_novig_price', 'second_novig_price',
    'first_power_novig_price', 'second_power_novig_price', 'market_width', 'sportsbook_count', 'time_diff',
    'pinnacle_novig_begin_first', 'pinnacle_novig_begin_second', 'pinnacle_novig_realtime_first',
    'pinnacle_novig_realtime_second', 'pinnacle_power_novig_begin_first', 'pinnacle_power_novig_begin_second',
    'pinnacle_power_novig_realtime_first', 'pinnacle_power_novig_realtime_second', 'max_price_realtime_first',
    'max_price_realtime_second', 'gmt_created', 'gmt_modified', 'bet_id', 'max_price_sportsbook_realtime_first',
    'max_price_sportsbook_realtime_second', 'bet_status', 'res'
]
|
||||
|
||||
# Usage: rebuild the table from the merged Feather dump, export it as CSV,
# and print the first rows as a quick check.
# NOTE(review): the input path is absolute while the output path is relative
# to the current working directory — confirm the script is always launched
# from the project root, otherwise to_csv will not find the target folder.
restored_df = restore_table_from_feather(
    '/Users/aszer/Documents/vscode/bet/data/api_signal_res/api_res_merged.feather',
    ddl_columns
)
# utf-8-sig writes a byte-order mark at the start of the file; presumably
# chosen so spreadsheet tools detect the encoding — confirm with consumers.
restored_df.to_csv('data/api_signal_res/api_res_merged_processed.csv', index=False, encoding='utf-8-sig')
print(restored_df.head())
|
||||
Reference in New Issue
Block a user