829 lines
28 KiB
Plaintext
829 lines
28 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "480f73ee",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "94725a95",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Load the processed API-signal results.\n",
|
||
"# NOTE(review): the path is relative to the project root, the same base the\n",
|
||
"# \"data/...\" CSV export at the end of this notebook uses - confirm the kernel\n",
|
||
"# working directory. The next cell rebinds df from a CSV export.\n",
|
||
"df = pd.read_feather(\n",
|
||
"    \"data/api_signal_res/api_res_merged_processed.feather\"\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "2c620ff4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/var/folders/km/5gc2fwqs19sbr04h1_j5_rzw0000gn/T/ipykernel_7502/2562396296.py:1: DtypeWarning: Columns (43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" df = pd.read_csv(\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Load the merged Pinnacle / 1XBet export.\n",
|
||
"# low_memory=False lets pandas infer each column's dtype from the whole file,\n",
|
||
"# which avoids the DtypeWarning previously raised for mixed-type columns 43, 44.\n",
|
||
"# NOTE(review): the path is relative to the project root - confirm the kernel\n",
|
||
"# working directory.\n",
|
||
"df = pd.read_csv(\n",
|
||
"    \"data/pinnical_1xbet_all_api.csv\",\n",
|
||
"    encoding=\"utf-8-sig\",\n",
|
||
"    low_memory=False,\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "a062a651",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['id', 'sportsbook', 'sport', 'league', 'fixture_id', 'game_id',\n",
|
||
" 'market', 'grouping_key', 'timestamp', 'first_name', 'second_name',\n",
|
||
" 'first_selection', 'second_selection', 'first_selection_line',\n",
|
||
" 'second_selection_line', 'first_selection_points',\n",
|
||
" 'second_selection_points', 'first_points', 'second_points',\n",
|
||
" 'first_deep_link', 'second_deep_link', 'first_price', 'second_price',\n",
|
||
" 'first_novig_price', 'second_novig_price', 'first_power_novig_price',\n",
|
||
" 'second_power_novig_price', 'market_width', 'sportsbook_count',\n",
|
||
" 'time_diff', 'pinnacle_novig_begin_first',\n",
|
||
" 'pinnacle_novig_begin_second', 'pinnacle_novig_realtime_first',\n",
|
||
" 'pinnacle_novig_realtime_second', 'pinnacle_power_novig_begin_first',\n",
|
||
" 'pinnacle_power_novig_begin_second',\n",
|
||
" 'pinnacle_power_novig_realtime_first',\n",
|
||
" 'pinnacle_power_novig_realtime_second', 'max_price_realtime_first',\n",
|
||
" 'max_price_realtime_second', 'gmt_created', 'gmt_modified', 'bet_id',\n",
|
||
" 'max_price_sportsbook_realtime_first',\n",
|
||
" 'max_price_sportsbook_realtime_second', 'bet_status', 'result', 'rn'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "49cc8dbb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Keep only the identifying columns, the two prices, the market width and\n",
|
||
"# the result column.\n",
|
||
"df = df.loc[\n",
|
||
"    :,\n",
|
||
"    [\n",
|
||
"        \"sportsbook\", \"sport\", \"league\", \"fixture_id\", \"game_id\",\n",
|
||
"        \"market\", \"first_price\", \"second_price\", \"market_width\", \"result\",\n",
|
||
"    ],\n",
|
||
"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "b118efae",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0 lost\n",
|
||
"5 refunded\n",
|
||
"18 won\n",
|
||
"Name: result, dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.drop_duplicates(subset=\"result\")[\"result\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "881f62ec",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from common.bet_tools import calculate_no_vig_moneyline_power, moneyline_to_prob\n",
|
||
"\n",
|
||
"\n",
|
||
"def get_no_vig_prob(row):\n",
|
||
"    \"\"\"Convert one row's two prices into a pair of no-vig probabilities.\n",
|
||
"\n",
|
||
"    Reads ``first_price`` and ``second_price`` from ``row``, passes the pair to\n",
|
||
"    ``calculate_no_vig_moneyline_power`` and maps every returned price through\n",
|
||
"    ``moneyline_to_prob``. Returns a ``pd.Series`` holding the first two\n",
|
||
"    probabilities under ``first_no_vig_prob`` and ``second_no_vig_prob``.\n",
|
||
"    \"\"\"\n",
|
||
"    price_pair = [row[\"first_price\"], row[\"second_price\"]]\n",
|
||
"    fair_prices = calculate_no_vig_moneyline_power(price_pair)\n",
|
||
"    fair_probs = list(map(moneyline_to_prob, fair_prices))\n",
|
||
"    return pd.Series(\n",
|
||
"        [fair_probs[0], fair_probs[1]],\n",
|
||
"        index=[\"first_no_vig_prob\", \"second_no_vig_prob\"],\n",
|
||
"    )\n",
|
||
"\n",
|
||
"\n",
|
||
"# Assign explicitly through .loc to avoid a SettingWithCopyWarning.\n",
|
||
"df.loc[:, [\"first_no_vig_prob\", \"second_no_vig_prob\"]] = df[\n",
|
||
"    [\"first_price\", \"second_price\"]\n",
|
||
"].apply(get_no_vig_prob, axis=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "5f066d95",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>sportsbook</th>\n",
|
||
" <th>sport</th>\n",
|
||
" <th>league</th>\n",
|
||
" <th>fixture_id</th>\n",
|
||
" <th>game_id</th>\n",
|
||
" <th>market</th>\n",
|
||
" <th>first_price</th>\n",
|
||
" <th>second_price</th>\n",
|
||
" <th>market_width</th>\n",
|
||
" <th>result</th>\n",
|
||
" <th>first_no_vig_prob</th>\n",
|
||
" <th>second_no_vig_prob</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>baseball</td>\n",
|
||
" <td>CPBL</td>\n",
|
||
" <td>202507040615CEC8</td>\n",
|
||
" <td>12309-16681-2025-07-04-03</td>\n",
|
||
" <td>1st Inning Total Runs</td>\n",
|
||
" <td>104</td>\n",
|
||
" <td>-145</td>\n",
|
||
" <td>41.0</td>\n",
|
||
" <td>lost</td>\n",
|
||
" <td>0.447007</td>\n",
|
||
" <td>0.552993</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>baseball</td>\n",
|
||
" <td>CPBL</td>\n",
|
||
" <td>2025070446F737B0</td>\n",
|
||
" <td>26391-13841-2025-07-04-03</td>\n",
|
||
" <td>Total Runs</td>\n",
|
||
" <td>162</td>\n",
|
||
" <td>-263</td>\n",
|
||
" <td>101.0</td>\n",
|
||
" <td>lost</td>\n",
|
||
" <td>0.318238</td>\n",
|
||
" <td>0.681762</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>baseball</td>\n",
|
||
" <td>CPBL</td>\n",
|
||
" <td>20250704AD96F740</td>\n",
|
||
" <td>26321-38692-2025-07-04-03</td>\n",
|
||
" <td>1st Half Run Line</td>\n",
|
||
" <td>-159</td>\n",
|
||
" <td>109</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>lost</td>\n",
|
||
" <td>0.571055</td>\n",
|
||
" <td>0.428945</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>baseball</td>\n",
|
||
" <td>CPBL</td>\n",
|
||
" <td>20250704AD96F740</td>\n",
|
||
" <td>26321-38692-2025-07-04-03</td>\n",
|
||
" <td>Run Line</td>\n",
|
||
" <td>-122</td>\n",
|
||
" <td>-119</td>\n",
|
||
" <td>41.0</td>\n",
|
||
" <td>lost</td>\n",
|
||
" <td>0.503229</td>\n",
|
||
" <td>0.496771</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>baseball</td>\n",
|
||
" <td>CPBL</td>\n",
|
||
" <td>20250704AD96F740</td>\n",
|
||
" <td>26321-38692-2025-07-04-03</td>\n",
|
||
" <td>Team Total</td>\n",
|
||
" <td>107</td>\n",
|
||
" <td>-156</td>\n",
|
||
" <td>49.0</td>\n",
|
||
" <td>lost</td>\n",
|
||
" <td>0.433727</td>\n",
|
||
" <td>0.566273</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>629646</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>tennis</td>\n",
|
||
" <td>WTA</td>\n",
|
||
" <td>2025101255331024</td>\n",
|
||
" <td>21423-24990-2025-40</td>\n",
|
||
" <td>Game Spread</td>\n",
|
||
" <td>-103</td>\n",
|
||
" <td>-118</td>\n",
|
||
" <td>21.0</td>\n",
|
||
" <td>lost</td>\n",
|
||
" <td>0.482649</td>\n",
|
||
" <td>0.517351</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>629647</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>tennis</td>\n",
|
||
" <td>WTA</td>\n",
|
||
" <td>2025101255331024</td>\n",
|
||
" <td>21423-24990-2025-40</td>\n",
|
||
" <td>Player Games Won</td>\n",
|
||
" <td>-338</td>\n",
|
||
" <td>250</td>\n",
|
||
" <td>88.0</td>\n",
|
||
" <td>lost</td>\n",
|
||
" <td>0.750412</td>\n",
|
||
" <td>0.249588</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>629648</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>tennis</td>\n",
|
||
" <td>WTA</td>\n",
|
||
" <td>202510127155911C</td>\n",
|
||
" <td>15608-13454-2025-40</td>\n",
|
||
" <td>1st Set Total Games</td>\n",
|
||
" <td>391</td>\n",
|
||
" <td>-539</td>\n",
|
||
" <td>148.0</td>\n",
|
||
" <td>lost</td>\n",
|
||
" <td>0.171739</td>\n",
|
||
" <td>0.828261</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>629649</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>tennis</td>\n",
|
||
" <td>WTA</td>\n",
|
||
" <td>202510127155911C</td>\n",
|
||
" <td>15608-13454-2025-40</td>\n",
|
||
" <td>Moneyline</td>\n",
|
||
" <td>-377</td>\n",
|
||
" <td>295</td>\n",
|
||
" <td>82.0</td>\n",
|
||
" <td>won</td>\n",
|
||
" <td>0.774719</td>\n",
|
||
" <td>0.225281</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>629650</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>tennis</td>\n",
|
||
" <td>WTA</td>\n",
|
||
" <td>202510127155911C</td>\n",
|
||
" <td>15608-13454-2025-40</td>\n",
|
||
" <td>Total Games</td>\n",
|
||
" <td>-151</td>\n",
|
||
" <td>125</td>\n",
|
||
" <td>26.0</td>\n",
|
||
" <td>lost</td>\n",
|
||
" <td>0.580343</td>\n",
|
||
" <td>0.419657</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>629651 rows × 12 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" sportsbook sport league fixture_id \\\n",
|
||
"0 1XBet baseball CPBL 202507040615CEC8 \n",
|
||
"1 1XBet baseball CPBL 2025070446F737B0 \n",
|
||
"2 1XBet baseball CPBL 20250704AD96F740 \n",
|
||
"3 1XBet baseball CPBL 20250704AD96F740 \n",
|
||
"4 1XBet baseball CPBL 20250704AD96F740 \n",
|
||
"... ... ... ... ... \n",
|
||
"629646 Pinnacle tennis WTA 2025101255331024 \n",
|
||
"629647 Pinnacle tennis WTA 2025101255331024 \n",
|
||
"629648 Pinnacle tennis WTA 202510127155911C \n",
|
||
"629649 Pinnacle tennis WTA 202510127155911C \n",
|
||
"629650 Pinnacle tennis WTA 202510127155911C \n",
|
||
"\n",
|
||
" game_id market first_price \\\n",
|
||
"0 12309-16681-2025-07-04-03 1st Inning Total Runs 104 \n",
|
||
"1 26391-13841-2025-07-04-03 Total Runs 162 \n",
|
||
"2 26321-38692-2025-07-04-03 1st Half Run Line -159 \n",
|
||
"3 26321-38692-2025-07-04-03 Run Line -122 \n",
|
||
"4 26321-38692-2025-07-04-03 Team Total 107 \n",
|
||
"... ... ... ... \n",
|
||
"629646 21423-24990-2025-40 Game Spread -103 \n",
|
||
"629647 21423-24990-2025-40 Player Games Won -338 \n",
|
||
"629648 15608-13454-2025-40 1st Set Total Games 391 \n",
|
||
"629649 15608-13454-2025-40 Moneyline -377 \n",
|
||
"629650 15608-13454-2025-40 Total Games -151 \n",
|
||
"\n",
|
||
" second_price market_width result first_no_vig_prob \\\n",
|
||
"0 -145 41.0 lost 0.447007 \n",
|
||
"1 -263 101.0 lost 0.318238 \n",
|
||
"2 109 50.0 lost 0.571055 \n",
|
||
"3 -119 41.0 lost 0.503229 \n",
|
||
"4 -156 49.0 lost 0.433727 \n",
|
||
"... ... ... ... ... \n",
|
||
"629646 -118 21.0 lost 0.482649 \n",
|
||
"629647 250 88.0 lost 0.750412 \n",
|
||
"629648 -539 148.0 lost 0.171739 \n",
|
||
"629649 295 82.0 won 0.774719 \n",
|
||
"629650 125 26.0 lost 0.580343 \n",
|
||
"\n",
|
||
" second_no_vig_prob \n",
|
||
"0 0.552993 \n",
|
||
"1 0.681762 \n",
|
||
"2 0.428945 \n",
|
||
"3 0.496771 \n",
|
||
"4 0.566273 \n",
|
||
"... ... \n",
|
||
"629646 0.517351 \n",
|
||
"629647 0.249588 \n",
|
||
"629648 0.828261 \n",
|
||
"629649 0.225281 \n",
|
||
"629650 0.419657 \n",
|
||
"\n",
|
||
"[629651 rows x 12 columns]"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "60fda142",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"id": "e3922153",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>sportsbook</th>\n",
|
||
" <th>sport</th>\n",
|
||
" <th>count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>soccer</td>\n",
|
||
" <td>170549</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>basketball</td>\n",
|
||
" <td>166590</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>tennis</td>\n",
|
||
" <td>114015</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>baseball</td>\n",
|
||
" <td>64485</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>tennis</td>\n",
|
||
" <td>48314</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>football</td>\n",
|
||
" <td>36019</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>baseball</td>\n",
|
||
" <td>13706</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>basketball</td>\n",
|
||
" <td>8588</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>1XBet</td>\n",
|
||
" <td>hockey</td>\n",
|
||
" <td>3441</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>soccer</td>\n",
|
||
" <td>2435</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>football</td>\n",
|
||
" <td>1477</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>Pinnacle</td>\n",
|
||
" <td>hockey</td>\n",
|
||
" <td>32</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" sportsbook sport count\n",
|
||
"0 1XBet soccer 170549\n",
|
||
"1 1XBet basketball 166590\n",
|
||
"2 1XBet tennis 114015\n",
|
||
"3 1XBet baseball 64485\n",
|
||
"4 Pinnacle tennis 48314\n",
|
||
"5 1XBet football 36019\n",
|
||
"6 Pinnacle baseball 13706\n",
|
||
"7 Pinnacle basketball 8588\n",
|
||
"8 1XBet hockey 3441\n",
|
||
"9 Pinnacle soccer 2435\n",
|
||
"10 Pinnacle football 1477\n",
|
||
"11 Pinnacle hockey 32"
|
||
]
|
||
},
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.value_counts(subset=[\"sportsbook\", \"sport\"]).reset_index()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "c9b48951",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Expose the first selection's no-vig probability and the result column under\n",
|
||
"# the names the metrics cell reads (win_prob / res).\n",
|
||
"df[\"win_prob\"], df[\"res\"] = df[\"first_no_vig_prob\"], df[\"result\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "6c575da9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"id": "2dfaf8ca",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"['1XBet' 'baseball'] 64485\n",
|
||
"{'logloss': 0.6387950997506274, 'brier': 0.22453871048598073, 'ece': 0.05136008172379394, 'accuracy': 0.6251531363883074, 'reg_alpha': -0.22365612018752326, 'reg_beta': 0.816534516967482, 'n_samples': 64485, 'filter_cols': '1XBet,baseball'}\n",
|
||
"['1XBet' 'basketball'] 166590\n",
|
||
"{'logloss': 0.669273027668078, 'brier': 0.23856284575034065, 'ece': 0.009400017057809669, 'accuracy': 0.5751905876703284, 'reg_alpha': 0.034476957539975685, 'reg_beta': 0.8760036858377837, 'n_samples': 166590, 'filter_cols': '1XBet,basketball'}\n",
|
||
"['1XBet' 'football'] 36019\n",
|
||
"{'logloss': 0.5156283069611564, 'brier': 0.17093288618023667, 'ece': 0.03737481116887414, 'accuracy': 0.7366389960853994, 'reg_alpha': -0.22019343021598026, 'reg_beta': 0.9096828468608887, 'n_samples': 36019, 'filter_cols': '1XBet,football'}\n",
|
||
"['1XBet' 'hockey'] 3441\n",
|
||
"['1XBet' 'soccer'] 170549\n",
|
||
"{'logloss': 0.5326802320693952, 'brier': 0.17692943714557405, 'ece': 0.03728482187098291, 'accuracy': 0.7336014869626911, 'reg_alpha': -0.18193411314913413, 'reg_beta': 0.7726877806789224, 'n_samples': 170549, 'filter_cols': '1XBet,soccer'}\n",
|
||
"['1XBet' 'tennis'] 114015\n",
|
||
"{'logloss': 0.6389011505288847, 'brier': 0.22439917328514708, 'ece': 0.01927127655619132, 'accuracy': 0.6223479366749989, 'reg_alpha': -0.06976580882770708, 'reg_beta': 0.8523393655794403, 'n_samples': 114015, 'filter_cols': '1XBet,tennis'}\n",
|
||
"['Pinnacle' 'baseball'] 13706\n",
|
||
"{'logloss': 0.6488942536621299, 'brier': 0.2288787220784783, 'ece': 0.01759828591637442, 'accuracy': 0.6151320589522836, 'reg_alpha': -0.06787819238175896, 'reg_beta': 0.9034496569376994, 'n_samples': 13706, 'filter_cols': 'Pinnacle,baseball'}\n",
|
||
"['Pinnacle' 'basketball'] 8588\n",
|
||
"['Pinnacle' 'football'] 1477\n",
|
||
"['Pinnacle' 'hockey'] 32\n",
|
||
"['Pinnacle' 'soccer'] 2435\n",
|
||
"['Pinnacle' 'tennis'] 48314\n",
|
||
"{'logloss': 0.6472225570749982, 'brier': 0.22852018039069258, 'ece': 0.018568682287188856, 'accuracy': 0.605766444508838, 'reg_alpha': -0.07073057145248554, 'reg_beta': 0.9334853391615549, 'n_samples': 48314, 'filter_cols': 'Pinnacle,tennis'}\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from pinnacle_experiments import compute_metrics\n",
|
||
"\n",
|
||
"# Score every (sportsbook, sport) slice with compute_metrics and collect the\n",
|
||
"# results in data_list.\n",
|
||
"MIN_SAMPLES = 10000  # slices with fewer rows are printed but not scored\n",
|
||
"\n",
|
||
"data_df = df.copy()\n",
|
||
"data_list = []\n",
|
||
"cols = [\"sportsbook\", \"sport\"]\n",
|
||
"# A single groupby pass replaces copying and re-filtering the whole frame for\n",
|
||
"# every key combination. sort=False keeps the groups in first-appearance\n",
|
||
"# order, the order a drop_duplicates() scan over the key columns would give.\n",
|
||
"for key, group_df in data_df[cols + [\"win_prob\", \"res\"]].groupby(cols, sort=False):\n",
|
||
"    print(key, len(group_df))\n",
|
||
"    if len(group_df) < MIN_SAMPLES:\n",
|
||
"        continue\n",
|
||
"    # Hand compute_metrics its own copy so it never touches the grouped data.\n",
|
||
"    metrics = compute_metrics(group_df.copy())\n",
|
||
"    # Key values are not guaranteed to be strings, so stringify before joining.\n",
|
||
"    metrics[\"filter_cols\"] = \",\".join(map(str, key))\n",
|
||
"    data_list.append(metrics)\n",
|
||
"    print(metrics)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"id": "ace930ea",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>logloss</th>\n",
|
||
" <th>brier</th>\n",
|
||
" <th>ece</th>\n",
|
||
" <th>accuracy</th>\n",
|
||
" <th>reg_alpha</th>\n",
|
||
" <th>reg_beta</th>\n",
|
||
" <th>n_samples</th>\n",
|
||
" <th>filter_cols</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>0.515628</td>\n",
|
||
" <td>0.170933</td>\n",
|
||
" <td>0.037375</td>\n",
|
||
" <td>0.736639</td>\n",
|
||
" <td>0.220193</td>\n",
|
||
" <td>0.909683</td>\n",
|
||
" <td>36019</td>\n",
|
||
" <td>1XBet,football</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>0.532680</td>\n",
|
||
" <td>0.176929</td>\n",
|
||
" <td>0.037285</td>\n",
|
||
" <td>0.733601</td>\n",
|
||
" <td>0.181934</td>\n",
|
||
" <td>0.772688</td>\n",
|
||
" <td>170549</td>\n",
|
||
" <td>1XBet,soccer</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>0.638901</td>\n",
|
||
" <td>0.224399</td>\n",
|
||
" <td>0.019271</td>\n",
|
||
" <td>0.622348</td>\n",
|
||
" <td>0.069766</td>\n",
|
||
" <td>0.852339</td>\n",
|
||
" <td>114015</td>\n",
|
||
" <td>1XBet,tennis</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>0.638795</td>\n",
|
||
" <td>0.224539</td>\n",
|
||
" <td>0.051360</td>\n",
|
||
" <td>0.625153</td>\n",
|
||
" <td>0.223656</td>\n",
|
||
" <td>0.816535</td>\n",
|
||
" <td>64485</td>\n",
|
||
" <td>1XBet,baseball</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>0.647223</td>\n",
|
||
" <td>0.228520</td>\n",
|
||
" <td>0.018569</td>\n",
|
||
" <td>0.605766</td>\n",
|
||
" <td>0.070731</td>\n",
|
||
" <td>0.933485</td>\n",
|
||
" <td>48314</td>\n",
|
||
" <td>Pinnacle,tennis</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>0.648894</td>\n",
|
||
" <td>0.228879</td>\n",
|
||
" <td>0.017598</td>\n",
|
||
" <td>0.615132</td>\n",
|
||
" <td>0.067878</td>\n",
|
||
" <td>0.903450</td>\n",
|
||
" <td>13706</td>\n",
|
||
" <td>Pinnacle,baseball</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>0.669273</td>\n",
|
||
" <td>0.238563</td>\n",
|
||
" <td>0.009400</td>\n",
|
||
" <td>0.575191</td>\n",
|
||
" <td>0.034477</td>\n",
|
||
" <td>0.876004</td>\n",
|
||
" <td>166590</td>\n",
|
||
" <td>1XBet,basketball</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" logloss brier ece accuracy reg_alpha reg_beta n_samples \\\n",
|
||
"2 0.515628 0.170933 0.037375 0.736639 0.220193 0.909683 36019 \n",
|
||
"3 0.532680 0.176929 0.037285 0.733601 0.181934 0.772688 170549 \n",
|
||
"4 0.638901 0.224399 0.019271 0.622348 0.069766 0.852339 114015 \n",
|
||
"0 0.638795 0.224539 0.051360 0.625153 0.223656 0.816535 64485 \n",
|
||
"6 0.647223 0.228520 0.018569 0.605766 0.070731 0.933485 48314 \n",
|
||
"5 0.648894 0.228879 0.017598 0.615132 0.067878 0.903450 13706 \n",
|
||
"1 0.669273 0.238563 0.009400 0.575191 0.034477 0.876004 166590 \n",
|
||
"\n",
|
||
" filter_cols \n",
|
||
"2 1XBet,football \n",
|
||
"3 1XBet,soccer \n",
|
||
"4 1XBet,tennis \n",
|
||
"0 1XBet,baseball \n",
|
||
"6 Pinnacle,tennis \n",
|
||
"5 Pinnacle,baseball \n",
|
||
"1 1XBet,basketball "
|
||
]
|
||
},
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Build the summary table: one row per scored slice, reg_alpha replaced by its\n",
|
||
"# absolute value, ordered by brier, then logloss, ece and reg_alpha.\n",
|
||
"res_df = (\n",
|
||
"    pd.DataFrame(data_list)\n",
|
||
"    .assign(reg_alpha=lambda frame: frame[\"reg_alpha\"].abs())\n",
|
||
"    .sort_values(by=[\"brier\", \"logloss\", \"ece\", \"reg_alpha\"])\n",
|
||
")\n",
|
||
"res_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"id": "402f0cfa",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"res_df.to_csv(\"data/pinnacle_experiments.csv\", index=False, encoding=\"utf-8-sig\")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.12"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|