{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "480f73ee",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "94725a95",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_feather(\n",
" \"/Users/aszer/Documents/vscode/bet/data/api_signal_res/api_res_merged_processed.feather\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2c620ff4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/km/5gc2fwqs19sbr04h1_j5_rzw0000gn/T/ipykernel_7502/2562396296.py:1: DtypeWarning: Columns (43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(\n"
]
}
],
"source": [
"df = pd.read_csv(\n",
" \"/Users/aszer/Documents/vscode/bet/data/pinnical_1xbet_all_api.csv\",\n",
" encoding=\"utf-8-sig\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a062a651",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['id', 'sportsbook', 'sport', 'league', 'fixture_id', 'game_id',\n",
" 'market', 'grouping_key', 'timestamp', 'first_name', 'second_name',\n",
" 'first_selection', 'second_selection', 'first_selection_line',\n",
" 'second_selection_line', 'first_selection_points',\n",
" 'second_selection_points', 'first_points', 'second_points',\n",
" 'first_deep_link', 'second_deep_link', 'first_price', 'second_price',\n",
" 'first_novig_price', 'second_novig_price', 'first_power_novig_price',\n",
" 'second_power_novig_price', 'market_width', 'sportsbook_count',\n",
" 'time_diff', 'pinnacle_novig_begin_first',\n",
" 'pinnacle_novig_begin_second', 'pinnacle_novig_realtime_first',\n",
" 'pinnacle_novig_realtime_second', 'pinnacle_power_novig_begin_first',\n",
" 'pinnacle_power_novig_begin_second',\n",
" 'pinnacle_power_novig_realtime_first',\n",
" 'pinnacle_power_novig_realtime_second', 'max_price_realtime_first',\n",
" 'max_price_realtime_second', 'gmt_created', 'gmt_modified', 'bet_id',\n",
" 'max_price_sportsbook_realtime_first',\n",
" 'max_price_sportsbook_realtime_second', 'bet_status', 'result', 'rn'],\n",
" dtype='object')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "49cc8dbb",
"metadata": {},
"outputs": [],
"source": [
"df = df[\n",
" [\n",
" \"sportsbook\",\n",
" \"sport\",\n",
" \"league\",\n",
" \"fixture_id\",\n",
" \"game_id\",\n",
" \"market\",\n",
" \"first_price\",\n",
" \"second_price\",\n",
" \"market_width\",\n",
" \"result\",\n",
" ]\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b118efae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 lost\n",
"5 refunded\n",
"18 won\n",
"Name: result, dtype: object"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"result\"].drop_duplicates()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "881f62ec",
"metadata": {},
"outputs": [],
"source": [
"from common.bet_tools import calculate_no_vig_moneyline_power, moneyline_to_prob\n",
"\n",
"def get_no_vig_prob(row):\n",
" odds = [row[\"first_price\"], row[\"second_price\"]]\n",
" no_vig_odds_power = calculate_no_vig_moneyline_power(odds)\n",
" novig_probs_power = [moneyline_to_prob(o) for o in no_vig_odds_power]\n",
" # 返回两个无水概率\n",
" return pd.Series(\n",
" {\n",
" \"first_no_vig_prob\": novig_probs_power[0],\n",
" \"second_no_vig_prob\": novig_probs_power[1],\n",
" }\n",
" )\n",
"\n",
"# 防止 SettingWithCopyWarning,推荐使用 .loc 显式分配\n",
"df.loc[:, [\"first_no_vig_prob\", \"second_no_vig_prob\"]] = df[[\"first_price\", \"second_price\"]].apply(get_no_vig_prob, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "5f066d95",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sportsbook | \n",
" sport | \n",
" league | \n",
" fixture_id | \n",
" game_id | \n",
" market | \n",
" first_price | \n",
" second_price | \n",
" market_width | \n",
" result | \n",
" first_no_vig_prob | \n",
" second_no_vig_prob | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1XBet | \n",
" baseball | \n",
" CPBL | \n",
" 202507040615CEC8 | \n",
" 12309-16681-2025-07-04-03 | \n",
" 1st Inning Total Runs | \n",
" 104 | \n",
" -145 | \n",
" 41.0 | \n",
" lost | \n",
" 0.447007 | \n",
" 0.552993 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1XBet | \n",
" baseball | \n",
" CPBL | \n",
" 2025070446F737B0 | \n",
" 26391-13841-2025-07-04-03 | \n",
" Total Runs | \n",
" 162 | \n",
" -263 | \n",
" 101.0 | \n",
" lost | \n",
" 0.318238 | \n",
" 0.681762 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1XBet | \n",
" baseball | \n",
" CPBL | \n",
" 20250704AD96F740 | \n",
" 26321-38692-2025-07-04-03 | \n",
" 1st Half Run Line | \n",
" -159 | \n",
" 109 | \n",
" 50.0 | \n",
" lost | \n",
" 0.571055 | \n",
" 0.428945 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1XBet | \n",
" baseball | \n",
" CPBL | \n",
" 20250704AD96F740 | \n",
" 26321-38692-2025-07-04-03 | \n",
" Run Line | \n",
" -122 | \n",
" -119 | \n",
" 41.0 | \n",
" lost | \n",
" 0.503229 | \n",
" 0.496771 | \n",
"
\n",
" \n",
" | 4 | \n",
" 1XBet | \n",
" baseball | \n",
" CPBL | \n",
" 20250704AD96F740 | \n",
" 26321-38692-2025-07-04-03 | \n",
" Team Total | \n",
" 107 | \n",
" -156 | \n",
" 49.0 | \n",
" lost | \n",
" 0.433727 | \n",
" 0.566273 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 629646 | \n",
" Pinnacle | \n",
" tennis | \n",
" WTA | \n",
" 2025101255331024 | \n",
" 21423-24990-2025-40 | \n",
" Game Spread | \n",
" -103 | \n",
" -118 | \n",
" 21.0 | \n",
" lost | \n",
" 0.482649 | \n",
" 0.517351 | \n",
"
\n",
" \n",
" | 629647 | \n",
" Pinnacle | \n",
" tennis | \n",
" WTA | \n",
" 2025101255331024 | \n",
" 21423-24990-2025-40 | \n",
" Player Games Won | \n",
" -338 | \n",
" 250 | \n",
" 88.0 | \n",
" lost | \n",
" 0.750412 | \n",
" 0.249588 | \n",
"
\n",
" \n",
" | 629648 | \n",
" Pinnacle | \n",
" tennis | \n",
" WTA | \n",
" 202510127155911C | \n",
" 15608-13454-2025-40 | \n",
" 1st Set Total Games | \n",
" 391 | \n",
" -539 | \n",
" 148.0 | \n",
" lost | \n",
" 0.171739 | \n",
" 0.828261 | \n",
"
\n",
" \n",
" | 629649 | \n",
" Pinnacle | \n",
" tennis | \n",
" WTA | \n",
" 202510127155911C | \n",
" 15608-13454-2025-40 | \n",
" Moneyline | \n",
" -377 | \n",
" 295 | \n",
" 82.0 | \n",
" won | \n",
" 0.774719 | \n",
" 0.225281 | \n",
"
\n",
" \n",
" | 629650 | \n",
" Pinnacle | \n",
" tennis | \n",
" WTA | \n",
" 202510127155911C | \n",
" 15608-13454-2025-40 | \n",
" Total Games | \n",
" -151 | \n",
" 125 | \n",
" 26.0 | \n",
" lost | \n",
" 0.580343 | \n",
" 0.419657 | \n",
"
\n",
" \n",
"
\n",
"
629651 rows × 12 columns
\n",
"
"
],
"text/plain": [
" sportsbook sport league fixture_id \\\n",
"0 1XBet baseball CPBL 202507040615CEC8 \n",
"1 1XBet baseball CPBL 2025070446F737B0 \n",
"2 1XBet baseball CPBL 20250704AD96F740 \n",
"3 1XBet baseball CPBL 20250704AD96F740 \n",
"4 1XBet baseball CPBL 20250704AD96F740 \n",
"... ... ... ... ... \n",
"629646 Pinnacle tennis WTA 2025101255331024 \n",
"629647 Pinnacle tennis WTA 2025101255331024 \n",
"629648 Pinnacle tennis WTA 202510127155911C \n",
"629649 Pinnacle tennis WTA 202510127155911C \n",
"629650 Pinnacle tennis WTA 202510127155911C \n",
"\n",
" game_id market first_price \\\n",
"0 12309-16681-2025-07-04-03 1st Inning Total Runs 104 \n",
"1 26391-13841-2025-07-04-03 Total Runs 162 \n",
"2 26321-38692-2025-07-04-03 1st Half Run Line -159 \n",
"3 26321-38692-2025-07-04-03 Run Line -122 \n",
"4 26321-38692-2025-07-04-03 Team Total 107 \n",
"... ... ... ... \n",
"629646 21423-24990-2025-40 Game Spread -103 \n",
"629647 21423-24990-2025-40 Player Games Won -338 \n",
"629648 15608-13454-2025-40 1st Set Total Games 391 \n",
"629649 15608-13454-2025-40 Moneyline -377 \n",
"629650 15608-13454-2025-40 Total Games -151 \n",
"\n",
" second_price market_width result first_no_vig_prob \\\n",
"0 -145 41.0 lost 0.447007 \n",
"1 -263 101.0 lost 0.318238 \n",
"2 109 50.0 lost 0.571055 \n",
"3 -119 41.0 lost 0.503229 \n",
"4 -156 49.0 lost 0.433727 \n",
"... ... ... ... ... \n",
"629646 -118 21.0 lost 0.482649 \n",
"629647 250 88.0 lost 0.750412 \n",
"629648 -539 148.0 lost 0.171739 \n",
"629649 295 82.0 won 0.774719 \n",
"629650 125 26.0 lost 0.580343 \n",
"\n",
" second_no_vig_prob \n",
"0 0.552993 \n",
"1 0.681762 \n",
"2 0.428945 \n",
"3 0.496771 \n",
"4 0.566273 \n",
"... ... \n",
"629646 0.517351 \n",
"629647 0.249588 \n",
"629648 0.828261 \n",
"629649 0.225281 \n",
"629650 0.419657 \n",
"\n",
"[629651 rows x 12 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60fda142",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 30,
"id": "e3922153",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sportsbook | \n",
" sport | \n",
" count | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1XBet | \n",
" soccer | \n",
" 170549 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1XBet | \n",
" basketball | \n",
" 166590 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1XBet | \n",
" tennis | \n",
" 114015 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1XBet | \n",
" baseball | \n",
" 64485 | \n",
"
\n",
" \n",
" | 4 | \n",
" Pinnacle | \n",
" tennis | \n",
" 48314 | \n",
"
\n",
" \n",
" | 5 | \n",
" 1XBet | \n",
" football | \n",
" 36019 | \n",
"
\n",
" \n",
" | 6 | \n",
" Pinnacle | \n",
" baseball | \n",
" 13706 | \n",
"
\n",
" \n",
" | 7 | \n",
" Pinnacle | \n",
" basketball | \n",
" 8588 | \n",
"
\n",
" \n",
" | 8 | \n",
" 1XBet | \n",
" hockey | \n",
" 3441 | \n",
"
\n",
" \n",
" | 9 | \n",
" Pinnacle | \n",
" soccer | \n",
" 2435 | \n",
"
\n",
" \n",
" | 10 | \n",
" Pinnacle | \n",
" football | \n",
" 1477 | \n",
"
\n",
" \n",
" | 11 | \n",
" Pinnacle | \n",
" hockey | \n",
" 32 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sportsbook sport count\n",
"0 1XBet soccer 170549\n",
"1 1XBet basketball 166590\n",
"2 1XBet tennis 114015\n",
"3 1XBet baseball 64485\n",
"4 Pinnacle tennis 48314\n",
"5 1XBet football 36019\n",
"6 Pinnacle baseball 13706\n",
"7 Pinnacle basketball 8588\n",
"8 1XBet hockey 3441\n",
"9 Pinnacle soccer 2435\n",
"10 Pinnacle football 1477\n",
"11 Pinnacle hockey 32"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"sportsbook\", \"sport\"]].value_counts().reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "c9b48951",
"metadata": {},
"outputs": [],
"source": [
"df['win_prob'] = df['first_no_vig_prob']\n",
"df['res'] = df['result']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6c575da9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 26,
"id": "2dfaf8ca",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['1XBet' 'baseball'] 64485\n",
"{'logloss': 0.6387950997506274, 'brier': 0.22453871048598073, 'ece': 0.05136008172379394, 'accuracy': 0.6251531363883074, 'reg_alpha': -0.22365612018752326, 'reg_beta': 0.816534516967482, 'n_samples': 64485, 'filter_cols': '1XBet,baseball'}\n",
"['1XBet' 'basketball'] 166590\n",
"{'logloss': 0.669273027668078, 'brier': 0.23856284575034065, 'ece': 0.009400017057809669, 'accuracy': 0.5751905876703284, 'reg_alpha': 0.034476957539975685, 'reg_beta': 0.8760036858377837, 'n_samples': 166590, 'filter_cols': '1XBet,basketball'}\n",
"['1XBet' 'football'] 36019\n",
"{'logloss': 0.5156283069611564, 'brier': 0.17093288618023667, 'ece': 0.03737481116887414, 'accuracy': 0.7366389960853994, 'reg_alpha': -0.22019343021598026, 'reg_beta': 0.9096828468608887, 'n_samples': 36019, 'filter_cols': '1XBet,football'}\n",
"['1XBet' 'hockey'] 3441\n",
"['1XBet' 'soccer'] 170549\n",
"{'logloss': 0.5326802320693952, 'brier': 0.17692943714557405, 'ece': 0.03728482187098291, 'accuracy': 0.7336014869626911, 'reg_alpha': -0.18193411314913413, 'reg_beta': 0.7726877806789224, 'n_samples': 170549, 'filter_cols': '1XBet,soccer'}\n",
"['1XBet' 'tennis'] 114015\n",
"{'logloss': 0.6389011505288847, 'brier': 0.22439917328514708, 'ece': 0.01927127655619132, 'accuracy': 0.6223479366749989, 'reg_alpha': -0.06976580882770708, 'reg_beta': 0.8523393655794403, 'n_samples': 114015, 'filter_cols': '1XBet,tennis'}\n",
"['Pinnacle' 'baseball'] 13706\n",
"{'logloss': 0.6488942536621299, 'brier': 0.2288787220784783, 'ece': 0.01759828591637442, 'accuracy': 0.6151320589522836, 'reg_alpha': -0.06787819238175896, 'reg_beta': 0.9034496569376994, 'n_samples': 13706, 'filter_cols': 'Pinnacle,baseball'}\n",
"['Pinnacle' 'basketball'] 8588\n",
"['Pinnacle' 'football'] 1477\n",
"['Pinnacle' 'hockey'] 32\n",
"['Pinnacle' 'soccer'] 2435\n",
"['Pinnacle' 'tennis'] 48314\n",
"{'logloss': 0.6472225570749982, 'brier': 0.22852018039069258, 'ece': 0.018568682287188856, 'accuracy': 0.605766444508838, 'reg_alpha': -0.07073057145248554, 'reg_beta': 0.9334853391615549, 'n_samples': 48314, 'filter_cols': 'Pinnacle,tennis'}\n"
]
}
],
"source": [
"from pinnacle_experiments import compute_metrics\n",
"data_df = df.copy()\n",
"data_list = []\n",
"cols = [\"sportsbook\",\"sport\"]\n",
"for cs in data_df[cols].drop_duplicates().values:\n",
" tmp_df = data_df[cols + [\"win_prob\", \"res\"]].copy()\n",
" for i, col in enumerate(cols):\n",
" tmp_df = tmp_df[tmp_df[col] == cs[i]]\n",
" print(cs, len(tmp_df))\n",
" if len(tmp_df) < 10000:\n",
" continue\n",
" res = compute_metrics(tmp_df)\n",
" res[\"filter_cols\"] = \",\".join(cs)\n",
" data_list.append(res)\n",
" print(res)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "ace930ea",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" logloss | \n",
" brier | \n",
" ece | \n",
" accuracy | \n",
" reg_alpha | \n",
" reg_beta | \n",
" n_samples | \n",
" filter_cols | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2 | \n",
" 0.515628 | \n",
" 0.170933 | \n",
" 0.037375 | \n",
" 0.736639 | \n",
" 0.220193 | \n",
" 0.909683 | \n",
" 36019 | \n",
" 1XBet,football | \n",
"
\n",
" \n",
" | 3 | \n",
" 0.532680 | \n",
" 0.176929 | \n",
" 0.037285 | \n",
" 0.733601 | \n",
" 0.181934 | \n",
" 0.772688 | \n",
" 170549 | \n",
" 1XBet,soccer | \n",
"
\n",
" \n",
" | 4 | \n",
" 0.638901 | \n",
" 0.224399 | \n",
" 0.019271 | \n",
" 0.622348 | \n",
" 0.069766 | \n",
" 0.852339 | \n",
" 114015 | \n",
" 1XBet,tennis | \n",
"
\n",
" \n",
" | 0 | \n",
" 0.638795 | \n",
" 0.224539 | \n",
" 0.051360 | \n",
" 0.625153 | \n",
" 0.223656 | \n",
" 0.816535 | \n",
" 64485 | \n",
" 1XBet,baseball | \n",
"
\n",
" \n",
" | 6 | \n",
" 0.647223 | \n",
" 0.228520 | \n",
" 0.018569 | \n",
" 0.605766 | \n",
" 0.070731 | \n",
" 0.933485 | \n",
" 48314 | \n",
" Pinnacle,tennis | \n",
"
\n",
" \n",
" | 5 | \n",
" 0.648894 | \n",
" 0.228879 | \n",
" 0.017598 | \n",
" 0.615132 | \n",
" 0.067878 | \n",
" 0.903450 | \n",
" 13706 | \n",
" Pinnacle,baseball | \n",
"
\n",
" \n",
" | 1 | \n",
" 0.669273 | \n",
" 0.238563 | \n",
" 0.009400 | \n",
" 0.575191 | \n",
" 0.034477 | \n",
" 0.876004 | \n",
" 166590 | \n",
" 1XBet,basketball | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" logloss brier ece accuracy reg_alpha reg_beta n_samples \\\n",
"2 0.515628 0.170933 0.037375 0.736639 0.220193 0.909683 36019 \n",
"3 0.532680 0.176929 0.037285 0.733601 0.181934 0.772688 170549 \n",
"4 0.638901 0.224399 0.019271 0.622348 0.069766 0.852339 114015 \n",
"0 0.638795 0.224539 0.051360 0.625153 0.223656 0.816535 64485 \n",
"6 0.647223 0.228520 0.018569 0.605766 0.070731 0.933485 48314 \n",
"5 0.648894 0.228879 0.017598 0.615132 0.067878 0.903450 13706 \n",
"1 0.669273 0.238563 0.009400 0.575191 0.034477 0.876004 166590 \n",
"\n",
" filter_cols \n",
"2 1XBet,football \n",
"3 1XBet,soccer \n",
"4 1XBet,tennis \n",
"0 1XBet,baseball \n",
"6 Pinnacle,tennis \n",
"5 Pinnacle,baseball \n",
"1 1XBet,basketball "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res_df = pd.DataFrame(data_list)\n",
"res_df[\"reg_alpha\"] = abs(res_df[\"reg_alpha\"])\n",
"res_df = res_df.sort_values(by=[ \"brier\", \"logloss\", \"ece\", \"reg_alpha\"])\n",
"res_df"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "402f0cfa",
"metadata": {},
"outputs": [],
"source": [
"res_df.to_csv(\"data/pinnacle_experiments.csv\", index=False, encoding=\"utf-8-sig\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}