{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "480f73ee",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "94725a95",
"metadata": {},
"outputs": [],
"source": [
"# Load the merged, pre-processed API signal results from a Feather file.\n",
"# NOTE(review): the next loading cell rebinds `df` from a CSV, so this load is\n",
"# superseded when the notebook runs top to bottom - confirm which source is intended.\n",
"feather_path = \"/Users/aszer/Documents/vscode/bet/data/api_signal_res/api_res_merged_processed.feather\"\n",
"df = pd.read_feather(feather_path)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2c620ff4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/km/5gc2fwqs19sbr04h1_j5_rzw0000gn/T/ipykernel_7502/2562396296.py:1: DtypeWarning: Columns (43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(\n"
]
}
],
"source": [
"# Load the combined Pinnacle / 1XBet API export.\n",
"# low_memory=False makes pandas infer each column's dtype from the whole file in one\n",
"# pass; the default chunked inference emitted a DtypeWarning about mixed types in\n",
"# columns 43 and 44 for this file.\n",
"# NOTE(review): the path is machine-specific - consider a configurable data directory.\n",
"df = pd.read_csv(\n",
"    \"/Users/aszer/Documents/vscode/bet/data/pinnical_1xbet_all_api.csv\",\n",
"    encoding=\"utf-8-sig\",\n",
"    low_memory=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a062a651",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['id', 'sportsbook', 'sport', 'league', 'fixture_id', 'game_id',\n",
" 'market', 'grouping_key', 'timestamp', 'first_name', 'second_name',\n",
" 'first_selection', 'second_selection', 'first_selection_line',\n",
" 'second_selection_line', 'first_selection_points',\n",
" 'second_selection_points', 'first_points', 'second_points',\n",
" 'first_deep_link', 'second_deep_link', 'first_price', 'second_price',\n",
" 'first_novig_price', 'second_novig_price', 'first_power_novig_price',\n",
" 'second_power_novig_price', 'market_width', 'sportsbook_count',\n",
" 'time_diff', 'pinnacle_novig_begin_first',\n",
" 'pinnacle_novig_begin_second', 'pinnacle_novig_realtime_first',\n",
" 'pinnacle_novig_realtime_second', 'pinnacle_power_novig_begin_first',\n",
" 'pinnacle_power_novig_begin_second',\n",
" 'pinnacle_power_novig_realtime_first',\n",
" 'pinnacle_power_novig_realtime_second', 'max_price_realtime_first',\n",
" 'max_price_realtime_second', 'gmt_created', 'gmt_modified', 'bet_id',\n",
" 'max_price_sportsbook_realtime_first',\n",
" 'max_price_sportsbook_realtime_second', 'bet_status', 'result', 'rn'],\n",
" dtype='object')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "49cc8dbb",
"metadata": {},
"outputs": [],
"source": [
"# Columns kept for the rest of the notebook: identifiers, both prices, the market\n",
"# width and the settled result.\n",
"selected_columns = [\n",
"    \"sportsbook\",\n",
"    \"sport\",\n",
"    \"league\",\n",
"    \"fixture_id\",\n",
"    \"game_id\",\n",
"    \"market\",\n",
"    \"first_price\",\n",
"    \"second_price\",\n",
"    \"market_width\",\n",
"    \"result\",\n",
"]\n",
"# Take an explicit copy so that columns added to `df` later are written to an\n",
"# independent frame rather than to a selection of the loaded data.\n",
"df = df[selected_columns].copy()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b118efae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 lost\n",
"5 refunded\n",
"18 won\n",
"Name: result, dtype: object"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the distinct values of `result`, each at the index of its first occurrence.\n",
"is_first_occurrence = ~df[\"result\"].duplicated()\n",
"df.loc[is_first_occurrence, \"result\"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "881f62ec",
"metadata": {},
"outputs": [],
"source": [
"from common.bet_tools import calculate_no_vig_moneyline_power, moneyline_to_prob\n",
"\n",
"\n",
"def get_no_vig_prob(row):\n",
"    \"\"\"Return the two no-vig probabilities for one row of prices.\n",
"\n",
"    The row's `first_price` and `second_price` are passed together to\n",
"    `calculate_no_vig_moneyline_power`, and each value it returns is converted\n",
"    with `moneyline_to_prob`.\n",
"\n",
"    Returns:\n",
"        A Series with the keys `first_no_vig_prob` and `second_no_vig_prob`.\n",
"    \"\"\"\n",
"    price_pair = [row[\"first_price\"], row[\"second_price\"]]\n",
"    fair_prices = calculate_no_vig_moneyline_power(price_pair)\n",
"    fair_probs = list(map(moneyline_to_prob, fair_prices))\n",
"    # Hand back both no-vig probabilities as a labelled pair.\n",
"    return pd.Series(\n",
"        [fair_probs[0], fair_probs[1]],\n",
"        index=[\"first_no_vig_prob\", \"second_no_vig_prob\"],\n",
"    )\n",
"\n",
"\n",
"prob_columns = [\"first_no_vig_prob\", \"second_no_vig_prob\"]\n",
"no_vig_probs = df[[\"first_price\", \"second_price\"]].apply(get_no_vig_prob, axis=1)\n",
"# Assign explicitly through .loc to guard against SettingWithCopyWarning.\n",
"df.loc[:, prob_columns] = no_vig_probs"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "5f066d95",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sportsbook | \n",
" sport | \n",
" league | \n",
" fixture_id | \n",
" game_id | \n",
" market | \n",
" first_price | \n",
" second_price | \n",
" market_width | \n",
" result | \n",
" first_no_vig_prob | \n",
" second_no_vig_prob | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1XBet | \n",
" baseball | \n",
" CPBL | \n",
" 202507040615CEC8 | \n",
" 12309-16681-2025-07-04-03 | \n",
" 1st Inning Total Runs | \n",
" 104 | \n",
" -145 | \n",
" 41.0 | \n",
" lost | \n",
" 0.447007 | \n",
" 0.552993 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1XBet | \n",
" baseball | \n",
" CPBL | \n",
" 2025070446F737B0 | \n",
" 26391-13841-2025-07-04-03 | \n",
" Total Runs | \n",
" 162 | \n",
" -263 | \n",
" 101.0 | \n",
" lost | \n",
" 0.318238 | \n",
" 0.681762 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1XBet | \n",
" baseball | \n",
" CPBL | \n",
" 20250704AD96F740 | \n",
" 26321-38692-2025-07-04-03 | \n",
" 1st Half Run Line | \n",
" -159 | \n",
" 109 | \n",
" 50.0 | \n",
" lost | \n",
" 0.571055 | \n",
" 0.428945 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1XBet | \n",
" baseball | \n",
" CPBL | \n",
" 20250704AD96F740 | \n",
" 26321-38692-2025-07-04-03 | \n",
" Run Line | \n",
" -122 | \n",
" -119 | \n",
" 41.0 | \n",
" lost | \n",
" 0.503229 | \n",
" 0.496771 | \n",
"
\n",
" \n",
" | 4 | \n",
" 1XBet | \n",
" baseball | \n",
" CPBL | \n",
" 20250704AD96F740 | \n",
" 26321-38692-2025-07-04-03 | \n",
" Team Total | \n",
" 107 | \n",
" -156 | \n",
" 49.0 | \n",
" lost | \n",
" 0.433727 | \n",
" 0.566273 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 629646 | \n",
" Pinnacle | \n",
" tennis | \n",
" WTA | \n",
" 2025101255331024 | \n",
" 21423-24990-2025-40 | \n",
" Game Spread | \n",
" -103 | \n",
" -118 | \n",
" 21.0 | \n",
" lost | \n",
" 0.482649 | \n",
" 0.517351 | \n",
"
\n",
" \n",
" | 629647 | \n",
" Pinnacle | \n",
" tennis | \n",
" WTA | \n",
" 2025101255331024 | \n",
" 21423-24990-2025-40 | \n",
" Player Games Won | \n",
" -338 | \n",
" 250 | \n",
" 88.0 | \n",
" lost | \n",
" 0.750412 | \n",
" 0.249588 | \n",
"
\n",
" \n",
" | 629648 | \n",
" Pinnacle | \n",
" tennis | \n",
" WTA | \n",
" 202510127155911C | \n",
" 15608-13454-2025-40 | \n",
" 1st Set Total Games | \n",
" 391 | \n",
" -539 | \n",
" 148.0 | \n",
" lost | \n",
" 0.171739 | \n",
" 0.828261 | \n",
"
\n",
" \n",
" | 629649 | \n",
" Pinnacle | \n",
" tennis | \n",
" WTA | \n",
" 202510127155911C | \n",
" 15608-13454-2025-40 | \n",
" Moneyline | \n",
" -377 | \n",
" 295 | \n",
" 82.0 | \n",
" won | \n",
" 0.774719 | \n",
" 0.225281 | \n",
"
\n",
" \n",
" | 629650 | \n",
" Pinnacle | \n",
" tennis | \n",
" WTA | \n",
" 202510127155911C | \n",
" 15608-13454-2025-40 | \n",
" Total Games | \n",
" -151 | \n",
" 125 | \n",
" 26.0 | \n",
" lost | \n",
" 0.580343 | \n",
" 0.419657 | \n",
"
\n",
" \n",
"
\n",
"
629651 rows × 12 columns
\n",
"
"
],
"text/plain": [
" sportsbook sport league fixture_id \\\n",
"0 1XBet baseball CPBL 202507040615CEC8 \n",
"1 1XBet baseball CPBL 2025070446F737B0 \n",
"2 1XBet baseball CPBL 20250704AD96F740 \n",
"3 1XBet baseball CPBL 20250704AD96F740 \n",
"4 1XBet baseball CPBL 20250704AD96F740 \n",
"... ... ... ... ... \n",
"629646 Pinnacle tennis WTA 2025101255331024 \n",
"629647 Pinnacle tennis WTA 2025101255331024 \n",
"629648 Pinnacle tennis WTA 202510127155911C \n",
"629649 Pinnacle tennis WTA 202510127155911C \n",
"629650 Pinnacle tennis WTA 202510127155911C \n",
"\n",
" game_id market first_price \\\n",
"0 12309-16681-2025-07-04-03 1st Inning Total Runs 104 \n",
"1 26391-13841-2025-07-04-03 Total Runs 162 \n",
"2 26321-38692-2025-07-04-03 1st Half Run Line -159 \n",
"3 26321-38692-2025-07-04-03 Run Line -122 \n",
"4 26321-38692-2025-07-04-03 Team Total 107 \n",
"... ... ... ... \n",
"629646 21423-24990-2025-40 Game Spread -103 \n",
"629647 21423-24990-2025-40 Player Games Won -338 \n",
"629648 15608-13454-2025-40 1st Set Total Games 391 \n",
"629649 15608-13454-2025-40 Moneyline -377 \n",
"629650 15608-13454-2025-40 Total Games -151 \n",
"\n",
" second_price market_width result first_no_vig_prob \\\n",
"0 -145 41.0 lost 0.447007 \n",
"1 -263 101.0 lost 0.318238 \n",
"2 109 50.0 lost 0.571055 \n",
"3 -119 41.0 lost 0.503229 \n",
"4 -156 49.0 lost 0.433727 \n",
"... ... ... ... ... \n",
"629646 -118 21.0 lost 0.482649 \n",
"629647 250 88.0 lost 0.750412 \n",
"629648 -539 148.0 lost 0.171739 \n",
"629649 295 82.0 won 0.774719 \n",
"629650 125 26.0 lost 0.580343 \n",
"\n",
" second_no_vig_prob \n",
"0 0.552993 \n",
"1 0.681762 \n",
"2 0.428945 \n",
"3 0.496771 \n",
"4 0.566273 \n",
"... ... \n",
"629646 0.517351 \n",
"629647 0.249588 \n",
"629648 0.828261 \n",
"629649 0.225281 \n",
"629650 0.419657 \n",
"\n",
"[629651 rows x 12 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60fda142",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 30,
"id": "e3922153",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sportsbook | \n",
" sport | \n",
" count | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1XBet | \n",
" soccer | \n",
" 170549 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1XBet | \n",
" basketball | \n",
" 166590 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1XBet | \n",
" tennis | \n",
" 114015 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1XBet | \n",
" baseball | \n",
" 64485 | \n",
"
\n",
" \n",
" | 4 | \n",
" Pinnacle | \n",
" tennis | \n",
" 48314 | \n",
"
\n",
" \n",
" | 5 | \n",
" 1XBet | \n",
" football | \n",
" 36019 | \n",
"
\n",
" \n",
" | 6 | \n",
" Pinnacle | \n",
" baseball | \n",
" 13706 | \n",
"
\n",
" \n",
" | 7 | \n",
" Pinnacle | \n",
" basketball | \n",
" 8588 | \n",
"
\n",
" \n",
" | 8 | \n",
" 1XBet | \n",
" hockey | \n",
" 3441 | \n",
"
\n",
" \n",
" | 9 | \n",
" Pinnacle | \n",
" soccer | \n",
" 2435 | \n",
"
\n",
" \n",
" | 10 | \n",
" Pinnacle | \n",
" football | \n",
" 1477 | \n",
"
\n",
" \n",
" | 11 | \n",
" Pinnacle | \n",
" hockey | \n",
" 32 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sportsbook sport count\n",
"0 1XBet soccer 170549\n",
"1 1XBet basketball 166590\n",
"2 1XBet tennis 114015\n",
"3 1XBet baseball 64485\n",
"4 Pinnacle tennis 48314\n",
"5 1XBet football 36019\n",
"6 Pinnacle baseball 13706\n",
"7 Pinnacle basketball 8588\n",
"8 1XBet hockey 3441\n",
"9 Pinnacle soccer 2435\n",
"10 Pinnacle football 1477\n",
"11 Pinnacle hockey 32"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows for every (sportsbook, sport) pair, largest first.\n",
"pair_counts = df.value_counts(subset=[\"sportsbook\", \"sport\"])\n",
"pair_counts.reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "c9b48951",
"metadata": {},
"outputs": [],
"source": [
"# Expose the first selection's no-vig probability and the settled result under the\n",
"# `win_prob` / `res` column names that the metrics cell selects.\n",
"df = df.assign(win_prob=df[\"first_no_vig_prob\"], res=df[\"result\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6c575da9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"id": "2dfaf8ca",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'df' is not defined",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbet_tools\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m compute_metrics\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m data_df = \u001b[43mdf\u001b[49m.copy()\n\u001b[32m 3\u001b[39m data_list = []\n\u001b[32m 4\u001b[39m cols = [\u001b[33m\"\u001b[39m\u001b[33msportsbook\u001b[39m\u001b[33m\"\u001b[39m,\u001b[33m\"\u001b[39m\u001b[33msport\u001b[39m\u001b[33m\"\u001b[39m]\n",
"\u001b[31mNameError\u001b[39m: name 'df' is not defined"
]
}
],
"source": [
"from common.bet_tools import compute_metrics\n",
"\n",
"# Groups with fewer rows than this are reported but not scored.\n",
"MIN_GROUP_ROWS = 10000\n",
"\n",
"cols = [\"sportsbook\", \"sport\"]\n",
"metric_cols = [\"win_prob\", \"res\"]\n",
"data_df = df[cols + metric_cols]\n",
"data_list = []\n",
"\n",
"# One groupby pass replaces copying and re-filtering the whole frame once per\n",
"# combination. sort=False yields groups in order of first appearance in the data.\n",
"# Rows with a missing group key are left out by groupby; an equality filter on the\n",
"# key columns would never match them either.\n",
"for key, group_df in data_df.groupby(cols, sort=False):\n",
"    # Normalise the key to a tuple (a scalar is possible when grouping by one column).\n",
"    key = key if isinstance(key, tuple) else (key,)\n",
"    print(key, len(group_df))\n",
"    if len(group_df) < MIN_GROUP_ROWS:\n",
"        continue\n",
"    # Pass an independent copy in case compute_metrics modifies its input.\n",
"    res = compute_metrics(group_df.copy())\n",
"    # Label the metrics with the combination they were computed for; str() keeps the\n",
"    # join working for non-string key values.\n",
"    res[\"filter_cols\"] = \",\".join(map(str, key))\n",
"    data_list.append(res)\n",
"    print(res)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "ace930ea",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" logloss | \n",
" brier | \n",
" ece | \n",
" accuracy | \n",
" reg_alpha | \n",
" reg_beta | \n",
" n_samples | \n",
" filter_cols | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2 | \n",
" 0.515628 | \n",
" 0.170933 | \n",
" 0.037375 | \n",
" 0.736639 | \n",
" 0.220193 | \n",
" 0.909683 | \n",
" 36019 | \n",
" 1XBet,football | \n",
"
\n",
" \n",
" | 3 | \n",
" 0.532680 | \n",
" 0.176929 | \n",
" 0.037285 | \n",
" 0.733601 | \n",
" 0.181934 | \n",
" 0.772688 | \n",
" 170549 | \n",
" 1XBet,soccer | \n",
"
\n",
" \n",
" | 4 | \n",
" 0.638901 | \n",
" 0.224399 | \n",
" 0.019271 | \n",
" 0.622348 | \n",
" 0.069766 | \n",
" 0.852339 | \n",
" 114015 | \n",
" 1XBet,tennis | \n",
"
\n",
" \n",
" | 0 | \n",
" 0.638795 | \n",
" 0.224539 | \n",
" 0.051360 | \n",
" 0.625153 | \n",
" 0.223656 | \n",
" 0.816535 | \n",
" 64485 | \n",
" 1XBet,baseball | \n",
"
\n",
" \n",
" | 6 | \n",
" 0.647223 | \n",
" 0.228520 | \n",
" 0.018569 | \n",
" 0.605766 | \n",
" 0.070731 | \n",
" 0.933485 | \n",
" 48314 | \n",
" Pinnacle,tennis | \n",
"
\n",
" \n",
" | 5 | \n",
" 0.648894 | \n",
" 0.228879 | \n",
" 0.017598 | \n",
" 0.615132 | \n",
" 0.067878 | \n",
" 0.903450 | \n",
" 13706 | \n",
" Pinnacle,baseball | \n",
"
\n",
" \n",
" | 1 | \n",
" 0.669273 | \n",
" 0.238563 | \n",
" 0.009400 | \n",
" 0.575191 | \n",
" 0.034477 | \n",
" 0.876004 | \n",
" 166590 | \n",
" 1XBet,basketball | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" logloss brier ece accuracy reg_alpha reg_beta n_samples \\\n",
"2 0.515628 0.170933 0.037375 0.736639 0.220193 0.909683 36019 \n",
"3 0.532680 0.176929 0.037285 0.733601 0.181934 0.772688 170549 \n",
"4 0.638901 0.224399 0.019271 0.622348 0.069766 0.852339 114015 \n",
"0 0.638795 0.224539 0.051360 0.625153 0.223656 0.816535 64485 \n",
"6 0.647223 0.228520 0.018569 0.605766 0.070731 0.933485 48314 \n",
"5 0.648894 0.228879 0.017598 0.615132 0.067878 0.903450 13706 \n",
"1 0.669273 0.238563 0.009400 0.575191 0.034477 0.876004 166590 \n",
"\n",
" filter_cols \n",
"2 1XBet,football \n",
"3 1XBet,soccer \n",
"4 1XBet,tennis \n",
"0 1XBet,baseball \n",
"6 Pinnacle,tennis \n",
"5 Pinnacle,baseball \n",
"1 1XBet,basketball "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rank the per-group metrics: reg_alpha is replaced by its magnitude, then rows are\n",
"# ordered ascending by brier, logloss, ece and reg_alpha.\n",
"sort_keys = [\"brier\", \"logloss\", \"ece\", \"reg_alpha\"]\n",
"res_df = (\n",
"    pd.DataFrame(data_list)\n",
"    .assign(reg_alpha=lambda frame: frame[\"reg_alpha\"].abs())\n",
"    .sort_values(by=sort_keys)\n",
")\n",
"res_df"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "402f0cfa",
"metadata": {},
"outputs": [],
"source": [
"# Persist the ranked metrics table (UTF-8 with BOM, no index column).\n",
"output_path = \"data/pinnacle_experiments.csv\"\n",
"res_df.to_csv(output_path, encoding=\"utf-8-sig\", index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}