{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "480f73ee", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": null, "id": "94725a95", "metadata": {}, "outputs": [], "source": [ "df = pd.read_feather(\n", " \"/Users/aszer/Documents/vscode/bet/data/api_signal_res/api_res_merged_processed.feather\"\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "id": "2c620ff4", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/km/5gc2fwqs19sbr04h1_j5_rzw0000gn/T/ipykernel_7502/2562396296.py:1: DtypeWarning: Columns (43,44) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv(\n" ] } ], "source": [ "df = pd.read_csv(\n", " \"/Users/aszer/Documents/vscode/bet/data/pinnical_1xbet_all_api.csv\",\n", " encoding=\"utf-8-sig\",\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "id": "a062a651", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['id', 'sportsbook', 'sport', 'league', 'fixture_id', 'game_id',\n", " 'market', 'grouping_key', 'timestamp', 'first_name', 'second_name',\n", " 'first_selection', 'second_selection', 'first_selection_line',\n", " 'second_selection_line', 'first_selection_points',\n", " 'second_selection_points', 'first_points', 'second_points',\n", " 'first_deep_link', 'second_deep_link', 'first_price', 'second_price',\n", " 'first_novig_price', 'second_novig_price', 'first_power_novig_price',\n", " 'second_power_novig_price', 'market_width', 'sportsbook_count',\n", " 'time_diff', 'pinnacle_novig_begin_first',\n", " 'pinnacle_novig_begin_second', 'pinnacle_novig_realtime_first',\n", " 'pinnacle_novig_realtime_second', 'pinnacle_power_novig_begin_first',\n", " 'pinnacle_power_novig_begin_second',\n", " 'pinnacle_power_novig_realtime_first',\n", " 'pinnacle_power_novig_realtime_second', 'max_price_realtime_first',\n", " 'max_price_realtime_second', 'gmt_created', 'gmt_modified', 'bet_id',\n", " 'max_price_sportsbook_realtime_first',\n", " 'max_price_sportsbook_realtime_second', 'bet_status', 'result', 'rn'],\n", " dtype='object')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 4, "id": "49cc8dbb", "metadata": {}, "outputs": [], "source": [ "df = df[\n", " [\n", " \"sportsbook\",\n", " \"sport\",\n", " \"league\",\n", " \"fixture_id\",\n", " \"game_id\",\n", " \"market\",\n", " \"first_price\",\n", " \"second_price\",\n", " \"market_width\",\n", " \"result\",\n", " ]\n", "]" ] }, { "cell_type": "code", "execution_count": 5, "id": "b118efae", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 lost\n", "5 refunded\n", "18 won\n", "Name: result, dtype: object" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[\"result\"].drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 6, "id": "881f62ec", "metadata": {}, "outputs": [], "source": [ "from common.bet_tools import calculate_no_vig_moneyline_power, moneyline_to_prob\n", "\n", "def get_no_vig_prob(row):\n", " odds = [row[\"first_price\"], row[\"second_price\"]]\n", " no_vig_odds_power = calculate_no_vig_moneyline_power(odds)\n", " novig_probs_power = [moneyline_to_prob(o) for o in no_vig_odds_power]\n", " # 返回两个无水概率\n", " return pd.Series(\n", " {\n", " \"first_no_vig_prob\": novig_probs_power[0],\n", " \"second_no_vig_prob\": novig_probs_power[1],\n", " }\n", " )\n", "\n", "# 防止 SettingWithCopyWarning,推荐使用 .loc 显式分配\n", "df.loc[:, [\"first_no_vig_prob\", \"second_no_vig_prob\"]] = df[[\"first_price\", \"second_price\"]].apply(get_no_vig_prob, axis=1)" ] }, { "cell_type": "code", "execution_count": 7, "id": "5f066d95", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sportsbooksportleaguefixture_idgame_idmarketfirst_pricesecond_pricemarket_widthresultfirst_no_vig_probsecond_no_vig_prob
01XBetbaseballCPBL202507040615CEC812309-16681-2025-07-04-031st Inning Total Runs104-14541.0lost0.4470070.552993
11XBetbaseballCPBL2025070446F737B026391-13841-2025-07-04-03Total Runs162-263101.0lost0.3182380.681762
21XBetbaseballCPBL20250704AD96F74026321-38692-2025-07-04-031st Half Run Line-15910950.0lost0.5710550.428945
31XBetbaseballCPBL20250704AD96F74026321-38692-2025-07-04-03Run Line-122-11941.0lost0.5032290.496771
41XBetbaseballCPBL20250704AD96F74026321-38692-2025-07-04-03Team Total107-15649.0lost0.4337270.566273
.......................................
629646PinnacletennisWTA202510125533102421423-24990-2025-40Game Spread-103-11821.0lost0.4826490.517351
629647PinnacletennisWTA202510125533102421423-24990-2025-40Player Games Won-33825088.0lost0.7504120.249588
629648PinnacletennisWTA202510127155911C15608-13454-2025-401st Set Total Games391-539148.0lost0.1717390.828261
629649PinnacletennisWTA202510127155911C15608-13454-2025-40Moneyline-37729582.0won0.7747190.225281
629650PinnacletennisWTA202510127155911C15608-13454-2025-40Total Games-15112526.0lost0.5803430.419657
\n", "

629651 rows × 12 columns

\n", "
" ], "text/plain": [ " sportsbook sport league fixture_id \\\n", "0 1XBet baseball CPBL 202507040615CEC8 \n", "1 1XBet baseball CPBL 2025070446F737B0 \n", "2 1XBet baseball CPBL 20250704AD96F740 \n", "3 1XBet baseball CPBL 20250704AD96F740 \n", "4 1XBet baseball CPBL 20250704AD96F740 \n", "... ... ... ... ... \n", "629646 Pinnacle tennis WTA 2025101255331024 \n", "629647 Pinnacle tennis WTA 2025101255331024 \n", "629648 Pinnacle tennis WTA 202510127155911C \n", "629649 Pinnacle tennis WTA 202510127155911C \n", "629650 Pinnacle tennis WTA 202510127155911C \n", "\n", " game_id market first_price \\\n", "0 12309-16681-2025-07-04-03 1st Inning Total Runs 104 \n", "1 26391-13841-2025-07-04-03 Total Runs 162 \n", "2 26321-38692-2025-07-04-03 1st Half Run Line -159 \n", "3 26321-38692-2025-07-04-03 Run Line -122 \n", "4 26321-38692-2025-07-04-03 Team Total 107 \n", "... ... ... ... \n", "629646 21423-24990-2025-40 Game Spread -103 \n", "629647 21423-24990-2025-40 Player Games Won -338 \n", "629648 15608-13454-2025-40 1st Set Total Games 391 \n", "629649 15608-13454-2025-40 Moneyline -377 \n", "629650 15608-13454-2025-40 Total Games -151 \n", "\n", " second_price market_width result first_no_vig_prob \\\n", "0 -145 41.0 lost 0.447007 \n", "1 -263 101.0 lost 0.318238 \n", "2 109 50.0 lost 0.571055 \n", "3 -119 41.0 lost 0.503229 \n", "4 -156 49.0 lost 0.433727 \n", "... ... ... ... ... \n", "629646 -118 21.0 lost 0.482649 \n", "629647 250 88.0 lost 0.750412 \n", "629648 -539 148.0 lost 0.171739 \n", "629649 295 82.0 won 0.774719 \n", "629650 125 26.0 lost 0.580343 \n", "\n", " second_no_vig_prob \n", "0 0.552993 \n", "1 0.681762 \n", "2 0.428945 \n", "3 0.496771 \n", "4 0.566273 \n", "... ... \n", "629646 0.517351 \n", "629647 0.249588 \n", "629648 0.828261 \n", "629649 0.225281 \n", "629650 0.419657 \n", "\n", "[629651 rows x 12 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": null, "id": "60fda142", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 30, "id": "e3922153", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sportsbooksportcount
01XBetsoccer170549
11XBetbasketball166590
21XBettennis114015
31XBetbaseball64485
4Pinnacletennis48314
51XBetfootball36019
6Pinnaclebaseball13706
7Pinnaclebasketball8588
81XBethockey3441
9Pinnaclesoccer2435
10Pinnaclefootball1477
11Pinnaclehockey32
\n", "
" ], "text/plain": [ " sportsbook sport count\n", "0 1XBet soccer 170549\n", "1 1XBet basketball 166590\n", "2 1XBet tennis 114015\n", "3 1XBet baseball 64485\n", "4 Pinnacle tennis 48314\n", "5 1XBet football 36019\n", "6 Pinnacle baseball 13706\n", "7 Pinnacle basketball 8588\n", "8 1XBet hockey 3441\n", "9 Pinnacle soccer 2435\n", "10 Pinnacle football 1477\n", "11 Pinnacle hockey 32" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[[\"sportsbook\", \"sport\"]].value_counts().reset_index()" ] }, { "cell_type": "code", "execution_count": 20, "id": "c9b48951", "metadata": {}, "outputs": [], "source": [ "df['win_prob'] = df['first_no_vig_prob']\n", "df['res'] = df['result']" ] }, { "cell_type": "code", "execution_count": null, "id": "6c575da9", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 1, "id": "2dfaf8ca", "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'df' is not defined", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbet_tools\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m compute_metrics\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m data_df = \u001b[43mdf\u001b[49m.copy()\n\u001b[32m 3\u001b[39m data_list = []\n\u001b[32m 4\u001b[39m cols = [\u001b[33m\"\u001b[39m\u001b[33msportsbook\u001b[39m\u001b[33m\"\u001b[39m,\u001b[33m\"\u001b[39m\u001b[33msport\u001b[39m\u001b[33m\"\u001b[39m]\n", "\u001b[31mNameError\u001b[39m: name 'df' is not defined" ] } ], "source": [ "from common.bet_tools import compute_metrics\n", "data_df = df.copy()\n", "data_list = []\n", "cols = [\"sportsbook\",\"sport\"]\n", "for cs in data_df[cols].drop_duplicates().values:\n", " tmp_df = data_df[cols + [\"win_prob\", \"res\"]].copy()\n", " for i, col in enumerate(cols):\n", " tmp_df = tmp_df[tmp_df[col] == cs[i]]\n", " print(cs, len(tmp_df))\n", " if len(tmp_df) < 10000:\n", " continue\n", " res = compute_metrics(tmp_df)\n", " res[\"filter_cols\"] = \",\".join(cs)\n", " data_list.append(res)\n", " print(res)" ] }, { "cell_type": "code", "execution_count": 31, "id": "ace930ea", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
loglossbriereceaccuracyreg_alphareg_betan_samplesfilter_cols
20.5156280.1709330.0373750.7366390.2201930.909683360191XBet,football
30.5326800.1769290.0372850.7336010.1819340.7726881705491XBet,soccer
40.6389010.2243990.0192710.6223480.0697660.8523391140151XBet,tennis
00.6387950.2245390.0513600.6251530.2236560.816535644851XBet,baseball
60.6472230.2285200.0185690.6057660.0707310.93348548314Pinnacle,tennis
50.6488940.2288790.0175980.6151320.0678780.90345013706Pinnacle,baseball
10.6692730.2385630.0094000.5751910.0344770.8760041665901XBet,basketball
\n", "
" ], "text/plain": [ " logloss brier ece accuracy reg_alpha reg_beta n_samples \\\n", "2 0.515628 0.170933 0.037375 0.736639 0.220193 0.909683 36019 \n", "3 0.532680 0.176929 0.037285 0.733601 0.181934 0.772688 170549 \n", "4 0.638901 0.224399 0.019271 0.622348 0.069766 0.852339 114015 \n", "0 0.638795 0.224539 0.051360 0.625153 0.223656 0.816535 64485 \n", "6 0.647223 0.228520 0.018569 0.605766 0.070731 0.933485 48314 \n", "5 0.648894 0.228879 0.017598 0.615132 0.067878 0.903450 13706 \n", "1 0.669273 0.238563 0.009400 0.575191 0.034477 0.876004 166590 \n", "\n", " filter_cols \n", "2 1XBet,football \n", "3 1XBet,soccer \n", "4 1XBet,tennis \n", "0 1XBet,baseball \n", "6 Pinnacle,tennis \n", "5 Pinnacle,baseball \n", "1 1XBet,basketball " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res_df = pd.DataFrame(data_list)\n", "res_df[\"reg_alpha\"] = abs(res_df[\"reg_alpha\"])\n", "res_df = res_df.sort_values(by=[ \"brier\", \"logloss\", \"ece\", \"reg_alpha\"])\n", "res_df" ] }, { "cell_type": "code", "execution_count": 54, "id": "402f0cfa", "metadata": {}, "outputs": [], "source": [ "res_df.to_csv(\"data/pinnacle_experiments.csv\", index=False, encoding=\"utf-8-sig\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.12" } }, "nbformat": 4, "nbformat_minor": 5 }