import pandas as pd
from loguru import logger

from common.bet_tools import (
    calculate_no_vig_moneyline_power,
    compute_metrics,
    moneyline_to_prob,
)
from common.utils import timeit

# Default input file used when the module is run as a script.
DEFAULT_CSV_PATH = "/Users/aszer/Documents/vscode/bet/data/pinnical_1xbet_all_api.csv"


def get_no_vig_prob(row: pd.Series) -> pd.Series:
    """Return the two no-vig probabilities for one two-way market row.

    The row's ``first_price`` and ``second_price`` are passed to
    ``calculate_no_vig_moneyline_power`` and each resulting price is then
    converted with ``moneyline_to_prob``.
    NOTE(review): presumably the helper strips the bookmaker margin with the
    "power" method and returns moneyline odds -- confirm in common.bet_tools.

    Args:
        row: A row exposing ``"first_price"`` and ``"second_price"``.

    Returns:
        A Series with keys ``first_no_vig_prob`` and ``second_no_vig_prob``.
    """
    odds = [row["first_price"], row["second_price"]]
    no_vig_odds_power = calculate_no_vig_moneyline_power(odds)
    novig_probs_power = [moneyline_to_prob(o) for o in no_vig_odds_power]

    # Return both no-vig probabilities.
    return pd.Series(
        {
            "first_no_vig_prob": novig_probs_power[0],
            "second_no_vig_prob": novig_probs_power[1],
        }
    )


@timeit
def calc_metrics(df: pd.DataFrame, cols: list) -> pd.DataFrame:
    """Compute metrics separately for every distinct combination of ``cols``.

    For each unique combination of values found in ``df[cols]``, the matching
    rows (restricted to ``cols`` plus ``win_prob`` and ``res``) are passed to
    ``compute_metrics(df=..., include_draws=False)``. Each result is tagged
    with a ``filter_cols`` entry holding the comma-joined combination.

    Args:
        df: Input frame; must contain every column in ``cols`` as well as
            ``win_prob`` and ``res``.
        cols: Names of the columns whose value combinations define the groups.

    Returns:
        One row per combination, with ``reg_alpha`` replaced by its absolute
        value, sorted ascending by ``brier``, ``logloss``, ``ece`` and
        ``reg_alpha``. An empty frame is returned when there are no groups.

    Raises:
        KeyError: If a required column is missing from ``df``.
    """
    # Select the needed columns once instead of copying the full frame for
    # every group.
    subset = df[cols + ["win_prob", "res"]]

    data_list = []
    for combo in subset[cols].drop_duplicates().itertuples(index=False, name=None):
        group_df = subset
        for col, value in zip(cols, combo):
            group_df = group_df[group_df[col] == value]

        # NOTE: a minimum-sample-size filter (skip groups with fewer than
        # 10000 rows) was sketched here previously but is disabled.

        # Hand compute_metrics an independent frame so it cannot affect
        # ``subset`` or trigger chained-assignment warnings.
        res = compute_metrics(df=group_df.copy(), include_draws=False)
        # str() keeps the join working for non-string group keys.
        res["filter_cols"] = ",".join(map(str, combo))
        data_list.append(res)

    if not data_list:
        # Nothing to rank; avoid a KeyError on the missing metric columns.
        return pd.DataFrame()

    res_df = pd.DataFrame(data_list)
    res_df["reg_alpha"] = res_df["reg_alpha"].abs()
    res_df = res_df.sort_values(by=["brier", "logloss", "ece", "reg_alpha"])
    return res_df


def main(csv_path: str = DEFAULT_CSV_PATH) -> None:
    """Load the odds CSV, derive no-vig probabilities and log grouped metrics.

    Args:
        csv_path: Path of the CSV file to read (``utf-8-sig`` encoded).
    """
    df = pd.read_csv(
        csv_path,
        encoding="utf-8-sig",
    )

    # Take an explicit copy so the column assignments below operate on an
    # independent frame and cannot raise SettingWithCopyWarning.
    df = df[
        [
            "sportsbook",
            "sport",
            "league",
            "fixture_id",
            "game_id",
            "market",
            "first_price",
            "second_price",
            "market_width",
            "result",
        ]
    ].copy()

    no_vig = df[["first_price", "second_price"]].apply(get_no_vig_prob, axis=1)
    df["first_no_vig_prob"] = no_vig["first_no_vig_prob"]
    df["second_no_vig_prob"] = no_vig["second_no_vig_prob"]

    # calc_metrics reads the prediction from "win_prob" and the outcome from
    # "res".
    df["win_prob"] = df["first_no_vig_prob"]
    df["res"] = df["result"]

    cols = ["sportsbook", "sport"]
    res_df = calc_metrics(df, cols)

    logger.info(f"\n{res_df}")


if __name__ == "__main__":
    main()