metrics函数移动到bet tools

2025-10-25 22:17:28 +08:00
parent 22bede007e
commit 332284293f
3 changed files with 117 additions and 137 deletions
--- a/common/bet_tools.py
+++ b/common/bet_tools.py
@@ -1,5 +1,11 @@
 import math
 import numpy as np
 import pandas as pd
 from scipy.optimize import fsolve  # 导入 fsolve 函数用于数值求解
 from scipy.special import logit as sp_logit
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import log_loss
 def moneyline_to_prob(moneyline_odds: int) -> float:
@@ -154,6 +160,107 @@ def calculate_no_vig_moneyline_power(moneyline_odds_list: list[int]) -> list[int
    return no_vig_moneyline_odds
 def compute_metrics(
    df: pd.DataFrame,
    n_bins: int = 10,
    bin_strategy: str = "uniform",  # 'uniform' or 'quantile'
    include_draws: bool = True,
    eps: float = 1e-6,
 ) -> dict:
    """
    Compute evaluation metrics for win-probability predictions and fit a calibration line.
    Args:
      df: frame with at least the columns 'win_prob' (predicted win probability)
          and 'res' (one of 'won', 'refunded', 'lost'). Rows with any other
          'res' value are ignored.
      n_bins: number of bins used for the expected calibration error (ECE).
      bin_strategy: 'quantile' for equal-frequency bins; any other value uses
          equal-width bins on [0, 1].
      include_draws: if True, 'refunded' rows are kept and labelled as not won
          (y=0); if False, 'refunded' rows are dropped.
      eps: probabilities are clipped to [eps, 1 - eps] for numerical stability.
    Returns:
      dict with keys logloss, brier, ece, accuracy, reg_alpha, reg_beta, n_samples.
      reg_alpha / reg_beta are the intercept and slope of
      logit(E[y]) = alpha + beta * logit(p). They are NaN when the filtered
      labels contain fewer than two classes, because the logistic fit is
      undefined there. Every metric is NaN when no rows remain after filtering.
    """
    nan = float("nan")
    # Keep only settled rows; refunded rows are optional.
    kept = ["won", "refunded", "lost"] if include_draws else ["won", "lost"]
    df = df[df["res"].isin(kept)]
    total = len(df)
    if total == 0:
        # Nothing to score: report NaN instead of failing inside the estimators.
        return {
            "logloss": nan,
            "brier": nan,
            "ece": nan,
            "accuracy": nan,
            "reg_alpha": nan,
            "reg_beta": nan,
            "n_samples": 0,
        }
    # Labels: won=1, everything else (including refunded)=0.
    y = df["res"].map({"won": 1, "refunded": 0, "lost": 0}).astype(int).values
    p = df["win_prob"].astype(float).values
    p_clip = np.clip(p, eps, 1 - eps)
    # Log loss: prefer the sklearn implementation; deliberately best-effort,
    # any failure falls back to the closed-form binary cross-entropy.
    try:
        logloss = float(log_loss(y, p_clip, labels=[0, 1]))
    except Exception:
        logloss = float(-np.mean(y * np.log(p_clip) + (1 - y) * np.log(1 - p_clip)))
    brier = float(np.mean((p_clip - y) ** 2))
    # ECE bin assignment. Quantile edges are de-duplicated, so fewer than
    # n_bins bins may be populated; degenerate edges fall back to equal width.
    bin_idxs = None
    if bin_strategy == "quantile":
        try:
            edges = np.unique(np.percentile(p_clip, np.linspace(0, 100, n_bins + 1)))
        except (ValueError, IndexError):
            edges = np.empty(0)
        if len(edges) > 1:
            bin_idxs = np.clip(
                np.searchsorted(edges, p_clip, side="right") - 1, 0, len(edges) - 2
            )
    if bin_idxs is None:
        bin_idxs = np.minimum((p_clip * n_bins).astype(int), n_bins - 1)
    # ECE = sum over bins of |mean prediction - empirical frequency| * bin weight.
    weighted_gap = 0.0
    for b in range(n_bins):
        in_bin = bin_idxs == b
        count = int(in_bin.sum())
        if count == 0:
            continue
        weighted_gap += abs(float(p_clip[in_bin].mean()) - float(y[in_bin].mean())) * count
    ece = float(weighted_gap / total)
    # Accuracy with a 0.5 decision threshold.
    acc = float(np.mean((p_clip >= 0.5) == (y == 1)))
    # Calibration fit: logit(E[y]) = alpha + beta * logit(p).
    if np.unique(y).size < 2:
        # LogisticRegression cannot be fitted on a single class.
        alpha = nan
        beta = nan
    else:
        X = sp_logit(p_clip).reshape(-1, 1)
        clf = LogisticRegression(C=1e6, solver="lbfgs", max_iter=200)
        clf.fit(X, y)
        alpha = float(clf.intercept_[0])
        beta = float(clf.coef_[0][0])
    return {
        "logloss": logloss,
        "brier": brier,
        "ece": ece,
        "accuracy": acc,
        "reg_alpha": alpha,
        "reg_beta": beta,
        "n_samples": int(total),
    }
 # 示例
 if __name__ == "__main__":
    odds_list = [+150, -200, +300, -120]
--- a/pinnacle_experiments.py
+++ b/pinnacle_experiments.py
@@ -1,113 +0,0 @@
 import numpy as np
 import pandas as pd
 from scipy.special import logit as sp_logit
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import log_loss
 def compute_metrics(
    df: pd.DataFrame,
    n_bins: int = 10,
    bin_strategy: str = "uniform",  # 'uniform' or 'quantile'
    include_draws: bool = True,
    eps: float = 1e-6,
 ) -> dict:
    """
    Compute evaluation metrics for win-probability predictions and fit a calibration line.
    Args:
      df: frame with at least the columns 'win_prob' (predicted win probability)
          and 'res' (one of 'won', 'refunded', 'lost'). Rows with any other
          'res' value are ignored.
      n_bins: number of bins used for the expected calibration error (ECE).
      bin_strategy: 'quantile' for equal-frequency bins; any other value uses
          equal-width bins on [0, 1].
      include_draws: if True, 'refunded' rows are kept and labelled as not won
          (y=0); if False, 'refunded' rows are dropped.
      eps: probabilities are clipped to [eps, 1 - eps] for numerical stability.
    Returns:
      dict with keys logloss, brier, ece, accuracy, reg_alpha, reg_beta, n_samples.
      reg_alpha / reg_beta are the intercept and slope of
      logit(E[y]) = alpha + beta * logit(p). They are NaN when the filtered
      labels contain fewer than two classes, because the logistic fit is
      undefined there. Every metric is NaN when no rows remain after filtering.
    """
    nan = float("nan")
    # Keep only settled rows; refunded rows are optional.
    kept = ["won", "refunded", "lost"] if include_draws else ["won", "lost"]
    df = df[df["res"].isin(kept)]
    total = len(df)
    if total == 0:
        # Nothing to score: report NaN instead of failing inside the estimators.
        return {
            "logloss": nan,
            "brier": nan,
            "ece": nan,
            "accuracy": nan,
            "reg_alpha": nan,
            "reg_beta": nan,
            "n_samples": 0,
        }
    # Labels: won=1, everything else (including refunded)=0.
    y = df["res"].map({"won": 1, "refunded": 0, "lost": 0}).astype(int).values
    p = df["win_prob"].astype(float).values
    p_clip = np.clip(p, eps, 1 - eps)
    # Log loss: prefer the sklearn implementation; deliberately best-effort,
    # any failure falls back to the closed-form binary cross-entropy.
    try:
        logloss = float(log_loss(y, p_clip, labels=[0, 1]))
    except Exception:
        logloss = float(-np.mean(y * np.log(p_clip) + (1 - y) * np.log(1 - p_clip)))
    brier = float(np.mean((p_clip - y) ** 2))
    # ECE bin assignment. Quantile edges are de-duplicated, so fewer than
    # n_bins bins may be populated; degenerate edges fall back to equal width.
    bin_idxs = None
    if bin_strategy == "quantile":
        try:
            edges = np.unique(np.percentile(p_clip, np.linspace(0, 100, n_bins + 1)))
        except (ValueError, IndexError):
            edges = np.empty(0)
        if len(edges) > 1:
            bin_idxs = np.clip(
                np.searchsorted(edges, p_clip, side="right") - 1, 0, len(edges) - 2
            )
    if bin_idxs is None:
        bin_idxs = np.minimum((p_clip * n_bins).astype(int), n_bins - 1)
    # ECE = sum over bins of |mean prediction - empirical frequency| * bin weight.
    weighted_gap = 0.0
    for b in range(n_bins):
        in_bin = bin_idxs == b
        count = int(in_bin.sum())
        if count == 0:
            continue
        weighted_gap += abs(float(p_clip[in_bin].mean()) - float(y[in_bin].mean())) * count
    ece = float(weighted_gap / total)
    # Accuracy with a 0.5 decision threshold.
    acc = float(np.mean((p_clip >= 0.5) == (y == 1)))
    # Calibration fit: logit(E[y]) = alpha + beta * logit(p).
    if np.unique(y).size < 2:
        # LogisticRegression cannot be fitted on a single class.
        alpha = nan
        beta = nan
    else:
        X = sp_logit(p_clip).reshape(-1, 1)
        clf = LogisticRegression(C=1e6, solver="lbfgs", max_iter=200)
        clf.fit(X, y)
        alpha = float(clf.intercept_[0])
        beta = float(clf.coef_[0][0])
    return {
        "logloss": logloss,
        "brier": brier,
        "ece": ece,
        "accuracy": acc,
        "reg_alpha": alpha,
        "reg_beta": beta,
        "n_samples": int(total),
    }
 if __name__ == "__main__":
    # Ad-hoc run: score the 'power_p' column of the stored results as the win probability.
    frame = pd.read_feather("data/p_res.feather")
    frame["win_prob"] = frame["power_p"]
    print(compute_metrics(frame))
--- a/test.ipynb
+++ b/test.ipynb
@@ -594,38 +594,24 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 1,
   "id": "2dfaf8ca",
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
+     "ename": "NameError",
-     "output_type": "stream",
+     "evalue": "name 'df' is not defined",
-     "text": [
+     "output_type": "error",
-      "['1XBet' 'baseball'] 64485\n",
+     "traceback": [
-      "{'logloss': 0.6387950997506274, 'brier': 0.22453871048598073, 'ece': 0.05136008172379394, 'accuracy': 0.6251531363883074, 'reg_alpha': -0.22365612018752326, 'reg_beta': 0.816534516967482, 'n_samples': 64485, 'filter_cols': '1XBet,baseball'}\n",
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "['1XBet' 'basketball'] 166590\n",
+      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
-      "{'logloss': 0.669273027668078, 'brier': 0.23856284575034065, 'ece': 0.009400017057809669, 'accuracy': 0.5751905876703284, 'reg_alpha': 0.034476957539975685, 'reg_beta': 0.8760036858377837, 'n_samples': 166590, 'filter_cols': '1XBet,basketball'}\n",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcommon\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mbet_tools\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m compute_metrics\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m data_df = \u001b[43mdf\u001b[49m.copy()\n\u001b[32m      3\u001b[39m data_list = []\n\u001b[32m      4\u001b[39m cols = [\u001b[33m\"\u001b[39m\u001b[33msportsbook\u001b[39m\u001b[33m\"\u001b[39m,\u001b[33m\"\u001b[39m\u001b[33msport\u001b[39m\u001b[33m\"\u001b[39m]\n",
-      "['1XBet' 'football'] 36019\n",
+      "\u001b[31mNameError\u001b[39m: name 'df' is not defined"
      "{'logloss': 0.5156283069611564, 'brier': 0.17093288618023667, 'ece': 0.03737481116887414, 'accuracy': 0.7366389960853994, 'reg_alpha': -0.22019343021598026, 'reg_beta': 0.9096828468608887, 'n_samples': 36019, 'filter_cols': '1XBet,football'}\n",
      "['1XBet' 'hockey'] 3441\n",
      "['1XBet' 'soccer'] 170549\n",
      "{'logloss': 0.5326802320693952, 'brier': 0.17692943714557405, 'ece': 0.03728482187098291, 'accuracy': 0.7336014869626911, 'reg_alpha': -0.18193411314913413, 'reg_beta': 0.7726877806789224, 'n_samples': 170549, 'filter_cols': '1XBet,soccer'}\n",
      "['1XBet' 'tennis'] 114015\n",
      "{'logloss': 0.6389011505288847, 'brier': 0.22439917328514708, 'ece': 0.01927127655619132, 'accuracy': 0.6223479366749989, 'reg_alpha': -0.06976580882770708, 'reg_beta': 0.8523393655794403, 'n_samples': 114015, 'filter_cols': '1XBet,tennis'}\n",
      "['Pinnacle' 'baseball'] 13706\n",
      "{'logloss': 0.6488942536621299, 'brier': 0.2288787220784783, 'ece': 0.01759828591637442, 'accuracy': 0.6151320589522836, 'reg_alpha': -0.06787819238175896, 'reg_beta': 0.9034496569376994, 'n_samples': 13706, 'filter_cols': 'Pinnacle,baseball'}\n",
      "['Pinnacle' 'basketball'] 8588\n",
      "['Pinnacle' 'football'] 1477\n",
      "['Pinnacle' 'hockey'] 32\n",
      "['Pinnacle' 'soccer'] 2435\n",
      "['Pinnacle' 'tennis'] 48314\n",
      "{'logloss': 0.6472225570749982, 'brier': 0.22852018039069258, 'ece': 0.018568682287188856, 'accuracy': 0.605766444508838, 'reg_alpha': -0.07073057145248554, 'reg_beta': 0.9334853391615549, 'n_samples': 48314, 'filter_cols': 'Pinnacle,tennis'}\n"
     ]
    }
   ],
   "source": [
-    "from pinnacle_experiments import compute_metrics\n",
+    "from common.bet_tools import compute_metrics\n",
    "data_df = df.copy()\n",
    "data_list = []\n",
    "cols = [\"sportsbook\",\"sport\"]\n",