"""Moneyline odds helpers, vig-removal methods and prediction-quality metrics."""

import math

import numpy as np
import pandas as pd
from scipy.optimize import fsolve  # numerical root finder used by the power method
from scipy.special import logit as sp_logit

try:
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import log_loss
except ImportError:  # scikit-learn is only needed by the calibration fit
    LogisticRegression = None
    log_loss = None

# Absolute tolerance used when deciding that a probability "is" 0 or 1.
_PROB_TOL = 1e-9

# Outcome label -> binary target used by compute_metrics.
_RESULT_TO_LABEL = {"won": 1, "refunded": 0, "lost": 0}


def moneyline_to_prob(moneyline_odds: float) -> float:
    """Convert moneyline (American) odds to the implied probability.

    Args:
        moneyline_odds: Non-zero moneyline odds, e.g. ``+150`` or ``-200``.
            ``-inf`` (a certain outcome) maps to ``1.0`` and ``+inf`` to ``0.0``.

    Returns:
        The implied probability, including any bookmaker margin.

    Raises:
        ValueError: If ``moneyline_odds`` is 0.
    """
    if moneyline_odds == 0:
        raise ValueError("Moneyline odds cannot be 0")
    if moneyline_odds > 0:
        # Positive odds +X -> implied probability = 100 / (100 + X).
        return 100 / (moneyline_odds + 100)
    # Negative odds -X -> implied probability = X / (X + 100).
    stake = abs(moneyline_odds)
    if stake == math.inf:
        # inf / inf would be NaN; infinitely short odds mean a certain outcome.
        return 1.0
    return stake / (stake + 100)


def prob_to_moneyline(probability: float) -> float:
    """Convert a probability to moneyline odds, rounded to two decimals.

    Args:
        probability: Probability of the outcome, normally strictly between
            0 and 1.

    Returns:
        Positive odds for ``probability <= 0.5`` and negative odds otherwise.
        A probability of (approximately) 0 yields ``+inf`` and a probability
        of (approximately) 1 yields ``-inf``.

    Raises:
        ValueError: If ``probability`` is outside ``[0, 1]`` by more than a
            small numerical tolerance (or is NaN).
    """
    if not 0 < probability < 1:
        # Exact 0 or 1 is rare in practice; both correspond to infinite odds.
        if math.isclose(probability, 0, abs_tol=_PROB_TOL):
            return float("inf")
        if math.isclose(probability, 1, abs_tol=_PROB_TOL):
            # As p -> 1 the odds -100 / (1/p - 1) diverge to -inf
            # (-100 would wrongly mean a probability of 0.5).
            return float("-inf")
        raise ValueError("Probability must be between 0 and 1 (exclusive)")

    if probability <= 0.5:
        # Decimal odds = 1 / p >= 2.0; moneyline = (decimal - 1) * 100.
        return round((1 / probability - 1) * 100, 2)
    # Decimal odds = 1 / p < 2.0; moneyline = -100 / (decimal - 1).
    return round(-100 / (1 / probability - 1), 2)


def calculate_no_vig_moneyline_multir(moneyline_odds_list: list[float]) -> list[float]:
    """Remove the vig from a set of odds by multiplicative rescaling.

    Steps:
        1. Convert every moneyline price to its implied probability.
        2. Sum the implied probabilities (usually > 1 because of the vig).
        3. Divide each probability by that sum so they add up to 1.
        4. Convert the normalised probabilities back to moneyline odds.

    Example:
        ``[+120, -150]`` -> implied ``[100/220, 150/250]``, sum ``1.0545``,
        fair odds ``[132.0, -132.0]``.

    Args:
        moneyline_odds_list: Moneyline odds of all mutually exclusive outcomes.

    Returns:
        The vig-free moneyline odds, in the same order as the input. An empty
        input yields an empty list.
    """
    if not moneyline_odds_list:
        return []

    implied = [moneyline_to_prob(odds) for odds in moneyline_odds_list]
    overround = sum(implied)
    return [prob_to_moneyline(p / overround) for p in implied]


def calculate_no_vig_moneyline_power(moneyline_odds_list: list[float]) -> list[float]:
    """Remove the vig from a set of odds using the power method.

    The method looks for an exponent ``k`` such that ``sum(p_i ** k) == 1``,
    where ``p_i`` are the implied probabilities, and uses ``p_i ** k`` as the
    vig-free probabilities.

    Args:
        moneyline_odds_list: Moneyline odds of all mutually exclusive
            outcomes, e.g. ``[+116, -156]``.

    Returns:
        The vig-free moneyline odds, in the same order as the input. If the
        implied probabilities already sum to 1 or less, a warning is printed
        and a copy of the input odds is returned. An empty input yields an
        empty list.

    Raises:
        ValueError: If an implied probability is not positive.
        RuntimeError: If the solver does not converge and the resulting
            probabilities do not sum to 1.
    """
    if not moneyline_odds_list:
        return []

    implied = [moneyline_to_prob(odds) for odds in moneyline_odds_list]
    # Powers of non-positive probabilities are meaningless for this method.
    if any(p <= 0 for p in implied):
        raise ValueError("All implied probabilities must be positive.")

    if sum(implied) <= 1:
        print(
            "Warning: Input odds already have little or no vig. Returning original odds."
        )
        # Return a copy so callers cannot mutate the input through the result.
        return list(moneyline_odds_list)

    implied_arr = np.asarray(implied, dtype=float)

    def residual(k):
        """Return sum(p_i ** k) - 1; its root is the power-method exponent."""
        # fsolve passes a length-1 ndarray; reduce it to a plain scalar.
        exponent = float(np.ravel(k)[0])
        return float(np.sum(implied_arr**exponent) - 1.0)

    # sum(p_i ** k) exceeds 1 at k = 1 and decreases as k grows, so the root
    # lies above 1; start the search slightly to the right of 1.
    solution, _info, ier, message = fsolve(residual, [1.1], full_output=True)
    k = float(solution[0])

    no_vig_probabilities = (implied_arr**k).tolist()
    probability_sum = sum(no_vig_probabilities)

    # Judge the solver by the residual: a non-converged run is only an error
    # if the equation is not actually satisfied.
    if ier != 1 and not math.isclose(probability_sum, 1.0, abs_tol=1e-6):
        raise RuntimeError(f"Power method solver did not converge: {message}")

    # Floating-point error may leave the sum marginally off 1; report it but
    # keep the method's definition (no extra re-normalisation).
    if not math.isclose(probability_sum, 1.0, abs_tol=1e-9):
        print(
            f"Warning: Final no-vig probabilities sum to {probability_sum:.6f}, expected 1.0. Sum may need slight re-normalization."
        )

    return [prob_to_moneyline(p) for p in no_vig_probabilities]


def _binary_log_loss(y: np.ndarray, p_clip: np.ndarray) -> float:
    """Return the mean binary log loss, preferring scikit-learn when available."""
    if log_loss is not None:
        try:
            return float(log_loss(y, p_clip, labels=[0, 1]))
        except (ValueError, TypeError):
            pass  # fall through to the plain NumPy implementation
    return float(-np.mean(y * np.log(p_clip) + (1 - y) * np.log(1 - p_clip)))


def _uniform_bins(p_clip: np.ndarray, n_bins: int) -> np.ndarray:
    """Assign each probability to one of ``n_bins`` equal-width bins."""
    return np.minimum((p_clip * n_bins).astype(int), n_bins - 1)


def _assign_bins(p_clip: np.ndarray, n_bins: int, bin_strategy: str) -> np.ndarray:
    """Return the bin index of every prediction for the ECE computation."""
    if bin_strategy != "quantile":
        return _uniform_bins(p_clip, n_bins)
    try:
        # Duplicate quantiles collapse, so there may be fewer than n_bins bins.
        edges = np.unique(np.percentile(p_clip, np.linspace(0, 100, n_bins + 1)))
    except (ValueError, IndexError):
        return _uniform_bins(p_clip, n_bins)
    if len(edges) < 2:
        return _uniform_bins(p_clip, n_bins)
    return np.clip(
        np.searchsorted(edges, p_clip, side="right") - 1, 0, len(edges) - 2
    )


def _fit_calibration(y: np.ndarray, p_clip: np.ndarray) -> tuple[float, float]:
    """Fit logit(E[y]) = alpha + beta * logit(p) and return (alpha, beta)."""
    if np.unique(y).size < 2:
        # A logistic regression cannot be fitted on a single class.
        return float("nan"), float("nan")
    if LogisticRegression is None:
        raise ImportError("compute_metrics requires scikit-learn for the calibration fit")
    features = sp_logit(p_clip).reshape(-1, 1)
    clf = LogisticRegression(C=1e6, solver="lbfgs", max_iter=200)
    clf.fit(features, y)
    return float(clf.intercept_[0]), float(clf.coef_[0][0])


def compute_metrics(
    df: pd.DataFrame,
    n_bins: int = 10,
    bin_strategy: str = "uniform",  # 'uniform' or 'quantile'
    include_draws: bool = True,
    eps: float = 1e-6,
) -> dict:
    """Compute prediction-quality metrics and fit a calibration relation.

    Args:
        df: Frame with at least the columns ``'win_prob'`` (predicted win
            probability) and ``'res'`` (``'won'``, ``'refunded'`` or
            ``'lost'``). Rows with any other ``'res'`` value are dropped.
        n_bins: Number of bins used for the expected calibration error.
        bin_strategy: ``'quantile'`` for equal-frequency bins; any other value
            selects equal-width bins.
        include_draws: If True, ``'refunded'`` rows are kept and counted as
            not won (``y = 0``). If False, they are dropped.
        eps: Probabilities are clipped to ``[eps, 1 - eps]`` for numerical
            stability.

    Returns:
        A dict with the keys ``logloss``, ``brier``, ``ece``, ``accuracy``,
        ``reg_alpha``, ``reg_beta`` and ``n_samples``. All metrics are NaN
        when no rows remain after filtering; ``reg_alpha`` and ``reg_beta``
        are NaN when only one class is present.

    Raises:
        ImportError: If scikit-learn is unavailable and a calibration fit is
            required (both classes present).
    """
    kept = ["won", "refunded", "lost"] if include_draws else ["won", "lost"]
    rows = df[df["res"].isin(kept)]

    # Target: won = 1, everything else (including refunded) = 0.
    y = rows["res"].map(_RESULT_TO_LABEL).astype(int).values
    p = rows["win_prob"].astype(float).values
    total = len(y)

    nan = float("nan")
    if total == 0:
        return {
            "logloss": nan,
            "brier": nan,
            "ece": nan,
            "accuracy": nan,
            "reg_alpha": nan,
            "reg_beta": nan,
            "n_samples": 0,
        }

    p_clip = np.clip(p, eps, 1 - eps)

    logloss = _binary_log_loss(y, p_clip)
    brier = float(np.mean((p_clip - y) ** 2))

    # ECE: count-weighted mean |mean prediction - empirical frequency| per bin.
    bin_idxs = _assign_bins(p_clip, n_bins, bin_strategy)
    weighted_gap = 0.0
    for b in range(n_bins):
        in_bin = bin_idxs == b
        count = int(in_bin.sum())
        if count == 0:
            continue
        gap = abs(float(p_clip[in_bin].mean()) - float(y[in_bin].mean()))
        weighted_gap += gap * count
    ece = float(weighted_gap / total)

    acc = float(np.mean((p_clip >= 0.5) == (y == 1)))

    alpha, beta = _fit_calibration(y, p_clip)

    return {
        "logloss": logloss,
        "brier": brier,
        "ece": ece,
        "accuracy": acc,
        "reg_alpha": alpha,
        "reg_beta": beta,
        "n_samples": int(total),
    }


# Example usage
if __name__ == "__main__":
    odds_list = [+150, -200, +300, -120]
    for odds in odds_list:
        prob = moneyline_to_prob(odds)
        print(f"赔率 {odds}: 概率 {prob:.4f}")

    odds = [+116, -156]

    # Vig-free odds via the power method.
    no_vig_odds_power = calculate_no_vig_moneyline_power(odds)

    print(f"原始 Moneyline 赔率: {odds}")
    print(f"无 Vig Moneyline 赔率 (Power Method): {no_vig_odds_power}")

    # Optional check: the vig-free odds should imply probabilities summing to ~1.
    if no_vig_odds_power:
        novig_probs_power = [moneyline_to_prob(o) for o in no_vig_odds_power]
        print(f"无 Vig 概率之和 (基于计算出的赔率): {sum(novig_probs_power):.6f}")