Files
bet/common/bet_tools.py

282 lines
11 KiB
Python

import math
import numpy as np
import pandas as pd
from scipy.optimize import fsolve # 导入 fsolve 函数用于数值求解
from scipy.special import logit as sp_logit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
def moneyline_to_prob(moneyline_odds: float) -> float:
    """Convert American (moneyline) odds into the implied probability.

    Positive odds ``+X`` give ``100 / (X + 100)``; negative odds ``-X`` give
    ``X / (X + 100)``.  Infinite odds are treated as the limiting cases:
    ``+inf`` gives ``0.0`` and ``-inf`` gives ``1.0``.

    Args:
        moneyline_odds: Moneyline price, e.g. ``150`` or ``-200``.

    Returns:
        The implied probability (bookmaker margin still included).

    Raises:
        ValueError: If ``moneyline_odds`` is 0, which is not a valid price.
    """
    if moneyline_odds == 0:
        raise ValueError("Moneyline odds cannot be 0")
    if moneyline_odds > 0:
        # Underdog price +X: risk 100 to win X.
        return 100 / (moneyline_odds + 100)
    # Favourite price -X: risk X to win 100.
    stake = abs(moneyline_odds)
    if stake == math.inf:
        # inf / (inf + 100) would evaluate to NaN; the limit is certainty.
        return 1.0
    return stake / (stake + 100)
def prob_to_moneyline(probability: float) -> float:
    """Convert a probability into American (moneyline) odds.

    The result is rounded to two decimal places (it is a float, not an
    integer).  Probabilities up to 0.5 produce a positive line, larger
    probabilities a negative line.

    Args:
        probability: Win probability.  Values strictly between 0 and 1 are
            converted normally; 0 and 1 (within ``math.isclose`` tolerance)
            are mapped to the limiting infinite lines.

    Returns:
        The moneyline price.  ``float("inf")`` for probability 0 and
        ``float("-inf")`` for probability 1.

    Raises:
        ValueError: If ``probability`` is outside the closed range [0, 1].
    """
    if not 0 < probability < 1:
        if math.isclose(probability, 0):
            return float("inf")
        if math.isclose(probability, 1):
            # As p -> 1 the line -100 / (1/p - 1) diverges to -inf.
            # (-100 is the line for p = 0.5, not for certainty.)
            return float("-inf")
        raise ValueError("Probability must be between 0 and 1 (exclusive)")

    # Net profit per unit staked, i.e. decimal odds minus one.
    net_decimal = 1 / probability - 1
    if probability <= 0.5:
        # Decimal odds >= 2.0: positive line = profit on a 100 stake.
        return round(net_decimal * 100, 2)
    # Decimal odds < 2.0: negative line = stake needed to win 100.
    return round(-100 / net_decimal, 2)
def calculate_no_vig_moneyline_multir(moneyline_odds_list: list[int]) -> list[int]:
    """Strip the vig from a set of moneyline odds by multiplicative rescaling.

    Each price is converted to its implied probability, the probabilities
    are divided by their sum so that they add up to 1, and every rescaled
    probability is converted back with ``prob_to_moneyline``.

    Example:
        For ``[+120, -150]`` the implied probabilities are
        ``[100/220, 150/250] = [0.4545, 0.6]`` with a total of ``1.0545``;
        the no-vig probabilities are ``[0.4545/1.0545, 0.6/1.0545]``.

    Args:
        moneyline_odds_list: Moneyline prices covering every outcome of
            one market.

    Returns:
        The no-vig moneyline prices in the same order as the input; an
        empty list when the input is empty.
    """
    if not moneyline_odds_list:
        return []

    # Implied probabilities still carry the margin, so they usually sum to > 1.
    raw_probs = list(map(moneyline_to_prob, moneyline_odds_list))
    overround = sum(raw_probs)

    # Normalise each probability by the total and convert it straight back.
    fair_lines = []
    for raw in raw_probs:
        fair_lines.append(prob_to_moneyline(raw / overround))
    return fair_lines
def calculate_no_vig_moneyline_power(moneyline_odds_list: list[int]) -> list[float]:
    """Strip the vig from a set of moneyline odds with the power method.

    The implied probabilities ``p_i`` are raised to a common exponent ``k``
    chosen so that ``sum(p_i ** k) == 1``; the adjusted probabilities
    ``p_i ** k`` are then converted back with ``prob_to_moneyline``.

    Args:
        moneyline_odds_list: Moneyline prices covering every outcome of one
            market, e.g. ``[+116, -156]``.

    Returns:
        The no-vig moneyline prices in input order.  An empty list for empty
        input.  If the implied probabilities already sum to 1 or less, a
        warning is printed and a copy of the input is returned unchanged.

    Raises:
        ValueError: If an implied probability is not positive, or if no
            exponent can bring the probability total down to 1.
    """
    # Local import: the module-level scipy import only exposes ``fsolve``.
    from scipy.optimize import brentq

    if not moneyline_odds_list:
        return []

    implied_probabilities = [moneyline_to_prob(odds) for odds in moneyline_odds_list]
    if any(p <= 0 for p in implied_probabilities):
        raise ValueError("All implied probabilities must be positive.")

    total_implied_probability = sum(implied_probabilities)
    if total_implied_probability <= 1:
        print(
            "Warning: Input odds already have little or no vig. Returning original odds."
        )
        # Return a copy so callers cannot mutate the input through the result.
        return list(moneyline_odds_list)

    def excess_at(k: float) -> float:
        """Return sum(p_i ** k) - 1, the quantity that must reach zero."""
        return sum(p**k for p in implied_probabilities) - 1

    # excess_at(1) > 0 (checked above) and the function decreases as k grows
    # because every p_i is below 1, so the root lies above 1.  Double the upper
    # bound until the sign flips to obtain a bracket for the root finder.
    upper = 2.0
    for _ in range(60):
        if excess_at(upper) <= 0:
            break
        upper *= 2
    else:
        raise ValueError(
            "Power method has no solution: probabilities cannot be scaled to sum to 1."
        )

    # A bracketing solver is guaranteed to converge on a sign change, unlike an
    # unbracketed solve whose convergence status would have to be checked.
    k = brentq(excess_at, 1.0, upper)

    no_vig_probabilities = [p**k for p in implied_probabilities]

    # Sanity check against numerical error in the solved exponent.
    final_sum_check = sum(no_vig_probabilities)
    if not math.isclose(final_sum_check, 1.0, abs_tol=1e-9):
        print(
            f"Warning: Final no-vig probabilities sum to {final_sum_check:.6f}, expected 1.0. Sum may need slight re-normalization."
        )

    return [prob_to_moneyline(p_novig) for p_novig in no_vig_probabilities]
def _assign_ece_bins(probs: np.ndarray, n_bins: int, bin_strategy: str) -> np.ndarray:
    """Return the calibration-bin index of every probability in ``probs``."""
    if bin_strategy == "quantile":
        try:
            edges = np.unique(np.percentile(probs, np.linspace(0, 100, n_bins + 1)))
        except Exception:
            # Best effort: any failure computing quantile edges falls back to
            # equal-width bins below.
            edges = np.empty(0)
        if len(edges) >= 2:
            # Duplicate edges are merged, so there may be fewer than n_bins bins.
            return np.clip(
                np.searchsorted(edges, probs, side="right") - 1, 0, len(edges) - 2
            )
    # Equal-width bins on [0, 1]; a probability of exactly 1 goes in the last bin.
    return np.minimum((probs * n_bins).astype(int), n_bins - 1)


def compute_metrics(
    df: pd.DataFrame,
    n_bins: int = 10,
    bin_strategy: str = "uniform",  # 'uniform' or 'quantile'
    include_draws: bool = True,
    eps: float = 1e-6,
) -> dict:
    """Compute prediction-quality metrics and fit a calibration line.

    Args:
        df: Frame with at least the columns ``'win_prob'`` (predicted win
            probability) and ``'res'`` (one of ``'won'``, ``'refunded'``,
            ``'lost'``).  Rows with any other ``'res'`` value are ignored.
        n_bins: Number of bins used for the expected calibration error (ECE).
        bin_strategy: ``'quantile'`` for equal-frequency bins; any other
            value selects equal-width bins.
        include_draws: If True, ``'refunded'`` rows are kept and labelled as
            non-wins (y = 0).  If False, ``'refunded'`` rows are dropped.
        eps: Probabilities are clipped to ``[eps, 1 - eps]`` for numerical
            stability.

    Returns:
        A dict with the keys ``logloss``, ``brier``, ``ece``, ``accuracy``,
        ``reg_alpha``, ``reg_beta`` and ``n_samples``.  ``reg_alpha`` and
        ``reg_beta`` are the intercept and slope of
        ``logit(E[y]) = alpha + beta * logit(p)``; they are NaN when the
        labels contain fewer than two classes.  All metrics are NaN when no
        rows remain after filtering.
    """
    kept_results = ["won", "refunded", "lost"] if include_draws else ["won", "lost"]
    df = df[df["res"].isin(kept_results)]

    # Labels: won = 1, everything else (including refunded) = 0.
    y = df["res"].map({"won": 1, "refunded": 0, "lost": 0}).astype(int).values
    p = df["win_prob"].astype(float).values
    total = len(y)
    nan = float("nan")

    if total == 0:
        # Nothing to score; the calibration fit below would raise on empty data.
        return {
            "logloss": nan,
            "brier": nan,
            "ece": nan,
            "accuracy": nan,
            "reg_alpha": nan,
            "reg_beta": nan,
            "n_samples": 0,
        }

    p_clip = np.clip(p, eps, 1 - eps)

    # Log loss: prefer the sklearn implementation, with a manual fallback.
    try:
        logloss = float(log_loss(y, p_clip, labels=[0, 1]))
    except Exception:
        logloss = float(-np.mean(y * np.log(p_clip) + (1 - y) * np.log(1 - p_clip)))

    brier = float(np.mean((p_clip - y) ** 2))

    # ECE: count-weighted mean gap between predicted and observed frequency.
    bin_idxs = _assign_ece_bins(p_clip, n_bins, bin_strategy)
    weighted_gap = 0.0
    for b in range(n_bins):
        in_bin = bin_idxs == b
        count = int(in_bin.sum())
        if count == 0:
            continue
        mean_pred = float(p_clip[in_bin].mean())
        emp_freq = float(y[in_bin].mean())
        weighted_gap += abs(mean_pred - emp_freq) * count
    ece = float(weighted_gap / total)

    acc = float(np.mean((p_clip >= 0.5) == (y == 1)))

    # Calibration fit: logit(E[y]) = alpha + beta * logit(p).
    if np.unique(y).size < 2:
        # Logistic regression cannot be fitted on a single class.
        alpha = beta = nan
    else:
        X = sp_logit(p_clip).reshape(-1, 1)
        # Large C makes the fit effectively unregularised.
        clf = LogisticRegression(C=1e6, solver="lbfgs", max_iter=200)
        clf.fit(X, y)
        alpha = float(clf.intercept_[0])
        beta = float(clf.coef_[0][0])

    return {
        "logloss": logloss,
        "brier": brier,
        "ece": ece,
        "accuracy": acc,
        "reg_alpha": alpha,
        "reg_beta": beta,
        "n_samples": int(total),
    }
# Usage example: runs only when the module is executed as a script.
if __name__ == "__main__":
    # Implied probability of a few sample prices.
    for odds in (+150, -200, +300, -120):
        prob = moneyline_to_prob(odds)
        print(f"赔率 {odds}: 概率 {prob:.4f}")

    # Remove the vig from a two-way market with the power method.
    odds = [+116, -156]
    no_vig_odds_power = calculate_no_vig_moneyline_power(odds)
    for report_line in (
        f"原始 Moneyline 赔率: {odds}",
        f"无 Vig Moneyline 赔率 (Power Method): {no_vig_odds_power}",
    ):
        print(report_line)

    # Optional check: probabilities implied by the no-vig prices should sum to ~1.
    if no_vig_odds_power:
        novig_probs_power = list(map(moneyline_to_prob, no_vig_odds_power))
        print(f"无 Vig 概率之和 (基于计算出的赔率): {sum(novig_probs_power):.6f}")