feat: implement MVP LLM router service

实现基于 token 长度的简单规则路由服务:
- FastAPI 基础服务 (/v1/chat/completions)
- 根据 token 长度自动选择模型 (gpt-3.5-turbo/gpt-4o-mini/gpt-4o)
- 成本追踪和统计 (/stats)
- 健康检查端点 (/health)
- 总计 224 行代码
This commit is contained in:
2026-04-17 23:33:43 +08:00
parent 55506952c1
commit 4a8de8925e
4 changed files with 287 additions and 0 deletions

35
config.py Normal file
View File

@@ -0,0 +1,35 @@
"""
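Simple configuration for the LLM router service.

Defines per-model cost and token-limit settings, the token-count routing
thresholds, and the OpenAI API key read from the environment.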
"""
import os
from typing import Literal
# Model configuration, keyed by model name.
#
# Each entry carries:
#   input_cost_per_1k  - cost per 1,000 input tokens
#   output_cost_per_1k - cost per 1,000 output tokens
#   max_tokens         - token limit for the model
#
# NOTE(review): the currency is not stated here (presumably USD), and it is
# not clear whether max_tokens means the context window or the output cap --
# confirm against the code that consumes this table.
MODEL_CONFIG = {
    "gpt-3.5-turbo": {
        "input_cost_per_1k": 0.0005,
        "output_cost_per_1k": 0.0015,
        "max_tokens": 4096,
    },
    "gpt-4o-mini": {
        "input_cost_per_1k": 0.00015,
        "output_cost_per_1k": 0.0006,
        "max_tokens": 128000,
    },
    "gpt-4o": {
        "input_cost_per_1k": 0.005,
        "output_cost_per_1k": 0.015,
        "max_tokens": 128000,
    },
}
# Routing thresholds, expressed as token counts.
#
# Intended mapping, per the notes on each entry (the code that applies these
# thresholds lives outside this module):
#   fewer than "simple" tokens -> gpt-3.5-turbo
#   fewer than "medium" tokens -> gpt-4o-mini
#   otherwise                  -> gpt-4o
ROUTING_THRESHOLDS = {
    # < 100 tokens -> gpt-3.5-turbo
    "simple": 100,
    # < 500 tokens -> gpt-4o-mini
    "medium": 500,
    # >= 500 tokens -> gpt-4o (no entry needed; this is the fallback)
}
# OpenAI API key, read once at import time from the OPENAI_API_KEY environment
# variable. Falls back to an empty string when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")