实现基于 token 长度的简单规则路由服务: - FastAPI 基础服务 (/v1/chat/completions) - 根据 token 长度自动选择模型 (gpt-3.5/gpt-4o-mini/gpt-4o) - 成本追踪和统计 (/stats) - 健康检查端点 (/health) - 总计 224 行代码
36 lines
728 B
Python
36 lines
728 B
Python
"""
|
|
简单配置管理
|
|
"""
|
|
import os
|
|
from typing import Literal
|
|
|
|
|
|
# Model configuration, keyed by model name.
#
# Each entry carries three fields:
#   input_cost_per_1k  - cost per 1k units of input (presumably tokens, priced
#                        in USD; confirm against the cost-tracking code)
#   output_cost_per_1k - cost per 1k units of output (same assumption)
#   max_tokens         - token limit for the model
#                        NOTE(review): whether this is a context-window limit
#                        or a completion limit is not visible here; confirm
#                        against the code that consumes it.
MODEL_CONFIG = {
    "gpt-3.5-turbo": {
        "input_cost_per_1k": 0.0005,
        "output_cost_per_1k": 0.0015,
        "max_tokens": 4096,
    },
    "gpt-4o-mini": {
        "input_cost_per_1k": 0.00015,
        "output_cost_per_1k": 0.0006,
        "max_tokens": 128000,
    },
    "gpt-4o": {
        "input_cost_per_1k": 0.005,
        "output_cost_per_1k": 0.015,
        "max_tokens": 128000,
    },
}
|
|
|
|
# Routing thresholds, expressed in tokens.
# The comparison logic that applies these bounds is not part of this block;
# the inline notes record the intended mapping from token count to model.
ROUTING_THRESHOLDS = {
    "simple": 100,  # < 100 tokens -> gpt-3.5-turbo
    "medium": 500,  # < 500 tokens -> gpt-4o-mini
    # >= 500 tokens -> gpt-4o
}
|
|
|
|
# API key, read once at import time from the OPENAI_API_KEY environment
# variable. Falls back to an empty string when the variable is not set, so
# this name is always a str.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|