feat: implement MVP LLM router service
实现基于 token 长度的简单规则路由服务:
- FastAPI 基础服务 (/v1/chat/completions)
- 根据 token 长度自动选择模型 (gpt-3.5/gpt-4o-mini/gpt-4o)
- 成本追踪和统计 (/stats)
- 健康检查端点 (/health)
- 总计 224 行代码
This commit is contained in:
35
config.py
Normal file
35
config.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""
|
||||
简单配置管理
|
||||
"""
|
||||
import os
|
||||
from typing import Literal
|
||||
|
||||
|
||||
# Model configuration, keyed by model name.
#
# Each entry holds:
#   input_cost_per_1k  - price per 1,000 input tokens
#   output_cost_per_1k - price per 1,000 output tokens
#   max_tokens         - token limit for the model
#
# NOTE(review): prices are presumably in USD; confirm against the provider's
# current price list before relying on them for billing.
# NOTE(review): it is not visible here whether max_tokens is a context-window
# size or an output cap; verify against the code that reads it.
MODEL_CONFIG = {
    "gpt-3.5-turbo": {
        "input_cost_per_1k": 0.0005,
        "output_cost_per_1k": 0.0015,
        "max_tokens": 4096,
    },
    "gpt-4o-mini": {
        "input_cost_per_1k": 0.00015,
        "output_cost_per_1k": 0.0006,
        "max_tokens": 128000,
    },
    "gpt-4o": {
        "input_cost_per_1k": 0.005,
        "output_cost_per_1k": 0.015,
        "max_tokens": 128000,
    },
}
|
||||
|
||||
# Routing thresholds, expressed as token counts.
#
# Each value is an exclusive upper bound for its tier; the trailing comments
# record the intended model per tier. The selection logic itself is not in
# this block.
ROUTING_THRESHOLDS = {
    "simple": 100,  # < 100 tokens -> gpt-3.5-turbo
    "medium": 500,  # < 500 tokens -> gpt-4o-mini
    # >= 500 tokens -> gpt-4o
}
|
||||
|
||||
# API Key
|
||||
# Read once at import time from the environment; an unset variable yields "".
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
|
||||
Reference in New Issue
Block a user