Add OpenAPI metadata (descriptions, examples, numeric bounds) to the
OpenAI-compatible request models ChatMessage and ChatCompletionRequest.

NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed. Indentation and the position of the blank context lines are
reconstructed from the hunk counts; confirm them against main.py before
applying.

NOTE(review): Field(example=...) was changed to Field(examples=[...]).
Pydantic v2 treats "example" as a deprecated extra keyword and discards it
when json_schema_extra is also passed, which silently dropped the "qwen-plus"
example on the "model" field. The post-image blob id on the index line below
was recorded before this revision.

diff --git a/main.py b/main.py
index 801e41d..0ed6433 100644
--- a/main.py
+++ b/main.py
@@ -58,23 +58,32 @@ _load_history()
 
 # ── OpenAI 兼容请求/响应模型 ────────────────────────────────
 
+from pydantic import BaseModel, Field
 class ChatMessage(BaseModel):
-    role: str
-    content: Optional[str] = None
-    name: Optional[str] = None
+    role: str = Field(..., description="角色:system, user, assistant", examples=["user"])
+    content: Optional[str] = Field(None, description="消息内容", examples=["你好,介绍一下你自己"])
+    name: Optional[str] = Field(None, description="可选的名称")
 
 class ChatCompletionRequest(BaseModel):
-    model: Optional[str] = None
-    messages: List[ChatMessage]
-    temperature: Optional[float] = 0.7
-    max_tokens: Optional[int] = None
-    stream: Optional[bool] = False
-    top_p: Optional[float] = 1.0
-    n: Optional[int] = 1
-    stop: Optional[Any] = None
-    presence_penalty: Optional[float] = 0.0
-    frequency_penalty: Optional[float] = 0.0
-    user: Optional[str] = None
+    model: Optional[str] = Field(
+        None,
+        description="模型名称(留空时自动使用 NVIDIA 分类器智能路由)",
+        examples=["", "qwen-flash", "qwen-plus", "qwen-max"],
+    )
+    messages: List[ChatMessage] = Field(
+        ...,
+        description="对话消息列表",
+        examples=[[{"role": "user", "content": "你好,介绍一下你自己"}]],
+    )
+    temperature: Optional[float] = Field(0.7, ge=0, le=2, description="随机性 (0-2)")
+    max_tokens: Optional[int] = Field(None, ge=1, description="最大生成 token 数")
+    stream: Optional[bool] = Field(False, description="是否使用流式输出")
+    top_p: Optional[float] = Field(1.0, ge=0, le=1, description="核采样参数")
+    n: Optional[int] = Field(1, ge=1, le=10, description="生成回复数量")
+    stop: Optional[Any] = Field(None, description="停止词")
+    presence_penalty: Optional[float] = Field(0.0, ge=-2, le=2, description="存在惩罚")
+    frequency_penalty: Optional[float] = Field(0.0, ge=-2, le=2, description="频率惩罚")
+    user: Optional[str] = Field(None, description="用户标识")
 
 # ── FastAPI App ──────────────────────────────────────────────
 