fix: 修复 max_tokens 为 0 或 None 时响应内容被截断的问题
问题: 在 Swagger UI 中测试时,max_tokens 的默认值为 0,该值被原样传给 DashScope API,
导致返回的响应内容只有 1 个 token(被截断)
修复:
- 在非流式和流式两条请求路径中,当 max_tokens 为 None 或 ≤ 0 时,不再把该参数传给后端 API
- 让 DashScope 使用自己的默认 max_tokens 值(通常 2048/4096)
- 使用 completion_kwargs 字典动态构建请求参数
效果:
- Swagger UI 中 max_tokens 留空或设为 0 都能返回完整响应
- 需要限制输出时可手动设置合理的 max_tokens 值
This commit is contained in:
35
main.py
35
main.py
@@ -291,13 +291,18 @@ async def chat_completions(request: ChatCompletionRequest):
|
|||||||
|
|
||||||
# 3. 非流式响应
|
# 3. 非流式响应
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
|
# 构建请求参数(过滤掉 None 和 0 的 max_tokens)
|
||||||
|
completion_kwargs = {
|
||||||
|
"model": provider_model,
|
||||||
|
"messages": messages_raw,
|
||||||
|
"temperature": request.temperature,
|
||||||
|
}
|
||||||
|
if request.max_tokens and request.max_tokens > 0:
|
||||||
|
completion_kwargs["max_tokens"] = request.max_tokens
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await acompletion(
|
response = await acompletion(**completion_kwargs)
|
||||||
model=provider_model,
|
|
||||||
messages=messages_raw,
|
|
||||||
temperature=request.temperature,
|
|
||||||
max_tokens=request.max_tokens,
|
|
||||||
)
|
|
||||||
latency_ms = (time.time() - start_time) * 1000
|
latency_ms = (time.time() - start_time) * 1000
|
||||||
|
|
||||||
input_tokens = response.usage.prompt_tokens
|
input_tokens = response.usage.prompt_tokens
|
||||||
@@ -338,13 +343,17 @@ async def _stream_response(
|
|||||||
output_tokens = 0
|
output_tokens = 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await acompletion(
|
# 构建请求参数(过滤掉 None 和 0 的 max_tokens)
|
||||||
model=provider_model,
|
completion_kwargs = {
|
||||||
messages=messages_raw,
|
"model": provider_model,
|
||||||
temperature=request.temperature,
|
"messages": messages_raw,
|
||||||
max_tokens=request.max_tokens,
|
"temperature": request.temperature,
|
||||||
stream=True,
|
"stream": True,
|
||||||
)
|
}
|
||||||
|
if request.max_tokens and request.max_tokens > 0:
|
||||||
|
completion_kwargs["max_tokens"] = request.max_tokens
|
||||||
|
|
||||||
|
response = await acompletion(**completion_kwargs)
|
||||||
|
|
||||||
async for chunk in response:
|
async for chunk in response:
|
||||||
delta = chunk.choices[0].delta
|
delta = chunk.choices[0].delta
|
||||||
|
|||||||
Reference in New Issue
Block a user