Dockerfile: 多阶段构建, CPU版PyTorch, 模型预加载 docker-compose: 健康检查, 数据卷持久化, 环境变量注入 requirements.lock.txt: 锁定依赖版本确保构建可复现
48 lines
1.6 KiB
Docker
48 lines
1.6 KiB
Docker
# ── Stage 1: dependency installation ──────────────────────────
# Throwaway stage: installs the locked Python dependencies so that only
# the resulting packages need to be copied into the runtime image.
FROM python:3.12-slim AS builder

WORKDIR /build

# Copy only the lockfile first so the install layer below stays cached
# until the pinned dependencies themselves change.
COPY requirements.lock.txt ./

# --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir --requirement requirements.lock.txt

# ── Stage 2: runtime ──────────────────────────────────────────
# Final image: slim Python base plus the packages installed in the
# builder stage and the application sources. Runs as an unprivileged user.
FROM python:3.12-slim

LABEL maintainer="LLM Compass"
LABEL description="智能LLM路由服务,为请求指引最优模型"

WORKDIR /app

# Runtime system libraries (libgomp1; the original note cites sentencepiece
# and similar packages as the reason). The apt lists are removed in the same
# layer so they never reach the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends libgomp1 && \
    rm -rf /var/lib/apt/lists/*

# Python packages and console scripts (e.g. uvicorn) from the builder stage.
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Dedicated unprivileged account with a fixed UID/GID so orchestrators can
# verify it. It gets a real home directory (libraries that cache under $HOME
# need one) and ownership of /app, including the data directory, which must
# exist with the right owner *before* VOLUME is declared below.
# NOTE(review): volumes already populated by an earlier root-running image
# may need a one-time `chown -R 10001:10001` on their contents.
RUN groupadd --gid 10001 app && \
    useradd --no-log-init --uid 10001 --gid app --create-home --shell /usr/sbin/nologin app && \
    mkdir -p /app/data && \
    chown -R app:app /app

# Application code, owned by the runtime user.
COPY --chown=app:app config.py main.py nvidia_router.py ./

# Drop root before anything from the application is executed, so files
# written during the preload are owned by the same user that serves requests.
USER app

# Pre-download the NVIDIA model at build time so it is cached in the image
# instead of being fetched on every start. Best effort by design: if the
# preload fails the build continues and the model is fetched on first request.
RUN python -c "from nvidia_router import get_nvidia_router; r = get_nvidia_router(); r.initialize(); print('Model preloaded successfully')" || echo "Model preload failed, will download on first request"

# Non-sensitive defaults only; secrets are injected at run time through
# docker-compose / --env-file.
ENV PYTHONUNBUFFERED=1

# Port the uvicorn server listens on (documentation only; does not publish).
EXPOSE 8000

# Persistent data directory (declared after it was created and chowned).
VOLUME ["/app/data"]

# Exec form so uvicorn is PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]