feat: 添加Docker部署配置
Dockerfile: 多阶段构建, CPU版PyTorch, 模型预加载; docker-compose: 健康检查, 数据卷持久化, 环境变量注入; requirements.lock.txt: 锁定依赖版本, 确保构建可复现
This commit is contained in:
11
.dockerignore
Normal file
11
.dockerignore
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# Paths excluded from the Docker build context.
# Patterns are matched relative to the context root, so a leading **/ is
# required for a pattern to apply at every directory depth.

# Local virtual environment and environment files (may contain secrets)
venv/
.env
.env.example

# Python bytecode and test caches; **/ also covers nested packages
**/__pycache__/
**/*.pyc
.pytest_cache/

# Version-control metadata
.git/
.gitignore

# Local data and documentation; the Dockerfile does not copy any of these
data/
docs/
*.md
|
||||||
47
Dockerfile
Normal file
47
Dockerfile
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# ── Stage 1: dependency installation ───────────────────────
FROM python:3.12-slim AS builder

WORKDIR /build

# Only the lock file is copied at this point, so this layer and the install
# layer below are served from the build cache until the lock file changes.
COPY requirements.lock.txt ./

# --no-cache-dir keeps pip's download cache out of the layer.
RUN python -m pip install \
        --no-cache-dir \
        --requirement requirements.lock.txt
|
||||||
|
|
||||||
|
# ── Stage 2: runtime ───────────────────────────────────────
FROM python:3.12-slim

LABEL maintainer="LLM Compass"
LABEL description="智能LLM路由服务,为请求指引最优模型"

WORKDIR /app

# Install runtime system dependencies (sentencepiece etc.)
RUN apt-get update && \
    apt-get install -y --no-install-recommends libgomp1 && \
    rm -rf /var/lib/apt/lists/*

# Unprivileged account the service runs as. The UID/GID are fixed so that
# orchestrators and volume permissions can rely on a stable numeric identity.
RUN groupadd --gid 10001 app && \
    useradd --uid 10001 --gid app --create-home --shell /usr/sbin/nologin app

# Copy the installed Python packages and their console scripts from builder
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy application code
COPY --chown=app:app config.py main.py nvidia_router.py ./

# Create the data directory and hand /app and /app/data to the service user
# so the application can write there without root.
RUN mkdir -p /app/data && \
    chown app:app /app /app/data

# Drop privileges before the preload so that anything it caches is written by,
# and later readable by, the same user that runs the service.
# NOTE(review): presumably the model cache lands under the user's home
# directory; confirm against nvidia_router.
USER app

# Pre-download the NVIDIA model at build time to avoid downloading on every
# start. Deliberately best-effort: a failure only prints a notice and the build
# continues.
RUN python -c "from nvidia_router import get_nvidia_router; r = get_nvidia_router(); r.initialize(); print('Model preloaded successfully')" || echo "Model preload failed, will download on first request"

# Environment variables (secrets are injected via docker-compose / --env-file)
ENV PYTHONUNBUFFERED=1

# Exposed port
EXPOSE 8000

# Data persistence. Declared after the directory is created and chowned,
# because build-time changes to a path made after VOLUME are discarded.
VOLUME ["/app/data"]

# Start command
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]
|
||||||
20
docker-compose.yml
Normal file
20
docker-compose.yml
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
# Compose stack with a single service built from the Dockerfile in this directory.
services:
  llm-compass:
    container_name: llm-compass
    build: .
    restart: unless-stopped
    ports:
      - "8000:8000"
    environment:
      # Substituted by Compose from the invoking environment / .env file.
      DASHSCOPE_API_KEY: ${DASHSCOPE_API_KEY}
    volumes:
      # Named volume mounted at /app/data.
      - compass-data:/app/data
    healthcheck:
      # Probes /health with the Python standard library; urlopen raises on a
      # connection failure or HTTP error status, which makes the probe fail.
      test:
        - "CMD"
        - "python"
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

volumes:
  compass-data:
|
||||||
17
requirements.lock.txt
Normal file
17
requirements.lock.txt
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
# LLM Compass - locked dependencies for the Docker build (CPU)
# Uses the CPU build of PyTorch to greatly reduce image size
--extra-index-url https://download.pytorch.org/whl/cpu
fastapi==0.136.0
uvicorn[standard]==0.44.0
pydantic==2.12.5
litellm==1.83.9
tiktoken==0.12.0
httpx==0.28.1
python-dotenv==1.0.1
torch==2.2.2+cpu
transformers==4.57.6
tokenizers==0.22.2
safetensors==0.4.3
numpy==1.26.4
sentencepiece==0.2.1
# Pinned exactly, like every other entry, so rebuilds resolve the same version;
# 0.36.0 still satisfies the previous ">=0.28.0" floor.
# NOTE(review): confirm 0.36.0 is inside the huggingface-hub range declared by
# the pinned transformers and tokenizers releases.
huggingface_hub==0.36.0
|
||||||
Reference in New Issue
Block a user