# Kirim-V1-Base Docker Image
# Optimized for inference with CUDA support

FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${PATH}
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    python3.10 \
    python3-pip \
    python3-dev \
    git \
    wget \
    curl \
    vim \
    build-essential \
    cmake \
    ninja-build \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip and core packaging tools
RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel

# Install PyTorch with CUDA 12.1 support
RUN pip3 install --no-cache-dir \
    torch==2.1.0 \
    torchvision==0.16.0 \
    torchaudio==2.1.0 \
    --index-url https://download.pytorch.org/whl/cu121

# Copy requirements first so this layer is cached when only code changes
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Install additional optimization libraries.
# flash-attn needs --no-build-isolation (it compiles against the torch
# installed above), so install it in its own command; otherwise the flag
# would also apply to xformers.
RUN pip3 install --no-cache-dir flash-attn==2.3.0 --no-build-isolation && \
    pip3 install --no-cache-dir xformers==0.0.22

# Copy model files and scripts
COPY . .

# Create directories for model cache and outputs
RUN mkdir -p /app/models /app/outputs /root/.cache/huggingface

# Set HuggingFace cache directories
ENV HF_HOME=/root/.cache/huggingface
ENV TRANSFORMERS_CACHE=/app/models

# Expose port for API (if needed)
EXPOSE 8000

# Health check: fail if CUDA is not available. Using assert (rather than
# printing the result) makes the exit code non-zero when the check fails;
# a bare print always exits 0, so the check could never report unhealthy.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python3 -c "import torch; assert torch.cuda.is_available()" || exit 1

# Default command - interactive chat
CMD ["python3", "inference.py", "--chat"]

# Alternative commands:
# For API server:       CMD ["python3", "api_server.py"]
# For single inference: CMD ["python3", "inference.py", "--prompt", "你好"]
# For quantized model:  CMD ["python3", "inference.py", "--load_in_4bit", "--chat"]
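
# Example build/run commands (a minimal sketch: the image tag and host paths
# are placeholders, and --gpus all assumes the NVIDIA Container Toolkit is
# installed on the host):
#
#   docker build -t kirim-v1-base .
#
#   # Interactive chat (default CMD)
#   docker run --gpus all -it kirim-v1-base
#
#   # API server, with the exposed port published
#   docker run --gpus all -p 8000:8000 kirim-v1-base python3 api_server.py
#
#   # Reuse the host's HuggingFace cache to avoid re-downloading weights
#   docker run --gpus all -it \
#     -v $HOME/.cache/huggingface:/root/.cache/huggingface \
#     kirim-v1-base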