# NOTE(review): the top-level `version` key is obsolete in Compose V2 (ignored
# with a warning); kept quoted for backward compatibility with legacy tooling.
version: '3.8'
services:
  # Full-precision inference service (default, unquantized weights).
  kirim-v1-base:
    build:
      context: .
      dockerfile: Dockerfile
    image: kirim-ai/kirim-v1-base:latest
    container_name: kirim-v1-inference
    # GPU support — reserve every NVIDIA GPU visible on the host.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Environment variables (list form: each entry is a plain KEY=value string)
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - HF_HOME=/app/models/.cache
      # NOTE(review): TRANSFORMERS_CACHE is deprecated upstream in favour of
      # HF_HOME; kept because the image may still read it — confirm before removing.
      - TRANSFORMERS_CACHE=/app/models
      - MODEL_PATH=Kirim-ai/Kirim-V1-base
      - LOAD_IN_4BIT=false
      - LOAD_IN_8BIT=false
    # Volumes for persistent storage (host bind mounts relative to this file)
    volumes:
      - ./models:/app/models
      - ./outputs:/app/outputs
      - ~/.cache/huggingface:/root/.cache/huggingface
    # Port mapping (for API server) — quoted to avoid YAML sexagesimal parsing
    ports:
      - "8000:8000"
    # Keep container running for interactive attach
    stdin_open: true
    tty: true
    # Restart policy
    restart: unless-stopped
    # Large /dev/shm for PyTorch dataloader / inter-process tensors
    shm_size: '16gb'
    # Health check — unhealthy when CUDA is not visible inside the container
    healthcheck:
      test: ["CMD", "python3", "-c", "import torch; assert torch.cuda.is_available()"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
| # Optional: Quantized version (4-bit) | |
| kirim-v1-4bit: | |
| build: | |
| context: . | |
| dockerfile: Dockerfile | |
| image: kirim-ai/kirim-v1-base:4bit | |
| container_name: kirim-v1-4bit | |
| profiles: ["quantized"] | |
| deploy: | |
| resources: | |
| reservations: | |
| devices: | |
| - driver: nvidia | |
| count: 1 | |
| capabilities: [gpu] | |
| environment: | |
| - CUDA_VISIBLE_DEVICES=0 | |
| - HF_HOME=/app/models/.cache | |
| - LOAD_IN_4BIT=true | |
| volumes: | |
| - ./models:/app/models | |
| - ./outputs:/app/outputs | |
| ports: | |
| - "8001:8000" | |
| stdin_open: true | |
| tty: true | |
| restart: unless-stopped | |
| shm_size: '8gb' | |
| command: ["python3", "inference.py", "--load_in_4bit", "--chat"] | |
# NOTE(review): these named volumes are declared but unused — both services
# mount host bind paths (./models, ./outputs) instead. Kept in case external
# tooling references them; switch services to `models:/app/models` to use them.
volumes:
  models:
  outputs:

# Name the default network explicitly so both services share "kirim-network".
networks:
  default:
    name: kirim-network