# Manojb's picture
# Upload folder using huggingface_hub
# 812540e verified
# Docker Compose definition for a single service, `qwen3-codex-server`,
# published on port 8000 with a read-only bind mount of the GGUF model file.
version: '3.8'

services:
  qwen3-codex-server:
    # Build the image from the directory containing this file.
    build: .
    ports:
      # host:container — quoted so the mapping is always read as a string.
      - "8000:8000"
    volumes:
      # Model file is bind-mounted read-only (`:ro`) from the host.
      - ./Qwen3-4B-Function-Calling-Pro.gguf:/app/Qwen3-4B-Function-Calling-Pro.gguf:ro
    environment:
      # Disable CUDA for CPU-only mode.
      # Mapping form with an explicit empty string: in list form
      # (`- CUDA_VISIBLE_DEVICES=""`) the quote characters are passed through
      # literally, so the variable would be set to `""` instead of empty.
      CUDA_VISIBLE_DEVICES: ""
    restart: unless-stopped
    healthcheck:
      # NOTE(review): assumes `curl` is installed in the image and that the
      # server answers on /health at port 8000 — confirm against the image.
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failures during the first 40s do not count towards `retries`.
      start_period: 40s
    deploy:
      # NOTE(review): legacy docker-compose v1 only applies `deploy.resources`
      # when run with `--compatibility` — verify the target Compose version.
      resources:
        limits:
          memory: 8G
        reservations:
          memory: 6G