---
# Docker Compose definition for the qwen3-codex-server service.
# Builds the image from the current directory, publishes port 8000, and
# bind-mounts the GGUF model file read-only into /app.
version: '3.8'

services:
  qwen3-codex-server:
    build: .
    ports:
      # Quoted so the host:container mapping is always read as a string.
      - "8000:8000"
    volumes:
      # Model file is mounted read-only (:ro) from the project directory.
      - ./Qwen3-4B-Function-Calling-Pro.gguf:/app/Qwen3-4B-Function-Calling-Pro.gguf:ro
    environment:
      # Disable CUDA for CPU-only mode.
      # Mapping form is used so the value is a genuinely empty string; in
      # list form (`- CUDA_VISIBLE_DEVICES=""`) the quote characters would
      # be passed through literally as part of the value.
      CUDA_VISIBLE_DEVICES: ""
    restart: unless-stopped
    healthcheck:
      # NOTE(review): assumes curl is installed in the image and that the
      # server exposes /health on port 8000 -- confirm against the
      # Dockerfile and the application.
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Grace period before failed probes count towards `retries`.
      start_period: 40s
    deploy:
      # NOTE(review): how `deploy.resources` is honoured depends on the
      # Compose implementation/version in use -- confirm for the target
      # runtime.
      resources:
        limits:
          memory: 8G
        reservations:
          memory: 6G