Trouter-Library committed on
Commit
b7062e4
·
verified ·
1 Parent(s): e9b17f1

Create monitoring_config.json

Browse files
Files changed (1) hide show
  1. monitoring_config.json +193 -0
monitoring_config.json ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "monitoring": {
3
+ "enabled": true,
4
+ "interval_seconds": 15,
5
+ "retention_days": 30
6
+ },
7
+ "metrics": {
8
+ "system": {
9
+ "enabled": true,
10
+ "collect": [
11
+ "cpu_usage",
12
+ "memory_usage",
13
+ "disk_usage",
14
+ "network_io"
15
+ ]
16
+ },
17
+ "gpu": {
18
+ "enabled": true,
19
+ "collect": [
20
+ "gpu_utilization",
21
+ "gpu_memory_used",
22
+ "gpu_memory_total",
23
+ "gpu_temperature",
24
+ "gpu_power_usage"
25
+ ],
26
+ "alert_thresholds": {
27
+ "temperature_celsius": 85,
28
+ "memory_utilization_percent": 95,
29
+ "power_watts": 400
30
+ }
31
+ },
32
+ "model": {
33
+ "enabled": true,
34
+ "collect": [
35
+ "requests_per_second",
36
+ "tokens_per_second",
37
+ "average_latency_ms",
38
+ "p50_latency_ms",
39
+ "p95_latency_ms",
40
+ "p99_latency_ms",
41
+ "error_rate",
42
+ "active_connections",
43
+ "queue_depth"
44
+ ]
45
+ },
46
+ "inference": {
47
+ "enabled": true,
48
+ "collect": [
49
+ "prompt_tokens",
50
+ "completion_tokens",
51
+ "total_tokens",
52
+ "generation_time_ms",
53
+ "preprocessing_time_ms",
54
+ "postprocessing_time_ms"
55
+ ]
56
+ }
57
+ },
58
+ "alerts": {
59
+ "enabled": true,
60
+ "channels": [
61
+ "email",
62
+ "slack",
63
+ "pagerduty"
64
+ ],
65
+ "rules": [
66
+ {
67
+ "name": "high_error_rate",
68
+ "condition": "error_rate > 0.05",
69
+ "duration_seconds": 300,
70
+ "severity": "critical",
71
+ "message": "Error rate exceeded 5% for 5 minutes"
72
+ },
73
+ {
74
+ "name": "high_latency",
75
+ "condition": "p95_latency_ms > 5000",
76
+ "duration_seconds": 180,
77
+ "severity": "warning",
78
+ "message": "P95 latency exceeded 5 seconds"
79
+ },
80
+ {
81
+ "name": "gpu_temperature_high",
82
+ "condition": "gpu_temperature > 85",
83
+ "duration_seconds": 60,
84
+ "severity": "critical",
85
+ "message": "GPU temperature critically high"
86
+ },
87
+ {
88
+ "name": "memory_pressure",
89
+ "condition": "gpu_memory_used / gpu_memory_total > 0.95",
90
+ "duration_seconds": 300,
91
+ "severity": "warning",
92
+ "message": "GPU memory utilization above 95%"
93
+ },
94
+ {
95
+ "name": "low_throughput",
96
+ "condition": "tokens_per_second < 10",
97
+ "duration_seconds": 600,
98
+ "severity": "warning",
99
+ "message": "Throughput below 10 tokens/second"
100
+ }
101
+ ]
102
+ },
103
+ "logging": {
104
+ "level": "INFO",
105
+ "format": "json",
106
+ "outputs": [
107
+ {
108
+ "type": "file",
109
+ "path": "./logs/monitoring.log",
110
+ "rotation": "daily",
111
+ "retention_days": 30
112
+ },
113
+ {
114
+ "type": "stdout",
115
+ "enabled": true
116
+ },
117
+ {
118
+ "type": "elasticsearch",
119
+ "enabled": false,
120
+ "host": "localhost:9200",
121
+ "index": "helion-metrics"
122
+ }
123
+ ]
124
+ },
125
+ "prometheus": {
126
+ "enabled": true,
127
+ "port": 8001,
128
+ "path": "/metrics",
129
+ "namespace": "helion",
130
+ "subsystem": "inference",
131
+ "labels": {
132
+ "model": "Helion-2.5-Rnd",
133
+ "version": "2.5.0-rnd",
134
+ "environment": "production"
135
+ }
136
+ },
137
+ "grafana": {
138
+ "enabled": true,
139
+ "dashboards": [
140
+ {
141
+ "name": "Helion Overview",
142
+ "file": "./monitoring/dashboards/overview.json",
143
+ "refresh": "30s"
144
+ },
145
+ {
146
+ "name": "GPU Metrics",
147
+ "file": "./monitoring/dashboards/gpu.json",
148
+ "refresh": "15s"
149
+ },
150
+ {
151
+ "name": "Inference Performance",
152
+ "file": "./monitoring/dashboards/inference.json",
153
+ "refresh": "30s"
154
+ }
155
+ ]
156
+ },
157
+ "health_checks": {
158
+ "enabled": true,
159
+ "endpoint": "/health",
160
+ "interval_seconds": 30,
161
+ "timeout_seconds": 10,
162
+ "checks": [
163
+ {
164
+ "name": "model_loaded",
165
+ "type": "internal",
166
+ "critical": true
167
+ },
168
+ {
169
+ "name": "gpu_available",
170
+ "type": "internal",
171
+ "critical": true
172
+ },
173
+ {
174
+ "name": "inference_responsive",
175
+ "type": "endpoint",
176
+ "url": "http://localhost:8000/v1/models",
177
+ "critical": false
178
+ }
179
+ ]
180
+ },
181
+ "tracing": {
182
+ "enabled": true,
183
+ "sample_rate": 0.1,
184
+ "exporter": "jaeger",
185
+ "endpoint": "http://localhost:14268/api/traces"
186
+ },
187
+ "profiling": {
188
+ "enabled": false,
189
+ "interval_seconds": 3600,
190
+ "duration_seconds": 300,
191
+ "output_dir": "./profiling"
192
+ }
193
+ }