"""
Background Scheduler for API Health Checks

Runs periodic health checks with APScheduler
"""

import asyncio
import logging
from datetime import datetime
from typing import Optional

# APScheduler's scheduler class is aliased because this module defines its
# own class named ``BackgroundScheduler``.
from apscheduler.schedulers.background import BackgroundScheduler as APScheduler
from apscheduler.triggers.interval import IntervalTrigger

# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)


class BackgroundScheduler:
    """Background scheduler for periodic health checks.

    Wraps an APScheduler background scheduler (referenced in this module as
    ``APScheduler``, since this class reuses the ``BackgroundScheduler``
    name) and uses it to run ``monitor.check_all()`` every
    ``interval_minutes`` minutes, persisting the results through
    ``database``.
    """

    def __init__(self, monitor, database, interval_minutes: int = 5):
        """
        Initialize the scheduler

        Args:
            monitor: APIMonitor instance
            database: Database instance
            interval_minutes: Interval between health checks
        """
        self.monitor = monitor
        self.database = database
        self.interval_minutes = interval_minutes
        self.scheduler = APScheduler()
        # Set at the beginning of every health-check run; None until the
        # first run has started.
        self.last_run_time: Optional[datetime] = None
        self._running = False

    def _run_health_check(self):
        """Run one health check cycle and save the results.

        Steps, in order: record ``last_run_time``, await
        ``monitor.check_all()``, save the results, raise incidents/alerts
        for offline tier 1 providers, then call the database's cleanup
        (7 days) and response-time aggregation (1 hour) routines.

        Any exception is logged (with traceback) and swallowed so that a
        failing run never propagates into the scheduler or the caller.

        Note: the check is driven with ``asyncio.run``, which raises
        ``RuntimeError`` when invoked from a thread that already has a
        running event loop; that error is logged like any other failure.
        """
        try:
            logger.info("Running scheduled health check...")
            self.last_run_time = datetime.now()

            results = asyncio.run(self.monitor.check_all())
            self.database.save_health_checks(results)

            self._report_tier1_outages(results)

            logger.info(f"Health check completed. Checked {len(results)} providers.")

            self.database.cleanup_old_data(days=7)
            self.database.aggregate_response_times(period_hours=1)
        except Exception as e:
            # logger.exception keeps the original message and adds the
            # traceback, which logger.error(str(e)) used to drop.
            logger.exception(f"Error in scheduled health check: {e}")

    def _resources_by_name(self):
        """Return the configured resources indexed by their ``name`` key."""
        index = {}
        for resource in self.monitor.config.get_all_resources():
            # setdefault keeps the first resource for a duplicated name,
            # matching a first-match linear search.
            index.setdefault(resource.get('name'), resource)
        return index

    def _report_tier1_outages(self, results):
        """Create an incident and an alert for each offline tier 1 provider."""
        # Built on the first offline result only, so the configuration is
        # read at most once per run and not at all when nothing is offline.
        resources = None

        for result in results:
            if result.status.value != "offline":
                continue

            if resources is None:
                resources = self._resources_by_name()

            resource = resources.get(result.provider_name)
            # Resources without a 'tier' key are treated as tier 3.
            if not resource or resource.get('tier', 3) != 1:
                continue

            self.database.create_incident(
                provider_name=result.provider_name,
                category=result.category,
                incident_type="service_offline",
                description=f"Tier 1 provider offline: {result.error_message}",
                severity="high"
            )
            self.database.create_alert(
                provider_name=result.provider_name,
                alert_type="tier1_offline",
                message=f"Critical: Tier 1 provider {result.provider_name} is offline"
            )

    def start(self):
        """Start the scheduler and run one health check immediately.

        Does nothing when the scheduler is already marked as running.
        Errors raised while registering the job or starting the scheduler
        are logged and swallowed; ``is_running()`` then stays False.
        """
        if self._running:
            return

        try:
            self.scheduler.add_job(
                func=self._run_health_check,
                trigger=IntervalTrigger(minutes=self.interval_minutes),
                id='health_check_job',
                name='API Health Check',
                replace_existing=True
            )

            self.scheduler.start()
            self._running = True
            logger.info(f"Scheduler started. Running every {self.interval_minutes} minutes.")

            # The first check runs synchronously in the calling thread
            # instead of waiting for the first interval to elapse.
            self._run_health_check()
        except Exception as e:
            logger.exception(f"Error starting scheduler: {e}")

    def stop(self):
        """Stop the scheduler if it is currently running."""
        if self._running:
            self.scheduler.shutdown()
            self._running = False
            logger.info("Scheduler stopped.")

    def update_interval(self, interval_minutes: int):
        """Update the check interval.

        The new value is always stored; the live job is rescheduled only
        while the scheduler is running.

        Args:
            interval_minutes: New interval between health checks, in minutes.
        """
        self.interval_minutes = interval_minutes

        if self._running:
            self.scheduler.reschedule_job(
                job_id='health_check_job',
                trigger=IntervalTrigger(minutes=interval_minutes)
            )
            logger.info(f"Scheduler interval updated to {interval_minutes} minutes.")

    def is_running(self) -> bool:
        """Check if scheduler is running"""
        return self._running

    def trigger_immediate_check(self):
        """Trigger an immediate health check in the calling thread."""
        logger.info("Triggering immediate health check...")
        self._run_health_check()