yetessam committed on
Commit
6bf9c98
·
verified ·
1 Parent(s): adf4599

Refactored to be UI-free and minimal

Browse files
Files changed (1) hide show
  1. status_check.py +14 -117
status_check.py CHANGED
@@ -1,121 +1,18 @@
1
- import os
2
- import time
3
- import requests
4
 
5
  from typing import Optional, Tuple
6
- from ui.statusui import StatusUI
7
- from checks.health_check import check_model_endpoint
8
- from agents.model import huggingface_model_uri
9
-
10
-
11
- def wake_up_endpoint(
12
- endpoint_uri: str,
13
- ui,
14
- max_wait: int = 300,
15
- initial_delay: float = 3.0,
16
- backoff_factor: float = 1.5,
17
- max_retry_delay: float = 10.0
18
- ) -> Tuple[bool, Optional[str]]:
19
- """
20
- Poll the endpoint until it responds OK or timeout.
21
-
22
- Args:
23
- endpoint_uri: The endpoint URL to monitor
24
- ui: UI object for status updates
25
- max_wait: Maximum total wait time in seconds (minimum 60s enforced)
26
- initial_delay: Initial delay between retries in seconds
27
- backoff_factor: Multiplier for exponential backoff
28
- max_retry_delay: Maximum delay between retries in seconds
29
-
30
- Returns:
31
- Tuple of (success: bool, error_message: Optional[str])
32
- """
33
- # Configuration validation
34
- max_wait = max(max_wait, 60)
35
- current_delay = min(initial_delay, max_retry_delay)
36
-
37
- # Prepare request components
38
- headers = {}
39
- if hf_token := os.environ.get("HF_TOKEN"):
40
- headers["Authorization"] = f"Bearer {hf_token}"
41
-
42
- payload = {"inputs": "ping"}
43
- timeout = min(5, current_delay * 0.8) # Ensure timeout is less than delay
44
-
45
- start_time = time.time()
46
- last_status = None
47
-
48
- while (time.time() - start_time) < max_wait:
49
- try:
50
- # Log attempt
51
- if endpoint_uri != last_status:
52
- ui.append(f"Pinging endpoint: {endpoint_uri}")
53
- last_status = endpoint_uri
54
-
55
- # Make the request
56
- response = requests.post(
57
- endpoint_uri,
58
- headers=headers,
59
- json=payload,
60
- timeout=timeout
61
- )
62
-
63
- if response.ok:
64
- ui.append("✅ Endpoint is awake and responsive")
65
- return True, None
66
-
67
- # Handle specific HTTP status codes
68
- if response.status_code in {503, 504}:
69
- status_msg = f"Endpoint warming up (HTTP {response.status_code})"
70
- else:
71
- status_msg = f"Unexpected response (HTTP {response.status_code})"
72
-
73
- ui.append(f"{status_msg}, retrying in {current_delay:.1f}s...")
74
-
75
- except requests.exceptions.RequestException as e:
76
- ui.append(f"Connection error ({type(e).__name__}), retrying in {current_delay:.1f}s...")
77
-
78
- # Wait before next attempt with exponential backoff
79
- time.sleep(current_delay)
80
- current_delay = min(current_delay * backoff_factor, max_retry_delay)
81
- timeout = min(5, current_delay * 0.8)
82
-
83
- # Timeout reached
84
- error_msg = f"❌ Timed out after {max_wait}s waiting for endpoint"
85
- ui.append(error_msg)
86
- return False, error_msg
87
 
88
- def run_status_checks():
89
- """Run all status checks and return endpoint URI if successful"""
90
- ui = StatusUI()
91
- #ui.launch()
92
- # Then launch it (non-blocking)
93
- ui.launch(inbrowser=False, prevent_thread_lock=True)
94
-
95
- # Now send messages
96
- ui.append("Starting prechecks...")
97
- time.sleep(0.5) # Brief pause for UI to initialize
98
- ui.append("Checking endpoint..")
99
- ui.append("Starting prechecks...")
100
- ui.append("Checking endpoint..")
101
- endpoint_uri = huggingface_model_uri() # Get the URI for the endpoint
102
- ui.append(endpoint_uri)
103
 
104
- # Wake it up before health check
105
- wake_up_successful = wake_up_endpoint(endpoint_uri, ui)
106
- success, error_msg = wake_up_endpoint(endpoint_uri, ui)
107
- if not success:
108
- ui.append("Warning: Could not wake up the endpoint. Exiting.")
109
-
110
- else:
111
- ui.append("✅ End point responded OK.")
112
-
113
- is_healthy, status_info = check_model_endpoint(endpoint_uri) # Test the endpoint
114
-
115
- if not is_healthy:
116
- from checks.failed_check import create_failed_gradio_ui
117
- interface = create_failed_gradio_ui(status_info)
118
- interface.launch(show_error=True, share=True)
119
- return None
120
-
121
- return endpoint_uri
 
1
+ # status_check.py
 
 
2
 
3
  from typing import Optional, Tuple
4
+ import os
5
+ import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ def resolve_endpoint() -> Optional[str]:
8
+ # however you get it today; env var is simplest
9
+ return os.environ.get("HF_ENDPOINT_URI")
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ def is_endpoint_healthy(uri: str, timeout: float = 5.0) -> Tuple[bool, str]:
12
+ if not uri:
13
+ return False, "No endpoint URI configured."
14
+ try:
15
+ r = requests.post(uri, json={"inputs": "ping"}, timeout=timeout)
16
+ return (True, "OK") if r.ok else (False, f"HTTP {r.status_code}")
17
+ except requests.exceptions.RequestException as e:
18
+ return False, f"{type(e).__name__}"