""" Hugging Face Spaces Load Testing for IDWeek Agents =================================================== Simulates 150 concurrent users interacting with the Gradio interface: - Agent generation - Chat with agents - Tool usage (PubMed, guidelines, etc.) - Session isolation validation Metrics tracked: - End-to-end response time - Gradio concurrency limits - Session state integrity - Error rates and types Usage: python scripts/load_test_huggingface_spaces.py --users 150 --duration 60 --url https://huggingface.co/spaces/YOUR_SPACE """ import asyncio import aiohttp import time import statistics import argparse import json from dataclasses import dataclass, field from typing import List, Dict @dataclass class RequestMetrics: """Metrics for a single HTTP request.""" operation: str duration_ms: float status_code: int success: bool error: str = "" @dataclass class LoadTestResults: """Aggregated load test results.""" total_requests: int = 0 successful_requests: int = 0 failed_requests: int = 0 response_times: List[float] = field(default_factory=list) errors: Dict[str, int] = field(default_factory=dict) operations: Dict[str, int] = field(default_factory=dict) status_codes: Dict[int, int] = field(default_factory=dict) def add_metric(self, metric: RequestMetrics): self.total_requests += 1 if metric.success: self.successful_requests += 1 else: self.failed_requests += 1 self.errors[metric.error] = self.errors.get(metric.error, 0) + 1 self.response_times.append(metric.duration_ms) self.operations[metric.operation] = self.operations.get(metric.operation, 0) + 1 self.status_codes[metric.status_code] = self.status_codes.get(metric.status_code, 0) + 1 def print_summary(self, duration_sec: float): print("\n" + "=" * 70) print("HUGGING FACE SPACES LOAD TEST RESULTS") print("=" * 70) print(f"Duration: {duration_sec:.1f}s") print(f"Total Requests: {self.total_requests}") print(f"Successful: {self.successful_requests} ({self.successful_requests/self.total_requests*100:.1f}%)") print(f"Failed: {self.failed_requests} ({self.failed_requests/self.total_requests*100:.1f}%)") print(f"Throughput: {self.total_requests/duration_sec:.2f} req/s") if self.response_times: print(f"\nResponse Times (ms):") print(f" p50: {statistics.median(self.response_times):.0f}") print(f" p95: {statistics.quantiles(self.response_times, n=20)[18]:.0f}") print(f" p99: {statistics.quantiles(self.response_times, n=100)[98]:.0f}") print(f" Max: {max(self.response_times):.0f}") if self.operations: print(f"\nOperations Breakdown:") for op, count in sorted(self.operations.items(), key=lambda x: -x[1]): print(f" {op}: {count}") if self.status_codes: print(f"\nHTTP Status Codes:") for code, count in sorted(self.status_codes.items()): print(f" {code}: {count}") if self.errors: print(f"\nErrors:") for err, count in sorted(self.errors.items(), key=lambda x: -x[1]): print(f" {err}: {count}") print("=" * 70) # User interaction scenarios SCENARIOS = [ { "name": "generate_agent", "endpoint": "/api/predict", "weight": 10 # Less frequent }, { "name": "chat_with_agent", "endpoint": "/api/predict", "weight": 60 # Most common }, { "name": "load_patient_card", "endpoint": "/api/predict", "weight": 20 }, { "name": "view_agent_list", "endpoint": "/api/predict", "weight": 10 }, ] async def make_gradio_request(session: aiohttp.ClientSession, base_url: str, scenario: dict) -> RequestMetrics: """Simulate a Gradio API request.""" start = time.time() url = f"{base_url.rstrip('/')}/api/predict/" # Simplified payload (real app would have more complex data) payload = { 
"fn_index": 0, "data": ["test_user", "test_message"], "session_hash": f"user_{id(session)}_{start}", # Unique session per request } try: async with session.post(url, json=payload, timeout=aiohttp.ClientTimeout(total=60)) as response: duration_ms = (time.time() - start) * 1000 status = response.status success = 200 <= status < 300 return RequestMetrics( operation=scenario["name"], duration_ms=duration_ms, status_code=status, success=success, error="" if success else f"HTTP_{status}", ) except asyncio.TimeoutError: duration_ms = (time.time() - start) * 1000 return RequestMetrics( operation=scenario["name"], duration_ms=duration_ms, status_code=0, success=False, error="TimeoutError", ) except Exception as e: duration_ms = (time.time() - start) * 1000 return RequestMetrics( operation=scenario["name"], duration_ms=duration_ms, status_code=0, success=False, error=type(e).__name__, ) async def simulate_user(user_id: int, base_url: str, results: LoadTestResults, duration_sec: int): """Simulate a single user interacting with Gradio interface.""" import random async with aiohttp.ClientSession() as session: end_time = time.time() + duration_sec # Weighted scenario selection scenario_pool = [] for scenario in SCENARIOS: scenario_pool.extend([scenario] * scenario["weight"]) request_count = 0 while time.time() < end_time: scenario = random.choice(scenario_pool) metric = await make_gradio_request(session, base_url, scenario) results.add_metric(metric) request_count += 1 # Random delay between requests (1-5 seconds, simulating user interaction) await asyncio.sleep(random.uniform(1.0, 5.0)) print(f"User {user_id:3d} completed {request_count} requests") async def run_load_test(num_users: int, duration_sec: int, base_url: str): """Run the load test with specified users and duration.""" print(f"Starting HF Spaces load test: {num_users} concurrent users for {duration_sec}s") print(f"Target URL: {base_url}") print(f"Expected: ~{num_users * (duration_sec / 3)} total requests (avg 1 req per 3s per user)") print("-" * 70) results = LoadTestResults() start_time = time.time() # Launch all user simulations concurrently tasks = [ simulate_user(i+1, base_url, results, duration_sec) for i in range(num_users) ] await asyncio.gather(*tasks) actual_duration = time.time() - start_time results.print_summary(actual_duration) def main(): parser = argparse.ArgumentParser(description="Load test Hugging Face Spaces for IDWeek Agents") parser.add_argument("--users", type=int, default=150, help="Number of concurrent users (default: 150)") parser.add_argument("--duration", type=int, default=60, help="Test duration in seconds (default: 60)") parser.add_argument("--url", type=str, required=True, help="Base URL of HF Space (e.g., https://your-space.hf.space)") args = parser.parse_args() print("\n" + "=" * 70) print("Hugging Face Spaces Load Test for IDWeek Agents") print("=" * 70) print(f"Simulating {args.users} concurrent users for {args.duration} seconds") print(f"Target: {args.url}") print("=" * 70 + "\n") print("⚠️ WARNING: This will generate real load on your HF Space!") print("Make sure you have:") print(" 1. Upgraded to a paid Space tier (default free tier: 1-4 concurrent)") print(" 2. Configured appropriate Gradio concurrency settings") print(" 3. 


def main():
    parser = argparse.ArgumentParser(description="Load test Hugging Face Spaces for IDWeek Agents")
    parser.add_argument("--users", type=int, default=150, help="Number of concurrent users (default: 150)")
    parser.add_argument("--duration", type=int, default=60, help="Test duration in seconds (default: 60)")
    parser.add_argument("--url", type=str, required=True, help="Base URL of HF Space (e.g., https://your-space.hf.space)")
    args = parser.parse_args()

    print("\n" + "=" * 70)
    print("Hugging Face Spaces Load Test for IDWeek Agents")
    print("=" * 70)
    print(f"Simulating {args.users} concurrent users for {args.duration} seconds")
    print(f"Target: {args.url}")
    print("=" * 70 + "\n")

    print("⚠️  WARNING: This will generate real load on your HF Space!")
    print("Make sure you have:")
    print("  1. Upgraded to a paid Space tier (default free tier: 1-4 concurrent requests)")
    print("  2. Configured appropriate Gradio concurrency settings")
    print("  3. Set up monitoring of HF Space logs during the test")
    print("\nPress Ctrl+C within 5 seconds to cancel...")

    try:
        time.sleep(5)
    except KeyboardInterrupt:
        print("\n\nTest cancelled.")
        return

    print("\nStarting test...\n")
    asyncio.run(run_load_test(args.users, args.duration, args.url))

    print("\n\n📊 Next Steps:")
    print("1. Check HF Space logs for errors, memory usage, and CPU")
    print("2. Review Gradio queue depth and concurrency limits")
    print("3. Compare p95 latency to the OpenAI API test (expect +2-5s overhead)")
    print("4. Validate session isolation (check for cross-user data leaks)")


if __name__ == "__main__":
    main()