Spaces:
Sleeping
Sleeping
| """ | |
| Realistic Load Test for HF Spaces - Avoids Rate Limiting | |
| ========================================================== | |
| Tests the upgraded HF Space with realistic user patterns without triggering rate limits. | |
| Uses gradio_client library for proper authentication and API interaction. | |
| Usage: | |
| pip install gradio_client | |
| python scripts/load_test_realistic.py --users 50 --duration 30 | |
| """ | |
import argparse
import asyncio
import os
import random
import statistics
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from typing import List, Dict
| try: | |
| from gradio_client import Client | |
| except ImportError: | |
| print("ERROR: gradio_client not installed") | |
| print("Run: pip install gradio_client") | |
| exit(1) | |
@dataclass
class RequestMetrics:
    """Metrics for a single request.

    Declared as a dataclass so it can be built with keyword arguments, which
    is how the rest of this file constructs it.
    """

    user_id: int        # label of the simulated user that issued the request
    operation: str      # short name of the operation that was timed
    duration_ms: float  # elapsed time of the request in milliseconds
    success: bool       # True when the request completed without raising
    error: str = ""     # exception class name for failures, empty on success
@dataclass
class LoadTestResults:
    """Aggregated results.

    Collects per-request metrics from all simulated users and prints a summary.
    ``add_metric`` is called concurrently from worker threads, so updates are
    serialised with an internal lock.
    """

    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    # Durations (ms) of successful requests only; failures are not timed here.
    response_times: List[float] = field(default_factory=list)
    # Failure count keyed by the metric's ``error`` string.
    errors: Dict[str, int] = field(default_factory=dict)
    # Excluded from __init__/repr/eq so the public constructor is unchanged.
    _lock: threading.Lock = field(
        default_factory=threading.Lock, init=False, repr=False, compare=False
    )

    def add_metric(self, metric: "RequestMetrics") -> None:
        """Record one request outcome.

        Args:
            metric: Object exposing ``success``, ``duration_ms`` and ``error``.
        """
        # The counter increments are read-modify-write operations; without
        # the lock concurrent callers could lose updates.
        with self._lock:
            self.total_requests += 1
            if metric.success:
                self.successful_requests += 1
                self.response_times.append(metric.duration_ms)
            else:
                self.failed_requests += 1
                self.errors[metric.error] = self.errors.get(metric.error, 0) + 1

    def _quantile(self, n: int, index: int) -> float:
        """Return cut point ``index`` of ``n`` quantiles of the response times.

        ``statistics.quantiles`` needs at least two samples on older Python
        versions, so a lone sample is returned as-is for every percentile.
        Must only be called when ``response_times`` is non-empty.
        """
        samples = self.response_times
        if len(samples) < 2:
            return samples[0]
        return statistics.quantiles(samples, n=n)[index]

    def print_summary(self, duration_sec: float) -> None:
        """Print totals, latency percentiles, errors and an assessment.

        Args:
            duration_sec: Wall-clock length of the test, used for throughput.
        """
        total = self.total_requests
        # Guard every ratio: a run where nothing was recorded (or a zero
        # duration) must still produce a report instead of crashing.
        success_rate = self.successful_requests / total * 100 if total else 0.0
        failure_rate = self.failed_requests / total * 100 if total else 0.0
        throughput = total / duration_sec if duration_sec > 0 else 0.0
        rule = "=" * 70

        print("\n" + rule)
        print("REALISTIC LOAD TEST RESULTS - CPU UPGRADE")
        print(rule)
        print(f"Test Duration: {duration_sec:.1f}s")
        print(f"Total Requests: {total}")
        print(f"Successful: {self.successful_requests} ({success_rate:.1f}%)")
        print(f"Failed: {self.failed_requests} ({failure_rate:.1f}%)")

        samples = self.response_times
        p50 = p95 = None
        if samples:
            p50 = statistics.median(samples)
            p95 = self._quantile(20, 18)
            print("\nResponse Times (ms):")
            print(f" p50 (Median): {p50:.0f} ms")
            print(f" p75: {self._quantile(4, 2):.0f} ms")
            print(f" p95: {p95:.0f} ms")
            print(f" p99: {self._quantile(100, 98):.0f} ms")
            print(f" Max: {max(samples):.0f} ms")
            print(f" Min: {min(samples):.0f} ms")
            print(f" Average: {sum(samples) / len(samples):.0f} ms")

        if self.errors:
            print("\nErrors Encountered:")
            # Most frequent error first.
            for err, count in sorted(self.errors.items(), key=lambda item: -item[1]):
                print(f" {err}: {count}")

        print(f"\nThroughput: {throughput:.2f} req/s")

        # Performance assessment (needs at least one successful request).
        if samples:
            print(f"\n{rule}")
            print("PERFORMANCE ASSESSMENT:")
            print(rule)

            if success_rate >= 95:
                print("✅ Success Rate: EXCELLENT (>95%)")
            elif success_rate >= 90:
                print("⚠️ Success Rate: GOOD (90-95%)")
            else:
                print("❌ Success Rate: POOR (<90%)")

            if p50 < 10000:  # < 10s
                print("✅ Median Response: EXCELLENT (<10s)")
            elif p50 < 20000:  # < 20s
                print("⚠️ Median Response: ACCEPTABLE (10-20s)")
            else:
                print("❌ Median Response: SLOW (>20s)")

            if p95 < 30000:  # < 30s
                print("✅ p95 Response: EXCELLENT (<30s)")
            elif p95 < 60000:  # < 60s
                print("⚠️ p95 Response: ACCEPTABLE (30-60s)")
            else:
                print("❌ p95 Response: SLOW (>60s)")

        print(rule)
def simulate_user_session(user_id: int, space_url: str, duration_sec: int, results: LoadTestResults):
    """Simulate a single user's session with the Space.

    Creates one ``Client`` for the user, then calls ``client.view_api()`` in a
    loop until ``duration_sec`` seconds have passed, sleeping a random 1-3
    seconds after every call. Each call is recorded in ``results`` as a
    ``space_access`` metric.

    A failed connection is printed and also recorded as a failed ``connect``
    metric, so users that never reach the Space lower the success rate
    instead of silently vanishing from the totals.

    Args:
        user_id: Number used to label this user's metrics and log lines.
        space_url: Value passed unchanged to ``Client(...)``.
        duration_sec: How long the request loop runs, in seconds.
        results: Shared collector that receives one metric per attempt.
    """
    connect_start = time.perf_counter()
    try:
        # Connect to the Space (this tests the actual app endpoint)
        client = Client(space_url)
    except Exception as e:
        results.add_metric(RequestMetrics(
            user_id=user_id,
            operation="connect",
            duration_ms=(time.perf_counter() - connect_start) * 1000,
            success=False,
            error=type(e).__name__,
        ))
        print(f"❌ User {user_id:3d} failed to connect: {e}")
        return

    # Monotonic clocks: immune to wall-clock adjustments during the run.
    deadline = time.monotonic() + duration_sec
    request_count = 0
    while time.monotonic() < deadline:
        start = time.perf_counter()
        try:
            # Simple ping test - just access the Space.
            # NOTE(review): depending on the gradio_client version, view_api()
            # may print the API description on every call and may answer from
            # data fetched at connect time - confirm it exercises the Space.
            client.view_api()
        except Exception as e:
            failure = type(e).__name__
        else:
            failure = None
        duration_ms = (time.perf_counter() - start) * 1000

        succeeded = failure is None
        results.add_metric(RequestMetrics(
            user_id=user_id,
            operation="space_access",
            duration_ms=duration_ms,
            success=succeeded,
            error=failure or "",
        ))
        if succeeded:
            request_count += 1

        # Random delay (1-3 seconds between requests)
        time.sleep(random.uniform(1.0, 3.0))

    print(f"✅ User {user_id:3d} completed {request_count} requests")
def run_load_test(num_users: int, duration_sec: int, space_url: str):
    """Run the load test.

    Starts ``num_users`` concurrent ``simulate_user_session`` workers in a
    thread pool, waits for all of them, prints the aggregated summary and
    then a recommendation based on the overall success ratio.

    Args:
        num_users: Number of concurrent simulated users; must be >= 1.
        duration_sec: Duration passed to every user session, in seconds.
        space_url: Target passed through to every user session.

    Raises:
        ValueError: If ``num_users`` is less than 1.
    """
    if num_users < 1:
        # ThreadPoolExecutor rejects max_workers <= 0 with the same exception
        # type; fail early with a message that names the real parameter.
        raise ValueError("num_users must be at least 1")

    rule = "=" * 70
    print(f"\n{rule}")
    print("REALISTIC LOAD TEST - CPU UPGRADE VALIDATION")
    print(rule)
    print(f"Space URL: {space_url}")
    print(f"Concurrent Users: {num_users}")
    print(f"Duration: {duration_sec} seconds")
    print(f"Expected Requests: ~{num_users * (duration_sec / 2)} (avg 1 req per 2s)")
    print(f"{rule}\n")

    results = LoadTestResults()
    start_time = time.time()

    # Use ThreadPoolExecutor for concurrent users
    with ThreadPoolExecutor(max_workers=num_users) as executor:
        futures = [
            executor.submit(simulate_user_session, user_number, space_url, duration_sec, results)
            for user_number in range(1, num_users + 1)
        ]
        # Wait for all to complete; result() re-raises any worker exception.
        for future in futures:
            future.result()

    actual_duration = time.time() - start_time
    results.print_summary(actual_duration)

    # A run that recorded nothing counts as a failure rather than dividing
    # by zero.
    total = results.total_requests
    success_ratio = results.successful_requests / total if total else 0.0

    # Recommendation
    print("\n" + rule)
    print("RECOMMENDATIONS:")
    print(rule)
    if success_ratio >= 0.95:
        print("✅ Your upgraded CPU tier is performing well!")
        print("✅ Ready for 150-user workshop")
        print("💡 Consider setting sleep timer to 15-30 min to save costs")
    elif success_ratio >= 0.90:
        print("⚠️ Performance is acceptable but monitor during workshop")
        print("💡 Consider testing with more users to validate capacity")
    else:
        print("❌ Performance issues detected")
        print("💡 Check HF Space logs for errors")
        print("💡 Verify queue configuration is active")
    print(rule + "\n")
def main():
    """Parse command-line options and start the load test.

    Options:
        --users: Number of concurrent users (default 50). Values above 100
            require interactive confirmation.
        --duration: Test duration in seconds (default 30).
        --space: HF Space name in ``owner/space-name`` form.
    """
    parser = argparse.ArgumentParser(description="Realistic load test for HF Spaces")
    parser.add_argument("--users", type=int, default=50, help="Number of concurrent users (default: 50, max recommended: 100 to avoid rate limits)")
    parser.add_argument("--duration", type=int, default=30, help="Test duration in seconds (default: 30)")
    parser.add_argument("--space", type=str, default="John-jero/IDWeekAgents", help="HF Space name (owner/space-name)")
    args = parser.parse_args()

    # Validate inputs
    if args.users < 1:
        parser.error("--users must be at least 1")
    if args.duration < 1:
        parser.error("--duration must be at least 1 second")

    if args.users > 100:
        print("⚠️ WARNING: Using >100 users may trigger HF rate limits")
        print("Recommended: Start with 50 users, then try 75, then 100")
        try:
            response = input("Continue anyway? (y/n): ")
        except EOFError:
            # No interactive stdin (e.g. CI): treat as "no" instead of crashing.
            response = ""
        if response.strip().lower() != 'y':
            print("Test cancelled.")
            return

    # Pass the Space id itself. gradio_client's Client accepts either a Space
    # name ("owner/space") or the URL of the running Gradio app; the
    # https://huggingface.co/spaces/... page URL is neither, so it would be
    # treated as an app root and fail to connect.
    space_target = args.space

    print("\n🚀 Starting realistic load test...")
    print("This test validates that your upgraded CPU tier can handle concurrent users")
    print("without triggering HF rate limits.\n")
    run_load_test(args.users, args.duration, space_target)


if __name__ == "__main__":
    main()