# IDAgentsFreshTest / scripts / load_test_realistic.py
# Commit 13537fe (IDAgents Developer):
#   "Add API load testing suite and rate limiters for workshop readiness"
"""
Realistic Load Test for HF Spaces - Avoids Rate Limiting
==========================================================
Tests the upgraded HF Space with realistic user patterns without triggering rate limits.
Uses gradio_client library for proper authentication and API interaction.
Usage:
pip install gradio_client
python scripts/load_test_realistic.py --users 50 --duration 30
"""
import argparse
import asyncio
import os
import random
import statistics
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from typing import Dict, List
# gradio_client is the only third-party dependency; fail fast with
# install instructions instead of a bare ImportError traceback.
try:
    from gradio_client import Client
except ImportError:
    print("ERROR: gradio_client not installed")
    print("Run: pip install gradio_client")
    exit(1)
@dataclass
class RequestMetrics:
    """Metrics for a single request."""
    user_id: int        # 1-based index of the simulated user that made the call
    operation: str      # label of the call made, e.g. "space_access"
    duration_ms: float  # wall-clock latency of the request in milliseconds
    success: bool       # True if the call returned without raising
    error: str = ""     # exception class name on failure, "" on success
@dataclass
class LoadTestResults:
    """Aggregated results for a whole load-test run.

    ``add_metric`` is invoked concurrently from many user threads, so all
    mutations of the counters below are serialized through an internal lock
    (bare ``+=`` / dict read-modify-write is not atomic across threads).
    """
    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    # Latencies (ms) of successful requests only.
    response_times: List[float] = field(default_factory=list)
    # Exception class name -> occurrence count.
    errors: Dict[str, int] = field(default_factory=dict)
    # Guards the mutable state above; not part of __init__/repr/eq, so the
    # public interface of the dataclass is unchanged.
    _lock: threading.Lock = field(default_factory=threading.Lock,
                                  init=False, repr=False, compare=False)

    def add_metric(self, metric: RequestMetrics):
        """Thread-safely record the outcome of one request."""
        with self._lock:
            self.total_requests += 1
            if metric.success:
                self.successful_requests += 1
                self.response_times.append(metric.duration_ms)
            else:
                self.failed_requests += 1
                self.errors[metric.error] = self.errors.get(metric.error, 0) + 1

    def print_summary(self, duration_sec: float):
        """Print a human-readable report.

        Safe for degenerate runs: zero recorded requests (previously a
        ZeroDivisionError) and a single response sample (previously a
        StatisticsError from ``statistics.quantiles``, which needs >= 2
        data points).
        """
        print("\n" + "=" * 70)
        print("REALISTIC LOAD TEST RESULTS - CPU UPGRADE")
        print("=" * 70)
        print(f"Test Duration: {duration_sec:.1f}s")
        print(f"Total Requests: {self.total_requests}")
        if self.total_requests == 0:
            # Every user failed before completing a request; the percentage
            # lines below would divide by zero.
            print("No requests recorded - check Space URL / connectivity.")
            print("=" * 70)
            return
        print(f"Successful: {self.successful_requests} ({self.successful_requests/self.total_requests*100:.1f}%)")
        print(f"Failed: {self.failed_requests} ({self.failed_requests/self.total_requests*100:.1f}%)")
        times = self.response_times
        p50 = p95 = None
        if times:
            p50 = statistics.median(times)
            if len(times) >= 2:
                # Compute each quantile set once; reused in the assessment.
                p75 = statistics.quantiles(times, n=4)[2]
                p95 = statistics.quantiles(times, n=20)[18]
                p99 = statistics.quantiles(times, n=100)[98]
            else:
                # With a single sample every percentile is that sample.
                p75 = p95 = p99 = times[0]
            print(f"\nResponse Times (ms):")
            print(f" p50 (Median): {p50:.0f} ms")
            print(f" p75: {p75:.0f} ms")
            print(f" p95: {p95:.0f} ms")
            print(f" p99: {p99:.0f} ms")
            print(f" Max: {max(times):.0f} ms")
            print(f" Min: {min(times):.0f} ms")
            print(f" Average: {sum(times)/len(times):.0f} ms")
        if self.errors:
            print(f"\nErrors Encountered:")
            for err, count in sorted(self.errors.items(), key=lambda x: -x[1]):
                print(f" {err}: {count}")
        print(f"\nThroughput: {self.total_requests/duration_sec:.2f} req/s")
        # Performance assessment
        if times:
            success_rate = self.successful_requests / self.total_requests * 100
            print(f"\n{'=' * 70}")
            print("PERFORMANCE ASSESSMENT:")
            print(f"{'=' * 70}")
            if success_rate >= 95:
                print("βœ… Success Rate: EXCELLENT (>95%)")
            elif success_rate >= 90:
                print("⚠️ Success Rate: GOOD (90-95%)")
            else:
                print("❌ Success Rate: POOR (<90%)")
            if p50 < 10000:  # < 10s
                print("βœ… Median Response: EXCELLENT (<10s)")
            elif p50 < 20000:  # < 20s
                print("⚠️ Median Response: ACCEPTABLE (10-20s)")
            else:
                print("❌ Median Response: SLOW (>20s)")
            if p95 < 30000:  # < 30s
                print("βœ… p95 Response: EXCELLENT (<30s)")
            elif p95 < 60000:  # < 60s
                print("⚠️ p95 Response: ACCEPTABLE (30-60s)")
            else:
                print("❌ p95 Response: SLOW (>60s)")
        print("=" * 70)
def simulate_user_session(user_id: int, space_url: str, duration_sec: int, results: LoadTestResults):
    """Simulate one user's session against the Space for ``duration_sec`` seconds.

    Each iteration calls the Space's API-description endpoint (a cheap
    round-trip that does not invoke any model code) and records latency and
    success/failure into the shared ``results`` aggregate. A random 1-3 s
    pause between requests mimics human pacing and keeps the aggregate rate
    below HF rate limits.
    """
    try:
        # Connecting validates that the actual app endpoint is reachable.
        client = Client(space_url)
        deadline = time.time() + duration_sec
        completed = 0
        while time.time() < deadline:
            start = time.time()
            try:
                # Return value is intentionally discarded; only the
                # round-trip latency matters here.
                client.view_api()
                elapsed_ms = (time.time() - start) * 1000
                results.add_metric(RequestMetrics(
                    user_id=user_id,
                    operation="space_access",
                    duration_ms=elapsed_ms,
                    success=True
                ))
                completed += 1
            except Exception as e:
                elapsed_ms = (time.time() - start) * 1000
                results.add_metric(RequestMetrics(
                    user_id=user_id,
                    operation="space_access",
                    duration_ms=elapsed_ms,
                    success=False,
                    error=str(type(e).__name__)
                ))
            # Random delay (1-3 seconds between requests)
            time.sleep(random.uniform(1.0, 3.0))
        print(f"βœ“ User {user_id:3d} completed {completed} requests")
    except Exception as e:
        print(f"βœ— User {user_id:3d} failed to connect: {e}")
def run_load_test(num_users: int, duration_sec: int, space_url: str):
    """Run the load test with ``num_users`` concurrent simulated users.

    Spawns one thread per user, waits for all sessions to finish, then
    prints the aggregated summary and a go/no-go recommendation.
    """
    print(f"\n{'=' * 70}")
    print("REALISTIC LOAD TEST - CPU UPGRADE VALIDATION")
    print(f"{'=' * 70}")
    print(f"Space URL: {space_url}")
    print(f"Concurrent Users: {num_users}")
    print(f"Duration: {duration_sec} seconds")
    print(f"Expected Requests: ~{num_users * (duration_sec / 2)} (avg 1 req per 2s)")
    print(f"{'=' * 70}\n")
    results = LoadTestResults()
    start_time = time.time()
    # Use ThreadPoolExecutor for concurrent users
    with ThreadPoolExecutor(max_workers=num_users) as executor:
        futures = [
            executor.submit(simulate_user_session, i + 1, space_url, duration_sec, results)
            for i in range(num_users)
        ]
        # Wait for all to complete; .result() re-raises any unexpected
        # worker exception instead of losing it silently.
        for future in futures:
            future.result()
    actual_duration = time.time() - start_time
    results.print_summary(actual_duration)
    # Recommendation
    print("\n" + "=" * 70)
    print("RECOMMENDATIONS:")
    print("=" * 70)
    # Guard the ratio: if every connection failed, total_requests is 0 and
    # the bare division would raise ZeroDivisionError.
    success_ratio = (results.successful_requests / results.total_requests
                     if results.total_requests else 0.0)
    if success_ratio >= 0.95:
        print("βœ… Your upgraded CPU tier is performing well!")
        print("βœ… Ready for 150-user workshop")
        print("πŸ’‘ Consider setting sleep timer to 15-30 min to save costs")
    elif success_ratio >= 0.90:
        print("⚠️ Performance is acceptable but monitor during workshop")
        print("πŸ’‘ Consider testing with more users to validate capacity")
    else:
        print("❌ Performance issues detected")
        print("πŸ’‘ Check HF Space logs for errors")
        print("πŸ’‘ Verify queue configuration is active")
    print("=" * 70 + "\n")
def main():
    """Parse CLI arguments, validate them, and launch the load test."""
    parser = argparse.ArgumentParser(description="Realistic load test for HF Spaces")
    parser.add_argument("--users", type=int, default=50,
                        help="Number of concurrent users (default: 50, max recommended: 100 to avoid rate limits)")
    parser.add_argument("--duration", type=int, default=30,
                        help="Test duration in seconds (default: 30)")
    parser.add_argument("--space", type=str, default="John-jero/IDWeekAgents",
                        help="HF Space name (owner/space-name)")
    args = parser.parse_args()
    # Validate inputs
    if args.users > 100:
        print("⚠️ WARNING: Using >100 users may trigger HF rate limits")
        print("Recommended: Start with 50 users, then try 75, then 100")
        response = input("Continue anyway? (y/n): ")
        if response.lower() != 'y':
            print("Test cancelled.")
            return
    # Pass the bare Space ID: gradio_client.Client accepts "owner/space"
    # and resolves the hosted app itself. The huggingface.co/spaces/... page
    # URL is NOT the app endpoint, so Client() cannot connect to it.
    space_src = args.space
    print("\nπŸš€ Starting realistic load test...")
    print("This test validates that your upgraded CPU tier can handle concurrent users")
    print("without triggering HF rate limits.\n")
    run_load_test(args.users, args.duration, space_src)
# Run the load test only when executed as a script, not on import.
if __name__ == "__main__":
    main()