|
|
|
|
|
""" |
|
|
Evaluation Service module for MTEB Turkish Leaderboard |
|
|
Handles evaluation submissions and status tracking |
|
|
""" |
|
|
|
|
|
import time |
|
|
import re |
|
|
from typing import Optional, Tuple, List |
|
|
import traceback |
|
|
import pandas as pd |
|
|
import gradio as gr |
|
|
|
|
|
from api_client import send_evaluation_request_to_api, get_evaluation_status, cancel_evaluation_request |
|
|
|
|
|
|
|
|
# In-memory registry of in-flight evaluation requests, keyed by request_id.
# Values are dicts read elsewhere in this module: "model_name", "email",
# "start_time" (epoch seconds from time.time()), and optionally "status".
# NOTE(review): nothing in this chunk inserts entries — presumably populated
# elsewhere in the submission flow; confirm against the caller.
active_evaluations = {}
|
|
|
|
|
|
|
|
def get_active_evaluations_status() -> str:
    """Build a one-line-per-request summary of every tracked evaluation."""
    if not active_evaluations:
        return "π’ No active evaluation requests"

    summary_lines = []
    for req_id, details in active_evaluations.items():
        # Elapsed time is recomputed per entry so later rows reflect "now".
        elapsed = int(time.time() - details["start_time"])
        state = details.get("status", "PENDING")
        summary_lines.append(
            f"π {details['model_name']} ({details['email']}) - {req_id} [{state}] ({elapsed}s)"
        )

    return "\n".join(summary_lines)
|
|
|
|
|
|
|
|
def get_active_evaluations_with_cancel_options() -> Tuple[str, List[str]]:
    """Return the status summary text plus dropdown labels for cancellation.

    Each label has the form "<request_id> - <model_name>", which
    cancel_active_evaluation() later parses back apart.
    """
    summary = get_active_evaluations_status()
    options = [
        f"{req_id} - {details['model_name']}"
        for req_id, details in active_evaluations.items()
    ]
    return summary, options
|
|
|
|
|
|
|
|
def clear_active_evaluations() -> str:
    """Drop every tracked evaluation and report how many were removed."""
    global active_evaluations
    removed = len(active_evaluations)
    active_evaluations.clear()
    return f"β Cleared {removed} active evaluation(s) from tracking"
|
|
|
|
|
|
|
|
def cancel_active_evaluation(selection: str) -> str:
    """Cancel the evaluation chosen in the UI.

    ``selection`` is a dropdown label of the form "<request_id> - <model_name>";
    only the request id portion is used.
    """
    if not selection:
        return "β No evaluation selected for cancellation"

    try:
        request_id = selection.partition(" - ")[0]

        if request_id not in active_evaluations:
            return f"β Evaluation {request_id} not found in active evaluations"

        if cancel_evaluation_request(request_id):
            # Remove from local tracking only after the API confirms the cancel.
            model_name = active_evaluations.pop(request_id)["model_name"]
            return f"β Successfully cancelled evaluation for {model_name} (ID: {request_id})"

        return f"β Failed to cancel evaluation {request_id}. Check API connection."
    except Exception as e:
        # UI boundary: surface the failure as a message rather than raising.
        return f"β Error cancelling evaluation: {str(e)}"
|
|
|
|
|
|
|
|
def _validate_evaluation_request(model_name: str, email: str = None) -> Optional[str]: |
|
|
"""Validate evaluation request parameters""" |
|
|
|
|
|
if not model_name or not model_name.strip(): |
|
|
return "β Model name cannot be empty!" |
|
|
|
|
|
model_name = model_name.strip() |
|
|
|
|
|
|
|
|
if len(model_name) < 3: |
|
|
return "β Model name too short!" |
|
|
|
|
|
if len(model_name) > 256: |
|
|
return "β Model name too long (maximum 256 characters)!" |
|
|
|
|
|
|
|
|
if '/' not in model_name: |
|
|
return "β Invalid model name format! Must include organization (e.g., organization/model-name)" |
|
|
|
|
|
if not re.match(r'^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$', model_name): |
|
|
return "β Invalid model name format! Use format: organization/model-name" |
|
|
|
|
|
|
|
|
if not email or not email.strip(): |
|
|
return "β Email address cannot be empty!" |
|
|
|
|
|
email = email.strip() |
|
|
|
|
|
if len(email) > 254: |
|
|
return "β Email address too long!" |
|
|
|
|
|
email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' |
|
|
if not re.match(email_pattern, email): |
|
|
return "β Invalid email address format!" |
|
|
|
|
|
return None |
|
|
|
|
|
|
|
|
def submit_evaluation(model_name: str, email: str, batch_size: int, current_data: pd.DataFrame, progress=gr.Progress()) -> Tuple[str, Optional[pd.DataFrame]]:
    """Validate the request, forward it to the evaluation API, and return a
    user-facing message plus the (unchanged) leaderboard data.

    Returns (message, current_data) on success and (message, None) on any
    validation failure or API error.
    """
    try:
        validation_error = _validate_evaluation_request(model_name, email)
        if validation_error:
            return validation_error, None

        progress(0.1, desc="Sending evaluation request to API...")

        # Fire the request; the response itself is not used here — status
        # updates arrive via email / the status-refresh flow.
        api_response = send_evaluation_request_to_api(model_name, batch_size, email)

        progress(1.0, desc="Request submitted successfully!")

        success_msg = f"""
β Evaluation request submitted successfully!

π€ Model: {model_name}
π§ Email: {email}

π Next Steps:
β±οΈ Your request will be reviewed by our system
π§ You will receive email notifications about the status of your evaluation
π If you've submitted this model before, you'll be notified via email

Thank you for contributing to the Mizan Leaderboard!
"""
        return success_msg.strip(), current_data

    except Exception as e:
        # Boundary handler: log the traceback for operators, show a generic
        # message to the user.
        print(f"β Error submitting evaluation: {str(e)}")
        traceback.print_exc()

        error_msg = f"""
β Failed to submit evaluation request

π€ Model: {model_name}
π§ Email: {email}

β οΈ Error: Unable to connect to the evaluation service.

Please try again later or contact support if the problem persists.
"""
        return error_msg.strip(), None
|
|
|
|
|
|
|
|
def refresh_evaluation_status() -> str:
    """Poll the API once per tracked evaluation and record status changes."""
    if not active_evaluations:
        return "π’ No active evaluations to refresh"

    changes = 0
    for req_id, details in active_evaluations.items():
        try:
            payload = get_evaluation_status(req_id)
            if not payload or "status" not in payload:
                continue
            previous = details.get("status", "UNKNOWN")
            latest = payload["status"]
            if previous != latest:
                details["status"] = latest
                changes += 1
                print(f"Status updated for {req_id}: {previous} -> {latest}")
        except Exception as e:
            # A single failed lookup must not abort the whole refresh pass.
            print(f"Error refreshing status for {req_id}: {e}")

    return f"π Refreshed status for {len(active_evaluations)} evaluation(s). {changes} status change(s) detected."
|
|
|