Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Mark Duppenthaler
committed on
Commit
·
b087e88
1
Parent(s):
54be5f9
Combined leaderboard, simplified filters
Browse files- backend/app.py +29 -22
- backend/chart.py +11 -41
- backend/config.py +48 -4
- frontend/src/App.tsx +27 -12
- frontend/src/components/DatasetSelector.tsx +12 -12
- frontend/src/components/Examples.tsx +2 -1
- frontend/src/components/LeaderBoardPage.tsx +0 -34
- frontend/src/components/LeaderboardChart.tsx +241 -0
- frontend/src/components/LeaderboardFilter.tsx +1 -1
- frontend/src/components/LeaderboardPage.tsx +77 -0
- frontend/src/components/LeaderboardTable.tsx +467 -429
- frontend/src/components/LoadingSpinner.tsx +15 -0
- frontend/src/components/ModelFilter.tsx +1 -1
backend/app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from backend.chart import mk_variations
|
| 2 |
-
from backend.config import get_dataset_config
|
| 3 |
from backend.examples import audio_examples_tab, image_examples_tab, video_examples_tab
|
| 4 |
-
from flask import Flask, Response, send_from_directory
|
| 5 |
from flask_cors import CORS
|
| 6 |
import os
|
| 7 |
import logging
|
|
@@ -39,41 +39,47 @@ def index():
|
|
| 39 |
@app.route("/data/<path:dataset_name>")
|
| 40 |
def data_files(dataset_name):
|
| 41 |
"""
|
| 42 |
-
Serves csv files from
|
| 43 |
"""
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
logger.info(f"Looking for dataset file: {file_path}")
|
| 47 |
-
|
| 48 |
df = pd.read_csv(file_path)
|
| 49 |
logger.info(f"Processing dataset: {dataset_name}")
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
@app.route("/examples/<path:type>")
|
| 59 |
def example_files(type):
|
| 60 |
"""
|
| 61 |
-
Serve example files from
|
| 62 |
"""
|
| 63 |
|
| 64 |
-
abs_path = "https://dl.fbaipublicfiles.com/omnisealbench/"
|
| 65 |
-
|
| 66 |
# Switch based on the type parameter to call the appropriate tab function
|
| 67 |
if type == "image":
|
| 68 |
-
result = image_examples_tab(
|
| 69 |
return Response(json.dumps(result), mimetype="application/json")
|
| 70 |
elif type == "audio":
|
| 71 |
# Assuming you'll create these functions
|
| 72 |
-
result = audio_examples_tab(
|
| 73 |
return Response(json.dumps(result), mimetype="application/json")
|
| 74 |
elif type == "video":
|
| 75 |
# Assuming you'll create these functions
|
| 76 |
-
result = video_examples_tab(
|
| 77 |
return Response(json.dumps(result), mimetype="application/json")
|
| 78 |
else:
|
| 79 |
return "Invalid example type", 400
|
|
@@ -91,7 +97,7 @@ def proxy(url):
|
|
| 91 |
url = unquote(url)
|
| 92 |
|
| 93 |
# Make sure we're only proxying from trusted domains for security
|
| 94 |
-
if not url.startswith(
|
| 95 |
return {"error": "Only proxying from allowed domains is permitted"}, 403
|
| 96 |
|
| 97 |
response = requests.get(url, stream=True)
|
|
@@ -120,9 +126,9 @@ def proxy(url):
|
|
| 120 |
return {"error": str(e)}, 500
|
| 121 |
|
| 122 |
|
| 123 |
-
def get_leaderboard(
|
| 124 |
# Determine file type and handle accordingly
|
| 125 |
-
config
|
| 126 |
|
| 127 |
# This part adds on all the columns
|
| 128 |
df = get_old_format_dataframe(df, config["first_cols"], config["attack_scores"])
|
|
@@ -146,11 +152,12 @@ def get_leaderboard(dataset_name, df):
|
|
| 146 |
return Response(json.dumps(result), mimetype="application/json")
|
| 147 |
|
| 148 |
|
| 149 |
-
def get_chart(df):
|
| 150 |
# This function should return the chart data based on the DataFrame
|
| 151 |
# For now, we will just return a placeholder response
|
| 152 |
chart_data = mk_variations(
|
| 153 |
df,
|
|
|
|
| 154 |
# attacks_plot_metrics,
|
| 155 |
# audio_attacks_with_variations,
|
| 156 |
)
|
|
|
|
| 1 |
from backend.chart import mk_variations
|
| 2 |
+
from backend.config import ABS_DATASET_DOMAIN, ABS_DATASET_PATH, get_dataset_config
|
| 3 |
from backend.examples import audio_examples_tab, image_examples_tab, video_examples_tab
|
| 4 |
+
from flask import Flask, Response, send_from_directory, request
|
| 5 |
from flask_cors import CORS
|
| 6 |
import os
|
| 7 |
import logging
|
|
|
|
| 39 |
@app.route("/data/<path:dataset_name>")
def data_files(dataset_name):
    """
    Load a dataset CSV from the remote dataset location and return it as JSON.

    The ``dataset_type`` query parameter selects both the CSV that is read
    (its path is built from ``ABS_DATASET_PATH``, ``dataset_name`` and
    ``dataset_type``) and the function that renders it:
    ``benchmark`` -> ``get_leaderboard``, ``attacks_variations`` -> ``get_chart``.

    Returns a 400 response when ``dataset_type`` is missing or unsupported,
    and a 404 response when the CSV cannot be loaded or processed.
    """
    # One renderer per supported dataset type.
    handlers = {
        "benchmark": get_leaderboard,
        "attacks_variations": get_chart,
    }

    # Get dataset_type from query params
    dataset_type = request.args.get("dataset_type")
    if not dataset_type:
        logger.error("No dataset_type provided in query parameters.")
        return "Dataset type not specified", 400

    # Reject unknown types before fetching anything: without this check the
    # view could fall through and return None, which is not a valid response.
    handler = handlers.get(dataset_type)
    if handler is None:
        logger.error(f"Unsupported dataset_type: {dataset_type}")
        return "Invalid dataset type", 400

    file_path = os.path.join(ABS_DATASET_PATH, dataset_name) + f"_{dataset_type}.csv"
    logger.info(f"Looking for dataset file: {file_path}")
    try:
        df = pd.read_csv(file_path)
        logger.info(f"Processing dataset: {dataset_name}")
        config = get_dataset_config(dataset_name)
        return handler(config, df)
    except Exception:
        # Catch Exception instead of using a bare except so SystemExit and
        # KeyboardInterrupt still propagate; logger.exception records the
        # traceback so the real cause is not hidden behind the 404.
        logger.exception(f"Failed to fetch file: {file_path}")
        return "File not found", 404
|
| 64 |
|
| 65 |
|
| 66 |
@app.route("/examples/<path:type>")
def example_files(type):
    """
    Return the examples for one media type as a JSON response.

    ``type`` is taken from the URL and must be ``image``, ``audio`` or
    ``video``; any other value produces a 400 response.
    """
    # One examples-tab builder per supported media type.
    tab_builders = {
        "image": image_examples_tab,
        "audio": audio_examples_tab,
        "video": video_examples_tab,
    }

    if type not in tab_builders:
        return "Invalid example type", 400

    payload = tab_builders[type](ABS_DATASET_PATH)
    return Response(json.dumps(payload), mimetype="application/json")
|
|
|
|
| 97 |
url = unquote(url)
|
| 98 |
|
| 99 |
# Make sure we're only proxying from trusted domains for security
|
| 100 |
+
if not url.startswith(ABS_DATASET_DOMAIN):
|
| 101 |
return {"error": "Only proxying from allowed domains is permitted"}, 403
|
| 102 |
|
| 103 |
response = requests.get(url, stream=True)
|
|
|
|
| 126 |
return {"error": str(e)}, 500
|
| 127 |
|
| 128 |
|
| 129 |
+
def get_leaderboard(config, df):
|
| 130 |
# Determine file type and handle accordingly
|
| 131 |
+
logger.warning(f"Processing dataset with config: {config}")
|
| 132 |
|
| 133 |
# This part adds on all the columns
|
| 134 |
df = get_old_format_dataframe(df, config["first_cols"], config["attack_scores"])
|
|
|
|
| 152 |
return Response(json.dumps(result), mimetype="application/json")
|
| 153 |
|
| 154 |
|
| 155 |
+
def get_chart(config, df):
|
| 156 |
# This function should return the chart data based on the DataFrame
|
| 157 |
# For now, we will just return a placeholder response
|
| 158 |
chart_data = mk_variations(
|
| 159 |
df,
|
| 160 |
+
config["attacks_with_variations"],
|
| 161 |
# attacks_plot_metrics,
|
| 162 |
# audio_attacks_with_variations,
|
| 163 |
)
|
backend/chart.py
CHANGED
|
@@ -2,44 +2,6 @@ import pandas as pd
|
|
| 2 |
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
-
audio_attacks_with_variations = [
|
| 6 |
-
"random_noise",
|
| 7 |
-
"lowpass_filter",
|
| 8 |
-
"highpass_filter",
|
| 9 |
-
"boost_audio",
|
| 10 |
-
"duck_audio",
|
| 11 |
-
"shush",
|
| 12 |
-
]
|
| 13 |
-
|
| 14 |
-
attacks_plot_metrics = ["bit_acc", "log10_p_value", "TPR", "FPR", "watermark_det_score"]
|
| 15 |
-
|
| 16 |
-
image_attacks_with_variations = [
|
| 17 |
-
"center_crop",
|
| 18 |
-
"jpeg",
|
| 19 |
-
"brightness",
|
| 20 |
-
"contrast",
|
| 21 |
-
"saturation",
|
| 22 |
-
"sharpness",
|
| 23 |
-
"resize",
|
| 24 |
-
"perspective",
|
| 25 |
-
"median_filter",
|
| 26 |
-
"hue",
|
| 27 |
-
"gaussian_blur",
|
| 28 |
-
]
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
video_attacks_with_variations = [
|
| 32 |
-
"Rotate",
|
| 33 |
-
"Resize",
|
| 34 |
-
"Crop",
|
| 35 |
-
"Brightness",
|
| 36 |
-
"Contrast",
|
| 37 |
-
"Saturation",
|
| 38 |
-
"H264",
|
| 39 |
-
"H264rgb",
|
| 40 |
-
"H265",
|
| 41 |
-
]
|
| 42 |
-
|
| 43 |
|
| 44 |
def plot_data(metric, selected_attack, all_attacks_df):
|
| 45 |
attack_df = all_attacks_df[all_attacks_df.attack == selected_attack]
|
|
@@ -57,8 +19,7 @@ def plot_data(metric, selected_attack, all_attacks_df):
|
|
| 57 |
|
| 58 |
def mk_variations(
|
| 59 |
all_attacks_df,
|
| 60 |
-
|
| 61 |
-
attacks_with_variations: list[str] = audio_attacks_with_variations,
|
| 62 |
):
|
| 63 |
# all_attacks_df = pd.read_csv(csv_file)
|
| 64 |
# print(all_attacks_df)
|
|
@@ -92,8 +53,17 @@ def mk_variations(
|
|
| 92 |
# all_graphs,
|
| 93 |
# )
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
return {
|
| 96 |
-
"metrics":
|
| 97 |
"attacks_with_variations": attacks_with_variations,
|
| 98 |
"all_attacks_df": all_attacks_df.to_dict(orient="records"),
|
| 99 |
}
|
|
|
|
| 2 |
|
| 3 |
from pathlib import Path
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
def plot_data(metric, selected_attack, all_attacks_df):
|
| 7 |
attack_df = all_attacks_df[all_attacks_df.attack == selected_attack]
|
|
|
|
| 19 |
|
| 20 |
def mk_variations(
|
| 21 |
all_attacks_df,
|
| 22 |
+
attacks_with_variations: list[str],
|
|
|
|
| 23 |
):
|
| 24 |
# all_attacks_df = pd.read_csv(csv_file)
|
| 25 |
# print(all_attacks_df)
|
|
|
|
| 53 |
# all_graphs,
|
| 54 |
# )
|
| 55 |
|
| 56 |
+
# Replace NaN values with the string "NaN" so the records serialize to valid JSON
|
| 57 |
+
all_attacks_df = all_attacks_df.fillna(value="NaN")
|
| 58 |
+
attacks_plot_metrics = [
|
| 59 |
+
"bit_acc",
|
| 60 |
+
"log10_p_value",
|
| 61 |
+
"TPR",
|
| 62 |
+
"FPR",
|
| 63 |
+
"watermark_det_score",
|
| 64 |
+
]
|
| 65 |
return {
|
| 66 |
+
"metrics": attacks_plot_metrics,
|
| 67 |
"attacks_with_variations": attacks_with_variations,
|
| 68 |
"all_attacks_df": all_attacks_df.to_dict(orient="records"),
|
| 69 |
}
|
backend/config.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
def get_dataset_config(dataset_name):
|
| 2 |
-
if dataset_name == "
|
| 3 |
return {
|
| 4 |
"first_cols": [
|
| 5 |
"snr",
|
|
@@ -29,8 +33,16 @@ def get_dataset_config(dataset_name):
|
|
| 29 |
"aac_compression": "Compression",
|
| 30 |
"mp3_compression": "Compression",
|
| 31 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
}
|
| 33 |
-
elif dataset_name == "
|
| 34 |
return {
|
| 35 |
"first_cols": ["snr", "sisnr", "stoi", "pesq"],
|
| 36 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
|
@@ -50,8 +62,16 @@ def get_dataset_config(dataset_name):
|
|
| 50 |
"aac_compression": "Compression",
|
| 51 |
"mp3_compression": "Compression",
|
| 52 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
}
|
| 54 |
-
elif dataset_name == "
|
| 55 |
return {
|
| 56 |
"first_cols": ["psnr", "ssim", "lpips", "decoder_time"],
|
| 57 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
|
@@ -76,8 +96,21 @@ def get_dataset_config(dataset_name):
|
|
| 76 |
"avg": "Averages",
|
| 77 |
"none": "Baseline",
|
| 78 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
}
|
| 80 |
-
elif dataset_name == "
|
| 81 |
return {
|
| 82 |
"first_cols": ["psnr", "ssim", "msssim", "lpips", "vmaf", "decoder_time"],
|
| 83 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
|
@@ -104,6 +137,17 @@ def get_dataset_config(dataset_name):
|
|
| 104 |
"H264_Crop_Brightness2": "Mixed",
|
| 105 |
"H264_Crop_Brightness3": "Mixed",
|
| 106 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
}
|
| 108 |
else:
|
| 109 |
raise ValueError(f"Unknown dataset: {dataset_name}")
|
|
|
|
| 1 |
+
ABS_DATASET_DOMAIN = "https://dl.fbaipublicfiles.com"
|
| 2 |
+
ABS_DATASET_PATH = f"{ABS_DATASET_DOMAIN}/omnisealbench/"
|
| 3 |
+
|
| 4 |
+
|
| 5 |
def get_dataset_config(dataset_name):
|
| 6 |
+
if dataset_name == "voxpopuli_1k/audio":
|
| 7 |
return {
|
| 8 |
"first_cols": [
|
| 9 |
"snr",
|
|
|
|
| 33 |
"aac_compression": "Compression",
|
| 34 |
"mp3_compression": "Compression",
|
| 35 |
},
|
| 36 |
+
"attacks_with_variations": [
|
| 37 |
+
"random_noise",
|
| 38 |
+
"lowpass_filter",
|
| 39 |
+
"highpass_filter",
|
| 40 |
+
"boost_audio",
|
| 41 |
+
"duck_audio",
|
| 42 |
+
"shush",
|
| 43 |
+
],
|
| 44 |
}
|
| 45 |
+
elif dataset_name == "ravdess_1k/audio":
|
| 46 |
return {
|
| 47 |
"first_cols": ["snr", "sisnr", "stoi", "pesq"],
|
| 48 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
|
|
|
| 62 |
"aac_compression": "Compression",
|
| 63 |
"mp3_compression": "Compression",
|
| 64 |
},
|
| 65 |
+
"attacks_with_variations": [
|
| 66 |
+
"random_noise",
|
| 67 |
+
"lowpass_filter",
|
| 68 |
+
"highpass_filter",
|
| 69 |
+
"boost_audio",
|
| 70 |
+
"duck_audio",
|
| 71 |
+
"shush",
|
| 72 |
+
],
|
| 73 |
}
|
| 74 |
+
elif dataset_name == "val2014_1k/image":
|
| 75 |
return {
|
| 76 |
"first_cols": ["psnr", "ssim", "lpips", "decoder_time"],
|
| 77 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
|
|
|
| 96 |
"avg": "Averages",
|
| 97 |
"none": "Baseline",
|
| 98 |
},
|
| 99 |
+
"attacks_with_variations": [
|
| 100 |
+
"center_crop",
|
| 101 |
+
"jpeg",
|
| 102 |
+
"brightness",
|
| 103 |
+
"contrast",
|
| 104 |
+
"saturation",
|
| 105 |
+
"sharpness",
|
| 106 |
+
"resize",
|
| 107 |
+
"perspective",
|
| 108 |
+
"median_filter",
|
| 109 |
+
"hue",
|
| 110 |
+
"gaussian_blur",
|
| 111 |
+
],
|
| 112 |
}
|
| 113 |
+
elif dataset_name == "sav_val_full/video":
|
| 114 |
return {
|
| 115 |
"first_cols": ["psnr", "ssim", "msssim", "lpips", "vmaf", "decoder_time"],
|
| 116 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
|
|
|
| 137 |
"H264_Crop_Brightness2": "Mixed",
|
| 138 |
"H264_Crop_Brightness3": "Mixed",
|
| 139 |
},
|
| 140 |
+
"attacks_with_variations": [
|
| 141 |
+
"Rotate",
|
| 142 |
+
"Resize",
|
| 143 |
+
"Crop",
|
| 144 |
+
"Brightness",
|
| 145 |
+
"Contrast",
|
| 146 |
+
"Saturation",
|
| 147 |
+
"H264",
|
| 148 |
+
"H264rgb",
|
| 149 |
+
"H265",
|
| 150 |
+
],
|
| 151 |
}
|
| 152 |
else:
|
| 153 |
raise ValueError(f"Unknown dataset: {dataset_name}")
|
frontend/src/App.tsx
CHANGED
|
@@ -1,17 +1,32 @@
|
|
| 1 |
-
import { useState } from 'react'
|
| 2 |
import Examples from './components/Examples'
|
| 3 |
-
import
|
| 4 |
|
| 5 |
function App() {
|
| 6 |
const [activeTab, setActiveTab] = useState<
|
| 7 |
'leaderboard' | 'imageExamples' | 'audioExamples' | 'videoExamples'
|
| 8 |
>('leaderboard')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
return (
|
| 11 |
<div className="min-h-screen w-11/12 mx-auto">
|
| 12 |
-
<div className="
|
| 13 |
-
<div className="
|
| 14 |
<h2 className="card-title">🥇 Omni Seal Bench Watermarking Leaderboard</h2>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
</div>
|
| 16 |
</div>
|
| 17 |
|
|
@@ -24,8 +39,8 @@ function App() {
|
|
| 24 |
checked={activeTab === 'leaderboard'}
|
| 25 |
onChange={() => setActiveTab('leaderboard')}
|
| 26 |
/>
|
| 27 |
-
<div className="tab-content bg-base-100
|
| 28 |
-
<
|
| 29 |
</div>
|
| 30 |
|
| 31 |
<input
|
|
@@ -36,8 +51,8 @@ function App() {
|
|
| 36 |
checked={activeTab === 'imageExamples'}
|
| 37 |
onChange={() => setActiveTab('imageExamples')}
|
| 38 |
/>
|
| 39 |
-
<div className="tab-content bg-base-100
|
| 40 |
-
<Examples fileType="image" />
|
| 41 |
</div>
|
| 42 |
|
| 43 |
<input
|
|
@@ -48,8 +63,8 @@ function App() {
|
|
| 48 |
checked={activeTab === 'audioExamples'}
|
| 49 |
onChange={() => setActiveTab('audioExamples')}
|
| 50 |
/>
|
| 51 |
-
<div className="tab-content bg-base-100
|
| 52 |
-
<Examples fileType="audio" />
|
| 53 |
</div>
|
| 54 |
|
| 55 |
<input
|
|
@@ -60,8 +75,8 @@ function App() {
|
|
| 60 |
checked={activeTab === 'videoExamples'}
|
| 61 |
onChange={() => setActiveTab('videoExamples')}
|
| 62 |
/>
|
| 63 |
-
<div className="tab-content bg-base-100
|
| 64 |
-
<Examples fileType="video" />
|
| 65 |
</div>
|
| 66 |
</div>
|
| 67 |
</div>
|
|
|
|
| 1 |
+
import { useState, useEffect } from 'react'
|
| 2 |
import Examples from './components/Examples'
|
| 3 |
+
import LeaderboardPage from './components/LeaderboardPage'
|
| 4 |
|
| 5 |
function App() {
|
| 6 |
const [activeTab, setActiveTab] = useState<
|
| 7 |
'leaderboard' | 'imageExamples' | 'audioExamples' | 'videoExamples'
|
| 8 |
>('leaderboard')
|
| 9 |
+
const [theme, setTheme] = useState<'dark' | 'light'>('dark')
|
| 10 |
+
|
| 11 |
+
useEffect(() => {
|
| 12 |
+
document.documentElement.setAttribute('data-theme', theme)
|
| 13 |
+
}, [theme])
|
| 14 |
|
| 15 |
return (
|
| 16 |
<div className="min-h-screen w-11/12 mx-auto">
|
| 17 |
+
<div className="bg-base-100 my-4">
|
| 18 |
+
<div className="flex flex-row justify-between items-center">
|
| 19 |
<h2 className="card-title">🥇 Omni Seal Bench Watermarking Leaderboard</h2>
|
| 20 |
+
<div className="flex justify-end items-center gap-2">
|
| 21 |
+
<span className="text-sm">{theme === 'dark' ? '🌙 Dark Mode' : '☀️ Light Mode'}</span>
|
| 22 |
+
<input
|
| 23 |
+
type="checkbox"
|
| 24 |
+
className="toggle"
|
| 25 |
+
checked={theme === 'dark'}
|
| 26 |
+
onChange={() => setTheme(theme === 'dark' ? 'light' : 'dark')}
|
| 27 |
+
aria-label="Toggle dark mode"
|
| 28 |
+
/>
|
| 29 |
+
</div>
|
| 30 |
</div>
|
| 31 |
</div>
|
| 32 |
|
|
|
|
| 39 |
checked={activeTab === 'leaderboard'}
|
| 40 |
onChange={() => setActiveTab('leaderboard')}
|
| 41 |
/>
|
| 42 |
+
<div className="tab-content bg-base-100 ">
|
| 43 |
+
<LeaderboardPage />
|
| 44 |
</div>
|
| 45 |
|
| 46 |
<input
|
|
|
|
| 51 |
checked={activeTab === 'imageExamples'}
|
| 52 |
onChange={() => setActiveTab('imageExamples')}
|
| 53 |
/>
|
| 54 |
+
<div className="tab-content bg-base-100 ">
|
| 55 |
+
{activeTab === 'imageExamples' ? <Examples fileType="image" /> : null}
|
| 56 |
</div>
|
| 57 |
|
| 58 |
<input
|
|
|
|
| 63 |
checked={activeTab === 'audioExamples'}
|
| 64 |
onChange={() => setActiveTab('audioExamples')}
|
| 65 |
/>
|
| 66 |
+
<div className="tab-content bg-base-100 ">
|
| 67 |
+
{activeTab === 'audioExamples' ? <Examples fileType="audio" /> : null}
|
| 68 |
</div>
|
| 69 |
|
| 70 |
<input
|
|
|
|
| 75 |
checked={activeTab === 'videoExamples'}
|
| 76 |
onChange={() => setActiveTab('videoExamples')}
|
| 77 |
/>
|
| 78 |
+
<div className="tab-content bg-base-100 ">
|
| 79 |
+
{activeTab === 'videoExamples' ? <Examples fileType="video" /> : null}
|
| 80 |
</div>
|
| 81 |
</div>
|
| 82 |
</div>
|
frontend/src/components/DatasetSelector.tsx
CHANGED
|
@@ -1,31 +1,31 @@
|
|
| 1 |
import React from 'react'
|
| 2 |
|
| 3 |
interface DatasetSelectorProps {
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
}
|
| 8 |
|
| 9 |
const DatasetSelector: React.FC<DatasetSelectorProps> = ({
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
}) => {
|
| 14 |
return (
|
| 15 |
<div className="mb-4">
|
| 16 |
-
<fieldset className="fieldset w-full p-4 rounded border">
|
| 17 |
<legend className="fieldset-legend font-semibold">Dataset</legend>
|
| 18 |
<div className="flex flex-wrap gap-2">
|
| 19 |
-
{
|
| 20 |
-
<label key={
|
| 21 |
<input
|
| 22 |
type="radio"
|
| 23 |
name="dataset"
|
| 24 |
className="radio radio-sm"
|
| 25 |
-
checked={
|
| 26 |
-
onChange={() =>
|
| 27 |
/>
|
| 28 |
-
<span className="text-sm">{
|
| 29 |
</label>
|
| 30 |
))}
|
| 31 |
</div>
|
|
|
|
| 1 |
import React from 'react'
|
| 2 |
|
| 3 |
interface DatasetSelectorProps {
|
| 4 |
+
datasetNames: string[]
|
| 5 |
+
selectedDatasetName: string
|
| 6 |
+
onDatasetNameChange: (datasetName: string) => void
|
| 7 |
}
|
| 8 |
|
| 9 |
const DatasetSelector: React.FC<DatasetSelectorProps> = ({
|
| 10 |
+
datasetNames,
|
| 11 |
+
selectedDatasetName,
|
| 12 |
+
onDatasetNameChange,
|
| 13 |
}) => {
|
| 14 |
return (
|
| 15 |
<div className="mb-4">
|
| 16 |
+
<fieldset className="fieldset w-full p-4 rounded border border-gray-700">
|
| 17 |
<legend className="fieldset-legend font-semibold">Dataset</legend>
|
| 18 |
<div className="flex flex-wrap gap-2">
|
| 19 |
+
{datasetNames.map((datasetName) => (
|
| 20 |
+
<label key={datasetName} className="flex items-center gap-2 cursor-pointer">
|
| 21 |
<input
|
| 22 |
type="radio"
|
| 23 |
name="dataset"
|
| 24 |
className="radio radio-sm"
|
| 25 |
+
checked={selectedDatasetName === datasetName}
|
| 26 |
+
onChange={() => onDatasetNameChange(datasetName)}
|
| 27 |
/>
|
| 28 |
+
<span className="text-sm">{datasetName}</span>
|
| 29 |
</label>
|
| 30 |
))}
|
| 31 |
</div>
|
frontend/src/components/Examples.tsx
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import React, { useState, useEffect } from 'react'
|
| 2 |
import API from '../API'
|
| 3 |
import AudioPlayer from './AudioPlayer'
|
|
|
|
| 4 |
|
| 5 |
interface ExamplesProps {
|
| 6 |
fileType: 'image' | 'audio' | 'video'
|
|
@@ -131,7 +132,7 @@ const Examples = ({ fileType }: ExamplesProps) => {
|
|
| 131 |
)}
|
| 132 |
</div>
|
| 133 |
|
| 134 |
-
{loading && <
|
| 135 |
{error && <p className="error">Error: {error}</p>}
|
| 136 |
|
| 137 |
{selectedModel && selectedAttack && (
|
|
|
|
| 1 |
import React, { useState, useEffect } from 'react'
|
| 2 |
import API from '../API'
|
| 3 |
import AudioPlayer from './AudioPlayer'
|
| 4 |
+
import LoadingSpinner from './LoadingSpinner'
|
| 5 |
|
| 6 |
interface ExamplesProps {
|
| 7 |
fileType: 'image' | 'audio' | 'video'
|
|
|
|
| 132 |
)}
|
| 133 |
</div>
|
| 134 |
|
| 135 |
+
{loading && <LoadingSpinner />}
|
| 136 |
{error && <p className="error">Error: {error}</p>}
|
| 137 |
|
| 138 |
{selectedModel && selectedAttack && (
|
frontend/src/components/LeaderBoardPage.tsx
DELETED
|
@@ -1,34 +0,0 @@
|
|
| 1 |
-
import React, { useState } from 'react'
|
| 2 |
-
import DatasetSelector from './DatasetSelector'
|
| 3 |
-
import LeaderboardTable from './LeaderboardTable'
|
| 4 |
-
import DataChart from './DataChart'
|
| 5 |
-
|
| 6 |
-
const LeaderBoardPage: React.FC = () => {
|
| 7 |
-
const datasets = [
|
| 8 |
-
'voxpopuli_1k_audio',
|
| 9 |
-
'ravdess_1k_audio',
|
| 10 |
-
'val2014_1k_image',
|
| 11 |
-
'sav_val_full_video',
|
| 12 |
-
]
|
| 13 |
-
const [selectedDataset, setSelectedDataset] = useState('voxpopuli_1k_audio')
|
| 14 |
-
|
| 15 |
-
return (
|
| 16 |
-
<div className="space-y-6">
|
| 17 |
-
<DatasetSelector
|
| 18 |
-
datasets={datasets}
|
| 19 |
-
selectedDataset={selectedDataset}
|
| 20 |
-
onDatasetChange={setSelectedDataset}
|
| 21 |
-
/>
|
| 22 |
-
|
| 23 |
-
<div className="space-y-8">
|
| 24 |
-
<LeaderboardTable dataset={selectedDataset} />
|
| 25 |
-
<div className="mt-8 pt-4 border-t border-gray-200">
|
| 26 |
-
<h3 className="text-lg font-semibold mb-4">Performance Chart</h3>
|
| 27 |
-
<DataChart dataset={selectedDataset} />
|
| 28 |
-
</div>
|
| 29 |
-
</div>
|
| 30 |
-
</div>
|
| 31 |
-
)
|
| 32 |
-
}
|
| 33 |
-
|
| 34 |
-
export default LeaderBoardPage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/src/components/LeaderboardChart.tsx
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useEffect, useState } from 'react'
|
| 2 |
+
import {
|
| 3 |
+
LineChart,
|
| 4 |
+
Line,
|
| 5 |
+
XAxis,
|
| 6 |
+
YAxis,
|
| 7 |
+
CartesianGrid,
|
| 8 |
+
Tooltip,
|
| 9 |
+
Legend,
|
| 10 |
+
ResponsiveContainer,
|
| 11 |
+
} from 'recharts'
|
| 12 |
+
import API from '../API'
|
| 13 |
+
import LoadingSpinner from './LoadingSpinner'
|
| 14 |
+
|
| 15 |
+
interface LeaderboardChartProps {
|
| 16 |
+
dataset: string
|
| 17 |
+
selectedModels: Set<string>
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
interface Row {
|
| 21 |
+
metric: string
|
| 22 |
+
[key: string]: string | number
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
/**
 * Dropdown listing the available metrics; the user's choice is reported
 * through `onMetricChange`.
 */
const MetricSelector = (props: {
  metrics: Set<string>
  selectedMetric: string | null
  onMetricChange: (event: React.ChangeEvent<HTMLSelectElement>) => void
}) => {
  const { metrics, selectedMetric, onMetricChange } = props

  // The <select> is controlled and needs a string, so a null selection becomes ''.
  const currentValue = selectedMetric ?? ''

  const options = Array.from(metrics, (name) => (
    <option key={name} value={name}>
      {name}
    </option>
  ))

  return (
    <fieldset className="fieldset">
      <legend className="fieldset-legend">Metric</legend>
      <select
        id="metric-selector"
        value={currentValue}
        onChange={onMetricChange}
        className="select select-bordered w-full"
      >
        {options}
      </select>
    </fieldset>
  )
}
|
| 52 |
+
|
| 53 |
+
/**
 * Dropdown listing the available attacks; the user's choice is reported
 * through `onAttackChange`.
 */
const AttackSelector = (props: {
  attacks: Set<string>
  selectedAttack: string | null
  onAttackChange: (event: React.ChangeEvent<HTMLSelectElement>) => void
}) => {
  const { attacks, selectedAttack, onAttackChange } = props

  // The <select> is controlled and needs a string, so a null selection becomes ''.
  const currentValue = selectedAttack ?? ''

  const options = Array.from(attacks, (name) => (
    <option key={name} value={name}>
      {name}
    </option>
  ))

  return (
    <fieldset className="fieldset mb-4">
      <legend className="fieldset-legend">Attack</legend>
      <select
        id="attack-selector"
        value={currentValue}
        onChange={onAttackChange}
        className="select select-bordered w-full"
      >
        {options}
      </select>
    </fieldset>
  )
}
|
| 80 |
+
|
| 81 |
+
/**
 * Line chart of one metric against attack strength, with one line per model.
 *
 * Fetches the `attacks_variations` data for `dataset` every time `dataset`
 * changes, then lets the user choose which metric and which attack to plot.
 * Only models contained in `selectedModels` get a line.
 */
const LeaderboardChart = ({ dataset, selectedModels }: LeaderboardChartProps) => {
  const [chartData, setChartData] = useState<Row[]>([])
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [metrics, setMetrics] = useState<Set<string>>(new Set())
  const [attacks, setAttacks] = useState<Set<string>>(new Set())
  const [selectedMetric, setSelectedMetric] = useState<string | null>(null)
  const [selectedAttack, setSelectedAttack] = useState<string | null>(null)

  useEffect(() => {
    // Flipped by the cleanup function so that a response belonging to a
    // previously selected dataset cannot overwrite the current state.
    let cancelled = false

    setLoading(true)
    // Clear any error left over from an earlier dataset; otherwise a single
    // failed fetch would keep the chart hidden after switching datasets.
    setError(null)

    API.fetchStaticFile(`data/${dataset}?dataset_type=attacks_variations`)
      .then((response) => {
        if (cancelled) return
        const data = JSON.parse(response)
        const rows: Row[] = data['all_attacks_df'].map((row: any) => {
          const newRow: Row = { ...row }
          // Convert strength value to number if it exists and is a string
          if (typeof newRow.strength === 'string') {
            newRow.strength = parseFloat(newRow.strength)
          }
          return newRow
        })

        // Fall back to null when a list is empty so the state never holds undefined.
        setSelectedMetric(data['metrics'][0] ?? null)
        setMetrics(new Set(data['metrics']))
        setSelectedAttack(data['attacks_with_variations'][0] ?? null)
        setAttacks(new Set(data['attacks_with_variations']))
        setChartData(rows)
        setLoading(false)
      })
      .catch((err) => {
        if (cancelled) return
        setError('Failed to fetch JSON: ' + err.message)
        setLoading(false)
      })

    return () => {
      cancelled = true
    }
  }, [dataset])

  const handleMetricChange = (event: React.ChangeEvent<HTMLSelectElement>) => {
    setSelectedMetric(event.target.value)
  }

  const handleAttackChange = (event: React.ChangeEvent<HTMLSelectElement>) => {
    setSelectedAttack(event.target.value)
  }

  // Keep only the rows of the selected attack, ordered by 'strength'.
  // filter() returns a new array, so sort() does not mutate the state.
  const sortedChartData = chartData
    .filter((row) => !selectedAttack || row.attack === selectedAttack)
    .sort((a, b) => (a.strength as number) - (b.strength as number))

  // X-axis range. Math.min/Math.max over an empty list yield Infinity and
  // -Infinity, so use a fixed placeholder range when there are no rows.
  const strengths = sortedChartData.map((item) => Number(item.strength))
  const strengthDomain: [number, number] =
    strengths.length > 0 ? [Math.min(...strengths), Math.max(...strengths)] : [0, 1]

  // Unique models that are both present in the plotted rows and selected.
  const visibleModels = [
    ...new Set(
      sortedChartData
        .filter((row) => selectedModels.has(row.model as string))
        .map((row) => row.model)
    ),
  ]

  // Line colors, reused cyclically when there are more models than colors.
  const colors = ['#8884d8', '#82ca9d', '#ffc658', '#ff8042', '#0088fe', '#00C49F']

  return (
    <div className="rounded shadow p-4 overflow-auto mb-8">
      {loading && <LoadingSpinner />}
      {error && <div className="text-red-500">{error}</div>}
      {!loading && !error && (
        <>
          <div className="flex flex-col md:flex-row md:gap-x-4 mb-4">
            <div className="w-full md:w-1/2">
              <MetricSelector
                metrics={metrics}
                selectedMetric={selectedMetric}
                onMetricChange={handleMetricChange}
              />
            </div>
            <div className="w-full md:w-1/2">
              <AttackSelector
                attacks={attacks}
                selectedAttack={selectedAttack}
                onAttackChange={handleAttackChange}
              />
            </div>
          </div>

          {chartData.length > 0 && (
            <div className="h-64 mb-4">
              <ResponsiveContainer width="100%" height="100%">
                <LineChart
                  data={sortedChartData}
                  margin={{
                    top: 5,
                    right: 30,
                    left: 20,
                    bottom: 5,
                  }}
                >
                  <CartesianGrid strokeDasharray="3 3" />
                  <XAxis
                    dataKey="strength"
                    domain={strengthDomain}
                    type="number"
                    tickFormatter={(value) => value.toFixed(3)}
                    label={{ value: 'Strength', position: 'insideBottomRight', offset: -5 }}
                  />
                  <YAxis
                    label={{
                      value: selectedMetric || '',
                      angle: -90,
                      position: 'insideLeft',
                      style: { textAnchor: 'middle' },
                    }}
                    tickFormatter={(value) => value.toFixed(3)}
                  />
                  <Tooltip
                    contentStyle={{
                      backgroundColor: '#2a303c',
                      borderColor: '#374151',
                      color: 'white',
                    }}
                    // Values may arrive as strings, so coerce before formatting.
                    formatter={(value: number | string) => Number(value).toFixed(3)}
                  />
                  <Legend />

                  {/* No lines are drawn until a metric has been selected. */}
                  {selectedMetric &&
                    visibleModels.map((model, index) => (
                      <Line
                        key={model as string}
                        type="monotone"
                        dataKey={selectedMetric}
                        data={sortedChartData.filter((row) => row.model === model)}
                        name={model as string}
                        stroke={colors[index % colors.length]}
                        dot={false}
                      />
                    ))}
                </LineChart>
              </ResponsiveContainer>
            </div>
          )}
        </>
      )}
    </div>
  )
}
|
| 240 |
+
|
| 241 |
+
export default LeaderboardChart
|
frontend/src/components/LeaderboardFilter.tsx
CHANGED
|
@@ -173,7 +173,7 @@ const LeaderboardFilter: React.FC<FilterProps> = ({
|
|
| 173 |
<input
|
| 174 |
type="text"
|
| 175 |
placeholder="Search metrics..."
|
| 176 |
-
className="input input-bordered border-
|
| 177 |
value={searchTerm}
|
| 178 |
onChange={(e) => {
|
| 179 |
const value = e.target.value
|
|
|
|
| 173 |
<input
|
| 174 |
type="text"
|
| 175 |
placeholder="Search metrics..."
|
| 176 |
+
className="input input-bordered border-gray-300 input-sm w-48 pr-8"
|
| 177 |
value={searchTerm}
|
| 178 |
onChange={(e) => {
|
| 179 |
const value = e.target.value
|
frontend/src/components/LeaderboardPage.tsx
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import React, { useState, useEffect } from 'react'
|
| 2 |
+
import DatasetSelector from './DatasetSelector'
|
| 3 |
+
import LeaderboardTable from './LeaderboardTable'
|
| 4 |
+
import LeaderboardChart from './LeaderboardChart'
|
| 5 |
+
import ModelFilter from './ModelFilter'
|
| 6 |
+
import API from '../API'
|
| 7 |
+
import LoadingSpinner from './LoadingSpinner'
|
| 8 |
+
|
| 9 |
+
const LeaderboardPage: React.FC = () => {
|
| 10 |
+
const datasetNames = [
|
| 11 |
+
'voxpopuli_1k/audio',
|
| 12 |
+
'ravdess_1k/audio',
|
| 13 |
+
'val2014_1k/image',
|
| 14 |
+
'sav_val_full/video',
|
| 15 |
+
]
|
| 16 |
+
const [selectedDatasetName, setSelectedDatasetName] = useState(datasetNames[0])
|
| 17 |
+
const [models, setModels] = useState<string[]>([])
|
| 18 |
+
const [selectedModels, setSelectedModels] = useState<Set<string>>(new Set())
|
| 19 |
+
const [loading, setLoading] = useState(true)
|
| 20 |
+
const [benchmarkData, setBenchmarkData] = useState<any>(null)
|
| 21 |
+
|
| 22 |
+
// Fetch available models when dataset changes
|
| 23 |
+
useEffect(() => {
|
| 24 |
+
setLoading(true)
|
| 25 |
+
API.fetchStaticFile(`data/${selectedDatasetName}?dataset_type=benchmark`)
|
| 26 |
+
.then((response) => {
|
| 27 |
+
const data = JSON.parse(response)
|
| 28 |
+
setBenchmarkData(data)
|
| 29 |
+
const rows = data['rows']
|
| 30 |
+
const allKeys: string[] = Array.from(new Set(rows.flatMap((row: any) => Object.keys(row))))
|
| 31 |
+
// Remove 'metric' from headers if it exists
|
| 32 |
+
const headers = allKeys.filter((key) => key !== 'metric')
|
| 33 |
+
|
| 34 |
+
setModels(headers)
|
| 35 |
+
// Initialize all models as selected
|
| 36 |
+
setSelectedModels(new Set(headers))
|
| 37 |
+
setLoading(false)
|
| 38 |
+
})
|
| 39 |
+
.catch((err) => {
|
| 40 |
+
console.error('Failed to fetch models:', err)
|
| 41 |
+
setLoading(false)
|
| 42 |
+
})
|
| 43 |
+
}, [selectedDatasetName])
|
| 44 |
+
|
| 45 |
+
return (
|
| 46 |
+
<div className="">
|
| 47 |
+
<div className="flex flex-col gap-4">
|
| 48 |
+
<DatasetSelector
|
| 49 |
+
datasetNames={datasetNames}
|
| 50 |
+
selectedDatasetName={selectedDatasetName}
|
| 51 |
+
onDatasetNameChange={setSelectedDatasetName}
|
| 52 |
+
/>
|
| 53 |
+
</div>
|
| 54 |
+
{loading ? (
|
| 55 |
+
<LoadingSpinner />
|
| 56 |
+
) : (
|
| 57 |
+
<>
|
| 58 |
+
{models.length > 0 && (
|
| 59 |
+
<ModelFilter
|
| 60 |
+
models={models}
|
| 61 |
+
selectedModels={selectedModels}
|
| 62 |
+
setSelectedModels={setSelectedModels}
|
| 63 |
+
/>
|
| 64 |
+
)}
|
| 65 |
+
<div className="space-y-8">
|
| 66 |
+
<LeaderboardTable benchmarkData={benchmarkData} selectedModels={selectedModels} />
|
| 67 |
+
<div className="mt-8 pt-4 border-t border-gray-200">
|
| 68 |
+
<LeaderboardChart dataset={selectedDatasetName} selectedModels={selectedModels} />
|
| 69 |
+
</div>
|
| 70 |
+
</div>
|
| 71 |
+
</>
|
| 72 |
+
)}
|
| 73 |
+
</div>
|
| 74 |
+
)
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
export default LeaderboardPage
|
frontend/src/components/LeaderboardTable.tsx
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
import React, { useEffect, useState } from 'react'
|
| 2 |
-
import API from '../API'
|
| 3 |
import LeaderboardFilter from './LeaderboardFilter'
|
| 4 |
-
import
|
| 5 |
|
| 6 |
interface LeaderboardTableProps {
|
| 7 |
-
|
|
|
|
| 8 |
}
|
| 9 |
|
| 10 |
interface Row {
|
|
@@ -16,119 +16,126 @@ interface Groups {
|
|
| 16 |
[group: string]: { [subgroup: string]: string[] }
|
| 17 |
}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
}
|
| 23 |
|
| 24 |
-
const LeaderboardTable: React.FC<LeaderboardTableProps> = ({
|
| 25 |
const [tableRows, setTableRows] = useState<Row[]>([])
|
| 26 |
const [tableHeader, setTableHeader] = useState<string[]>([])
|
| 27 |
-
const [loading, setLoading] = useState(true)
|
| 28 |
const [error, setError] = useState<string | null>(null)
|
| 29 |
const [groups, setGroups] = useState<Groups>({})
|
| 30 |
const [openGroups, setOpenGroups] = useState<{ [key: string]: boolean }>({})
|
| 31 |
const [openSubGroups, setOpenSubGroups] = useState<{ [key: string]: { [key: string]: boolean } }>(
|
| 32 |
{}
|
| 33 |
)
|
| 34 |
-
|
| 35 |
const [selectedMetrics, setSelectedMetrics] = useState<Set<string>>(new Set())
|
| 36 |
-
const [selectedModels, setSelectedModels] = useState<Set<string>>(new Set())
|
| 37 |
-
|
| 38 |
-
// To store the unique metrics from the Overall group
|
| 39 |
const [overallMetrics, setOverallMetrics] = useState<string[]>([])
|
|
|
|
| 40 |
|
| 41 |
useEffect(() => {
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
})
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
// Sort metrics to ensure consistent subgroup order
|
| 75 |
-
const sortedMetrics = [...metrics].sort()
|
| 76 |
-
|
| 77 |
-
// Create and sort subgroups
|
| 78 |
-
acc[group] = sortedMetrics.reduce<{ [key: string]: string[] }>((subAcc, metric) => {
|
| 79 |
-
const [mainGroup, subGroup] = metric.split('_')
|
| 80 |
-
if (!subAcc[mainGroup]) {
|
| 81 |
-
subAcc[mainGroup] = []
|
| 82 |
-
}
|
| 83 |
-
subAcc[mainGroup].push(metric)
|
| 84 |
-
return subAcc
|
| 85 |
-
}, {})
|
| 86 |
-
|
| 87 |
-
// Convert to sorted entries and back to object
|
| 88 |
-
acc[group] = Object.fromEntries(
|
| 89 |
-
Object.entries(acc[group]).sort(([subGroupA], [subGroupB]) =>
|
| 90 |
-
subGroupA.localeCompare(subGroupB)
|
| 91 |
-
)
|
| 92 |
)
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
Object.keys(groupsData).forEach((group) => {
|
| 108 |
-
initialOpenGroups[group] = false
|
| 109 |
-
initialOpenSubGroups[group] = {}
|
| 110 |
-
Object.keys(groupsData[group]).forEach((subGroup) => {
|
| 111 |
-
initialOpenSubGroups[group][subGroup] = false
|
| 112 |
-
})
|
| 113 |
})
|
| 114 |
-
|
| 115 |
-
// Get all metrics from all groups
|
| 116 |
-
const allMetrics = Object.values(groups).flat()
|
| 117 |
-
setSelectedMetrics(new Set(allMetrics))
|
| 118 |
-
// Initialize all models as selected
|
| 119 |
-
setSelectedModels(new Set(headers))
|
| 120 |
-
setTableHeader(headers)
|
| 121 |
-
setTableRows(rows)
|
| 122 |
-
setGroups(groupsData)
|
| 123 |
-
setOpenGroups(initialOpenGroups)
|
| 124 |
-
setOpenSubGroups(initialOpenSubGroups)
|
| 125 |
-
setLoading(false)
|
| 126 |
-
})
|
| 127 |
-
.catch((err) => {
|
| 128 |
-
setError('Failed to fetch JSON: ' + err.message)
|
| 129 |
-
setLoading(false)
|
| 130 |
})
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
const toggleGroup = (group: string) => {
|
| 134 |
setOpenGroups((prev) => ({ ...prev, [group]: !prev[group] }))
|
|
@@ -227,23 +234,21 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
|
|
| 227 |
}
|
| 228 |
|
| 229 |
return (
|
| 230 |
-
<div className="rounded shadow
|
| 231 |
-
{loading && <div>Loading...</div>}
|
| 232 |
{error && <div className="text-red-500">{error}</div>}
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
<div className="overflow-x-auto">
|
| 236 |
<div className="flex flex-col gap-4">
|
| 237 |
-
<
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
/>
|
| 242 |
-
<LeaderboardFilter
|
| 243 |
groups={groups}
|
| 244 |
selectedMetrics={selectedMetrics}
|
| 245 |
setSelectedMetrics={setSelectedMetrics}
|
| 246 |
-
/>
|
| 247 |
</div>
|
| 248 |
|
| 249 |
{selectedModels.size === 0 || selectedMetrics.size === 0 ? (
|
|
@@ -252,332 +257,25 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
|
|
| 252 |
</div>
|
| 253 |
) : (
|
| 254 |
<>
|
| 255 |
-
|
| 256 |
-
<thead>
|
| 257 |
-
<tr>
|
| 258 |
-
<th>Group / Subgroup</th>
|
| 259 |
-
{overallMetrics.map((metric) => (
|
| 260 |
-
<th
|
| 261 |
-
key={metric}
|
| 262 |
-
colSpan={tableHeader.filter((model) => selectedModels.has(model)).length}
|
| 263 |
-
className="text-center border-x"
|
| 264 |
-
>
|
| 265 |
-
{metric}
|
| 266 |
-
</th>
|
| 267 |
-
))}
|
| 268 |
-
</tr>
|
| 269 |
-
<tr>
|
| 270 |
-
<th></th>
|
| 271 |
-
{overallMetrics.map((metric) => (
|
| 272 |
-
<React.Fragment key={`header-models-${metric}`}>
|
| 273 |
-
{tableHeader
|
| 274 |
-
.filter((model) => selectedModels.has(model))
|
| 275 |
-
.map((model) => (
|
| 276 |
-
<th key={`${metric}-${model}`} className="text-center text-xs">
|
| 277 |
-
{model}
|
| 278 |
-
</th>
|
| 279 |
-
))}
|
| 280 |
-
</React.Fragment>
|
| 281 |
-
))}
|
| 282 |
-
</tr>
|
| 283 |
-
</thead>
|
| 284 |
-
<tbody>
|
| 285 |
-
{/* First render each group */}
|
| 286 |
-
{Object.entries(groups).map(([group, subGroups]) => {
|
| 287 |
-
// Skip the "Overall" group completely
|
| 288 |
-
if (group === 'Overall') return null
|
| 289 |
-
|
| 290 |
-
// Get all metrics for this group
|
| 291 |
-
const allGroupMetrics = Object.values(subGroups).flat()
|
| 292 |
-
// Filter to only include selected metrics
|
| 293 |
-
const visibleGroupMetrics = filterMetricsByGroupAndSubgroup(
|
| 294 |
-
allGroupMetrics,
|
| 295 |
-
group
|
| 296 |
-
)
|
| 297 |
-
|
| 298 |
-
// Skip this group if no metrics are selected
|
| 299 |
-
if (visibleGroupMetrics.length === 0) return null
|
| 300 |
-
|
| 301 |
-
return (
|
| 302 |
-
<React.Fragment key={group}>
|
| 303 |
-
{/* Group row with average stats for the entire group */}
|
| 304 |
-
<tr
|
| 305 |
-
className="bg-base-200 cursor-pointer hover:bg-base-300"
|
| 306 |
-
onClick={() => toggleGroup(group)}
|
| 307 |
-
>
|
| 308 |
-
<td className="font-medium">
|
| 309 |
-
{openGroups[group] ? '▼ ' : '▶ '}
|
| 310 |
-
{group}
|
| 311 |
-
</td>
|
| 312 |
-
{/* For each metric column */}
|
| 313 |
-
{overallMetrics.map((metric) => (
|
| 314 |
-
// Render sub-columns for each model
|
| 315 |
-
<React.Fragment key={`${group}-${metric}`}>
|
| 316 |
-
{tableHeader
|
| 317 |
-
.filter((model) => selectedModels.has(model))
|
| 318 |
-
.map((col) => {
|
| 319 |
-
// Find all metrics in this group that match the current metric name
|
| 320 |
-
const allMetricsWithName = findAllMetricsForName(metric)
|
| 321 |
-
const metricsInGroupForThisMetric = visibleGroupMetrics.filter(
|
| 322 |
-
(m) => allMetricsWithName.includes(m)
|
| 323 |
-
)
|
| 324 |
-
const stats = calculateStats(metricsInGroupForThisMetric, col)
|
| 325 |
-
|
| 326 |
-
return (
|
| 327 |
-
<td
|
| 328 |
-
key={`${group}-${metric}-${col}`}
|
| 329 |
-
className="font-medium text-center"
|
| 330 |
-
>
|
| 331 |
-
{!isNaN(stats.avg)
|
| 332 |
-
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
| 333 |
-
: 'N/A'}
|
| 334 |
-
</td>
|
| 335 |
-
)
|
| 336 |
-
})}
|
| 337 |
-
</React.Fragment>
|
| 338 |
-
))}
|
| 339 |
-
</tr>
|
| 340 |
-
|
| 341 |
-
{/* Only render subgroups if group is open */}
|
| 342 |
-
{openGroups[group] &&
|
| 343 |
-
Object.entries(subGroups).map(([subGroup, metrics]) => {
|
| 344 |
-
// Filter to only include selected metrics in this subgroup
|
| 345 |
-
const visibleSubgroupMetrics = filterMetricsByGroupAndSubgroup(
|
| 346 |
-
metrics,
|
| 347 |
-
group,
|
| 348 |
-
subGroup
|
| 349 |
-
)
|
| 350 |
-
|
| 351 |
-
// Skip this subgroup if no metrics are selected
|
| 352 |
-
if (visibleSubgroupMetrics.length === 0) return null
|
| 353 |
-
|
| 354 |
-
return (
|
| 355 |
-
<React.Fragment key={`${group}-${subGroup}`}>
|
| 356 |
-
{/* Subgroup row with average stats for the subgroup */}
|
| 357 |
-
<tr
|
| 358 |
-
className="bg-base-100 cursor-pointer hover:bg-base-200"
|
| 359 |
-
onClick={() => toggleSubGroup(group, subGroup)}
|
| 360 |
-
>
|
| 361 |
-
<td className="pl-6 font-medium">
|
| 362 |
-
{openSubGroups[group]?.[subGroup] ? '▼ ' : '▶ '}
|
| 363 |
-
{subGroup}
|
| 364 |
-
</td>
|
| 365 |
-
{/* For each metric column */}
|
| 366 |
-
{overallMetrics.map((metric) => (
|
| 367 |
-
// Render sub-columns for each model
|
| 368 |
-
<React.Fragment key={`${group}-${subGroup}-${metric}`}>
|
| 369 |
-
{tableHeader
|
| 370 |
-
.filter((model) => selectedModels.has(model))
|
| 371 |
-
.map((col) => {
|
| 372 |
-
// Find all metrics in this subgroup that match the current metric name
|
| 373 |
-
const allMetricsWithName = findAllMetricsForName(metric)
|
| 374 |
-
const metricsInSubgroupForThisMetric =
|
| 375 |
-
visibleSubgroupMetrics.filter((m) =>
|
| 376 |
-
allMetricsWithName.includes(m)
|
| 377 |
-
)
|
| 378 |
-
const stats = calculateStats(
|
| 379 |
-
metricsInSubgroupForThisMetric,
|
| 380 |
-
col
|
| 381 |
-
)
|
| 382 |
-
|
| 383 |
-
return (
|
| 384 |
-
<td
|
| 385 |
-
key={`${group}-${subGroup}-${metric}-${col}`}
|
| 386 |
-
className="font-medium text-center"
|
| 387 |
-
>
|
| 388 |
-
{!isNaN(stats.avg)
|
| 389 |
-
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
| 390 |
-
: 'N/A'}
|
| 391 |
-
</td>
|
| 392 |
-
)
|
| 393 |
-
})}
|
| 394 |
-
</React.Fragment>
|
| 395 |
-
))}
|
| 396 |
-
</tr>
|
| 397 |
-
|
| 398 |
-
{/* Individual metric rows */}
|
| 399 |
-
{openSubGroups[group]?.[subGroup] &&
|
| 400 |
-
// Sort visibleSubgroupMetrics alphabetically by the clean metric name
|
| 401 |
-
[...visibleSubgroupMetrics]
|
| 402 |
-
.sort((a, b) => {
|
| 403 |
-
// For metrics with format {category}_{strength}_{overall_metric_name},
|
| 404 |
-
// First sort by category, then by overall_metric_name, then by strength
|
| 405 |
-
|
| 406 |
-
// First extract the overall metric group
|
| 407 |
-
const getOverallMetricGroup = (metric: string) => {
|
| 408 |
-
for (const overall of overallMetrics) {
|
| 409 |
-
if (
|
| 410 |
-
metric.endsWith(`_${overall}`) ||
|
| 411 |
-
metric === overall
|
| 412 |
-
) {
|
| 413 |
-
return overall
|
| 414 |
-
}
|
| 415 |
-
}
|
| 416 |
-
return ''
|
| 417 |
-
}
|
| 418 |
-
|
| 419 |
-
const overallA = getOverallMetricGroup(a)
|
| 420 |
-
const overallB = getOverallMetricGroup(b)
|
| 421 |
-
|
| 422 |
-
// Extract the strength (last part before the overall metric)
|
| 423 |
-
const stripOverall = (metric: string, overall: string) => {
|
| 424 |
-
if (metric.endsWith(`_${overall}`)) {
|
| 425 |
-
// Remove the overall metric group and any preceding underscore
|
| 426 |
-
const stripped = metric.slice(
|
| 427 |
-
0,
|
| 428 |
-
metric.length - overall.length - 1
|
| 429 |
-
)
|
| 430 |
-
const parts = stripped.split('_')
|
| 431 |
-
return parts.length > 0 ? parts[parts.length - 1] : ''
|
| 432 |
-
}
|
| 433 |
-
return metric
|
| 434 |
-
}
|
| 435 |
-
|
| 436 |
-
// Extract the category (what remains after removing strength and overall_metric_name)
|
| 437 |
-
const getCategory = (metric: string, overall: string) => {
|
| 438 |
-
if (metric.endsWith(`_${overall}`)) {
|
| 439 |
-
const stripped = metric.slice(
|
| 440 |
-
0,
|
| 441 |
-
metric.length - overall.length - 1
|
| 442 |
-
)
|
| 443 |
-
const parts = stripped.split('_')
|
| 444 |
-
// Remove the last part (strength) and join the rest (category)
|
| 445 |
-
return parts.length > 1
|
| 446 |
-
? parts.slice(0, parts.length - 1).join('_')
|
| 447 |
-
: ''
|
| 448 |
-
}
|
| 449 |
-
return metric
|
| 450 |
-
}
|
| 451 |
-
|
| 452 |
-
const categoryA = getCategory(a, overallA)
|
| 453 |
-
const categoryB = getCategory(b, overallB)
|
| 454 |
-
|
| 455 |
-
// First sort by category
|
| 456 |
-
if (categoryA !== categoryB) {
|
| 457 |
-
return categoryA.localeCompare(categoryB)
|
| 458 |
-
}
|
| 459 |
-
|
| 460 |
-
// Then sort by overall metric name
|
| 461 |
-
if (overallA !== overallB) {
|
| 462 |
-
return overallA.localeCompare(overallB)
|
| 463 |
-
}
|
| 464 |
-
|
| 465 |
-
// Finally sort by strength
|
| 466 |
-
const subA = stripOverall(a, overallA)
|
| 467 |
-
const subB = stripOverall(b, overallB)
|
| 468 |
-
|
| 469 |
-
// Try to parse subA and subB as numbers, handling k/m/b suffixes
|
| 470 |
-
const parseNumber = (str: string) => {
|
| 471 |
-
const match = str.match(/^(\d+(?:\.\d+)?)([kKmMbB]?)$/)
|
| 472 |
-
if (!match) return NaN
|
| 473 |
-
let [_, num, suffix] = match
|
| 474 |
-
let value = parseFloat(num)
|
| 475 |
-
switch (suffix.toLowerCase()) {
|
| 476 |
-
case 'k':
|
| 477 |
-
value *= 1e3
|
| 478 |
-
break
|
| 479 |
-
case 'm':
|
| 480 |
-
value *= 1e6
|
| 481 |
-
break
|
| 482 |
-
case 'b':
|
| 483 |
-
value *= 1e9
|
| 484 |
-
break
|
| 485 |
-
}
|
| 486 |
-
return value
|
| 487 |
-
}
|
| 488 |
-
|
| 489 |
-
const numA = parseNumber(subA)
|
| 490 |
-
const numB = parseNumber(subB)
|
| 491 |
-
|
| 492 |
-
if (!isNaN(numA) && !isNaN(numB)) {
|
| 493 |
-
return numA - numB
|
| 494 |
-
}
|
| 495 |
-
// Fallback to string comparison if not both numbers
|
| 496 |
-
return subA.localeCompare(subB)
|
| 497 |
-
})
|
| 498 |
-
.map((metric) => {
|
| 499 |
-
const row = tableRows.find((r) => r.metric === metric)
|
| 500 |
-
if (!row) return null
|
| 501 |
-
|
| 502 |
-
// Extract the metric name (after the underscore)
|
| 503 |
-
const metricName = metric.includes('_')
|
| 504 |
-
? metric.split('_').slice(1).join('_')
|
| 505 |
-
: metric
|
| 506 |
-
|
| 507 |
-
return (
|
| 508 |
-
<tr key={metric} className="hover:bg-base-100">
|
| 509 |
-
<td className="pl-10">{metric}</td>
|
| 510 |
-
{/* For each metric column */}
|
| 511 |
-
{overallMetrics.map((oMetric) => {
|
| 512 |
-
// Only show values for the matching metric
|
| 513 |
-
const isMatchingMetric =
|
| 514 |
-
findAllMetricsForName(oMetric).includes(metric)
|
| 515 |
-
|
| 516 |
-
if (!isMatchingMetric) {
|
| 517 |
-
// Fill empty cells for non-matching metrics
|
| 518 |
-
return (
|
| 519 |
-
<React.Fragment key={`${metric}-${oMetric}`}>
|
| 520 |
-
{tableHeader
|
| 521 |
-
.filter((model) => selectedModels.has(model))
|
| 522 |
-
.map((col) => (
|
| 523 |
-
<td
|
| 524 |
-
key={`${metric}-${oMetric}-${col}`}
|
| 525 |
-
className="text-center"
|
| 526 |
-
></td>
|
| 527 |
-
))}
|
| 528 |
-
</React.Fragment>
|
| 529 |
-
)
|
| 530 |
-
}
|
| 531 |
-
|
| 532 |
-
// Show values for the matching metric
|
| 533 |
-
return (
|
| 534 |
-
<React.Fragment key={`${metric}-${oMetric}`}>
|
| 535 |
-
{tableHeader
|
| 536 |
-
.filter((model) => selectedModels.has(model))
|
| 537 |
-
.map((col) => {
|
| 538 |
-
const cell = row[col]
|
| 539 |
-
return (
|
| 540 |
-
<td
|
| 541 |
-
key={`${metric}-${oMetric}-${col}`}
|
| 542 |
-
className="text-center"
|
| 543 |
-
>
|
| 544 |
-
{!isNaN(Number(cell))
|
| 545 |
-
? Number(Number(cell).toFixed(3))
|
| 546 |
-
: cell}
|
| 547 |
-
</td>
|
| 548 |
-
)
|
| 549 |
-
})}
|
| 550 |
-
</React.Fragment>
|
| 551 |
-
)
|
| 552 |
-
})}
|
| 553 |
-
</tr>
|
| 554 |
-
)
|
| 555 |
-
})}
|
| 556 |
-
</React.Fragment>
|
| 557 |
-
)
|
| 558 |
-
})}
|
| 559 |
-
</React.Fragment>
|
| 560 |
-
)
|
| 561 |
-
})}
|
| 562 |
-
</tbody>
|
| 563 |
-
</table>
|
| 564 |
-
|
| 565 |
-
{/* Separate table for metrics that don't belong to any overall group */}
|
| 566 |
{(() => {
|
| 567 |
const standaloneMetrics = findStandaloneMetrics()
|
| 568 |
if (standaloneMetrics.length === 0) return null
|
| 569 |
-
|
| 570 |
return (
|
| 571 |
-
<div className="
|
| 572 |
-
<
|
| 573 |
-
<table className="table w-full">
|
| 574 |
<thead>
|
| 575 |
<tr>
|
| 576 |
-
<th>
|
|
|
|
|
|
|
| 577 |
{tableHeader
|
| 578 |
.filter((model) => selectedModels.has(model))
|
| 579 |
.map((model) => (
|
| 580 |
-
<th
|
|
|
|
|
|
|
|
|
|
| 581 |
{model}
|
| 582 |
</th>
|
| 583 |
))}
|
|
@@ -587,16 +285,20 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
|
|
| 587 |
{standaloneMetrics.sort().map((metric) => {
|
| 588 |
const row = tableRows.find((r) => r.metric === metric)
|
| 589 |
if (!row) return null
|
| 590 |
-
|
| 591 |
return (
|
| 592 |
<tr key={`standalone-${metric}`} className="hover:bg-base-100">
|
| 593 |
-
<td>
|
|
|
|
|
|
|
| 594 |
{tableHeader
|
| 595 |
.filter((model) => selectedModels.has(model))
|
| 596 |
.map((col) => {
|
| 597 |
const cell = row[col]
|
| 598 |
return (
|
| 599 |
-
<td
|
|
|
|
|
|
|
|
|
|
| 600 |
{!isNaN(Number(cell))
|
| 601 |
? Number(Number(cell).toFixed(3))
|
| 602 |
: cell}
|
|
@@ -611,6 +313,342 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
|
|
| 611 |
</div>
|
| 612 |
)
|
| 613 |
})()}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 614 |
</>
|
| 615 |
)}
|
| 616 |
</div>
|
|
|
|
| 1 |
import React, { useEffect, useState } from 'react'
|
|
|
|
| 2 |
import LeaderboardFilter from './LeaderboardFilter'
|
| 3 |
+
import LoadingSpinner from './LoadingSpinner'
|
| 4 |
|
| 5 |
interface LeaderboardTableProps {
|
| 6 |
+
benchmarkData: any
|
| 7 |
+
selectedModels: Set<string>
|
| 8 |
}
|
| 9 |
|
| 10 |
interface Row {
|
|
|
|
| 16 |
[group: string]: { [subgroup: string]: string[] }
|
| 17 |
}
|
| 18 |
|
| 19 |
+
const OverallMetricFilter: React.FC<{
|
| 20 |
+
overallMetrics: string[]
|
| 21 |
+
selectedOverallMetrics: Set<string>
|
| 22 |
+
setSelectedOverallMetrics: (metrics: Set<string>) => void
|
| 23 |
+
}> = ({ overallMetrics, selectedOverallMetrics, setSelectedOverallMetrics }) => {
|
| 24 |
+
const toggleMetric = (metric: string) => {
|
| 25 |
+
const newSelected = new Set(selectedOverallMetrics)
|
| 26 |
+
if (newSelected.has(metric)) {
|
| 27 |
+
newSelected.delete(metric)
|
| 28 |
+
} else {
|
| 29 |
+
newSelected.add(metric)
|
| 30 |
+
}
|
| 31 |
+
setSelectedOverallMetrics(newSelected)
|
| 32 |
+
}
|
| 33 |
+
return (
|
| 34 |
+
<div className="w-full mb-4">
|
| 35 |
+
<fieldset className="fieldset w-full p-4 rounded border border-gray-700">
|
| 36 |
+
<legend className="fieldset-legend font-semibold">
|
| 37 |
+
Metrics ({selectedOverallMetrics.size}/{overallMetrics.length})
|
| 38 |
+
</legend>
|
| 39 |
+
<div className="grid grid-cols-2 md:grid-cols-4 lg:grid-cols-6 gap-1 max-h-48 overflow-y-auto pr-2">
|
| 40 |
+
{overallMetrics.map((metric) => (
|
| 41 |
+
<label key={metric} className="flex items-center gap-2 text-sm">
|
| 42 |
+
<input
|
| 43 |
+
type="checkbox"
|
| 44 |
+
className="form-checkbox h-4 w-4"
|
| 45 |
+
checked={selectedOverallMetrics.has(metric)}
|
| 46 |
+
onChange={() => toggleMetric(metric)}
|
| 47 |
+
/>
|
| 48 |
+
<span className="truncate" title={metric}>
|
| 49 |
+
{metric}
|
| 50 |
+
</span>
|
| 51 |
+
</label>
|
| 52 |
+
))}
|
| 53 |
+
</div>
|
| 54 |
+
</fieldset>
|
| 55 |
+
</div>
|
| 56 |
+
)
|
| 57 |
}
|
| 58 |
|
| 59 |
+
const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ benchmarkData, selectedModels }) => {
|
| 60 |
const [tableRows, setTableRows] = useState<Row[]>([])
|
| 61 |
const [tableHeader, setTableHeader] = useState<string[]>([])
|
|
|
|
| 62 |
const [error, setError] = useState<string | null>(null)
|
| 63 |
const [groups, setGroups] = useState<Groups>({})
|
| 64 |
const [openGroups, setOpenGroups] = useState<{ [key: string]: boolean }>({})
|
| 65 |
const [openSubGroups, setOpenSubGroups] = useState<{ [key: string]: { [key: string]: boolean } }>(
|
| 66 |
{}
|
| 67 |
)
|
|
|
|
| 68 |
const [selectedMetrics, setSelectedMetrics] = useState<Set<string>>(new Set())
|
|
|
|
|
|
|
|
|
|
| 69 |
const [overallMetrics, setOverallMetrics] = useState<string[]>([])
|
| 70 |
+
const [selectedOverallMetrics, setSelectedOverallMetrics] = useState<Set<string>>(new Set())
|
| 71 |
|
| 72 |
useEffect(() => {
|
| 73 |
+
if (!benchmarkData) {
|
| 74 |
+
return
|
| 75 |
+
}
|
| 76 |
+
try {
|
| 77 |
+
const data = benchmarkData
|
| 78 |
+
const rows: Row[] = data['rows']
|
| 79 |
+
const allGroups = data['groups'] as { [key: string]: string[] }
|
| 80 |
+
const { Overall: overallGroup, ...groups } = allGroups
|
| 81 |
+
const uniqueMetrics = new Set<string>()
|
| 82 |
+
overallGroup?.forEach((metric) => {
|
| 83 |
+
if (metric.includes('_')) {
|
| 84 |
+
const metricName = metric.split('_').slice(1).join('_')
|
| 85 |
+
uniqueMetrics.add(metricName)
|
| 86 |
+
}
|
| 87 |
+
})
|
| 88 |
+
setOverallMetrics(Array.from(uniqueMetrics).sort())
|
| 89 |
+
setSelectedOverallMetrics(new Set(Array.from(uniqueMetrics)))
|
| 90 |
+
const groupsData = Object.entries(groups)
|
| 91 |
+
.sort(([groupA], [groupB]) => {
|
| 92 |
+
if (groupA === 'Overall') return -1
|
| 93 |
+
if (groupB === 'Overall') return 1
|
| 94 |
+
return groupA.localeCompare(groupB)
|
| 95 |
})
|
| 96 |
+
.reduce(
|
| 97 |
+
(acc, [group, metrics]) => {
|
| 98 |
+
const sortedMetrics = [...metrics].sort()
|
| 99 |
+
acc[group] = sortedMetrics.reduce<{ [key: string]: string[] }>((subAcc, metric) => {
|
| 100 |
+
const [mainGroup, subGroup] = metric.split('_')
|
| 101 |
+
if (!subAcc[mainGroup]) {
|
| 102 |
+
subAcc[mainGroup] = []
|
| 103 |
+
}
|
| 104 |
+
subAcc[mainGroup].push(metric)
|
| 105 |
+
return subAcc
|
| 106 |
+
}, {})
|
| 107 |
+
acc[group] = Object.fromEntries(
|
| 108 |
+
Object.entries(acc[group]).sort(([subGroupA], [subGroupB]) =>
|
| 109 |
+
subGroupA.localeCompare(subGroupB)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
)
|
| 111 |
+
)
|
| 112 |
+
return acc
|
| 113 |
+
},
|
| 114 |
+
{} as { [key: string]: { [key: string]: string[] } }
|
| 115 |
+
)
|
| 116 |
+
const allKeys: string[] = Array.from(new Set(rows.flatMap((row) => Object.keys(row))))
|
| 117 |
+
const headers = allKeys.filter((key) => key !== 'metric')
|
| 118 |
+
const initialOpenGroups: { [key: string]: boolean } = {}
|
| 119 |
+
const initialOpenSubGroups: { [key: string]: { [key: string]: boolean } } = {}
|
| 120 |
+
Object.keys(groupsData).forEach((group) => {
|
| 121 |
+
initialOpenGroups[group] = false
|
| 122 |
+
initialOpenSubGroups[group] = {}
|
| 123 |
+
Object.keys(groupsData[group]).forEach((subGroup) => {
|
| 124 |
+
initialOpenSubGroups[group][subGroup] = false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
})
|
| 127 |
+
const allMetrics = Object.values(groups).flat()
|
| 128 |
+
setSelectedMetrics(new Set(allMetrics))
|
| 129 |
+
setTableHeader(headers)
|
| 130 |
+
setTableRows(rows)
|
| 131 |
+
setGroups(groupsData)
|
| 132 |
+
setOpenGroups(initialOpenGroups)
|
| 133 |
+
setOpenSubGroups(initialOpenSubGroups)
|
| 134 |
+
setError(null)
|
| 135 |
+
} catch (err: any) {
|
| 136 |
+
setError('Failed to parse benchmark data, please try again: ' + err.message)
|
| 137 |
+
}
|
| 138 |
+
}, [benchmarkData])
|
| 139 |
|
| 140 |
const toggleGroup = (group: string) => {
|
| 141 |
setOpenGroups((prev) => ({ ...prev, [group]: !prev[group] }))
|
|
|
|
| 234 |
}
|
| 235 |
|
| 236 |
return (
|
| 237 |
+
<div className="rounded shadow">
|
|
|
|
| 238 |
{error && <div className="text-red-500">{error}</div>}
|
| 239 |
+
{!error && (
|
| 240 |
+
<div className="flex flex-col gap-8">
|
|
|
|
| 241 |
<div className="flex flex-col gap-4">
|
| 242 |
+
<OverallMetricFilter
|
| 243 |
+
overallMetrics={overallMetrics}
|
| 244 |
+
selectedOverallMetrics={selectedOverallMetrics}
|
| 245 |
+
setSelectedOverallMetrics={setSelectedOverallMetrics}
|
| 246 |
/>
|
| 247 |
+
{/* <LeaderboardFilter
|
| 248 |
groups={groups}
|
| 249 |
selectedMetrics={selectedMetrics}
|
| 250 |
setSelectedMetrics={setSelectedMetrics}
|
| 251 |
+
/> */}
|
| 252 |
</div>
|
| 253 |
|
| 254 |
{selectedModels.size === 0 || selectedMetrics.size === 0 ? (
|
|
|
|
| 257 |
</div>
|
| 258 |
) : (
|
| 259 |
<>
|
| 260 |
+
{/* Standalone metrics table */}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
{(() => {
|
| 262 |
const standaloneMetrics = findStandaloneMetrics()
|
| 263 |
if (standaloneMetrics.length === 0) return null
|
|
|
|
| 264 |
return (
|
| 265 |
+
<div className="overflow-x-auto max-h-[80vh] overflow-y-auto">
|
| 266 |
+
<table className="table w-full min-w-max border-gray-700 border">
|
|
|
|
| 267 |
<thead>
|
| 268 |
<tr>
|
| 269 |
+
<th className="sticky left-0 top-0 bg-base-100 z-20 border-gray-700 border">
|
| 270 |
+
Metric
|
| 271 |
+
</th>
|
| 272 |
{tableHeader
|
| 273 |
.filter((model) => selectedModels.has(model))
|
| 274 |
.map((model) => (
|
| 275 |
+
<th
|
| 276 |
+
key={`standalone-${model}`}
|
| 277 |
+
className="sticky top-0 bg-base-100 z-10 text-center text-xs border-gray-700 border"
|
| 278 |
+
>
|
| 279 |
{model}
|
| 280 |
</th>
|
| 281 |
))}
|
|
|
|
| 285 |
{standaloneMetrics.sort().map((metric) => {
|
| 286 |
const row = tableRows.find((r) => r.metric === metric)
|
| 287 |
if (!row) return null
|
|
|
|
| 288 |
return (
|
| 289 |
<tr key={`standalone-${metric}`} className="hover:bg-base-100">
|
| 290 |
+
<td className="sticky left-0 bg-base-100 z-10 border-gray-700 border">
|
| 291 |
+
{metric}
|
| 292 |
+
</td>
|
| 293 |
{tableHeader
|
| 294 |
.filter((model) => selectedModels.has(model))
|
| 295 |
.map((col) => {
|
| 296 |
const cell = row[col]
|
| 297 |
return (
|
| 298 |
+
<td
|
| 299 |
+
key={`standalone-${metric}-${col}`}
|
| 300 |
+
className="text-center border-gray-700 border"
|
| 301 |
+
>
|
| 302 |
{!isNaN(Number(cell))
|
| 303 |
? Number(Number(cell).toFixed(3))
|
| 304 |
: cell}
|
|
|
|
| 313 |
</div>
|
| 314 |
)
|
| 315 |
})()}
|
| 316 |
+
|
| 317 |
+
{/* Main metrics table */}
|
| 318 |
+
<div className="overflow-x-auto max-h-[80vh] overflow-y-auto">
|
| 319 |
+
<table className="table w-full min-w-max border-gray-700 border">
|
| 320 |
+
<thead>
|
| 321 |
+
<tr>
|
| 322 |
+
<th className="sticky left-0 top-0 bg-base-100 z-20 border-gray-700 border">
|
| 323 |
+
Attack Category Metrics
|
| 324 |
+
</th>
|
| 325 |
+
{overallMetrics
|
| 326 |
+
.filter((metric) => selectedOverallMetrics.has(metric))
|
| 327 |
+
.map((metric) => (
|
| 328 |
+
<th
|
| 329 |
+
key={metric}
|
| 330 |
+
colSpan={
|
| 331 |
+
tableHeader.filter((model) => selectedModels.has(model)).length
|
| 332 |
+
}
|
| 333 |
+
className="sticky top-0 bg-base-100 z-10 text-center border-x border-gray-300 border border-gray-700 border"
|
| 334 |
+
>
|
| 335 |
+
{metric}
|
| 336 |
+
</th>
|
| 337 |
+
))}
|
| 338 |
+
</tr>
|
| 339 |
+
<tr>
|
| 340 |
+
<th className="sticky left-0 bg-base-100 z-10 border-gray-700 border"></th>
|
| 341 |
+
{overallMetrics
|
| 342 |
+
.filter((metric) => selectedOverallMetrics.has(metric))
|
| 343 |
+
.map((metric) => (
|
| 344 |
+
<React.Fragment key={`header-models-${metric}`}>
|
| 345 |
+
{tableHeader
|
| 346 |
+
.filter((model) => selectedModels.has(model))
|
| 347 |
+
.map((model) => (
|
| 348 |
+
<th
|
| 349 |
+
key={`${metric}-${model}`}
|
| 350 |
+
className="sticky top-12 bg-base-100 z-10 text-center text-xs border-gray-700 border border-bottom-solid border-b-gray-700 border-b-2"
|
| 351 |
+
>
|
| 352 |
+
{model}
|
| 353 |
+
</th>
|
| 354 |
+
))}
|
| 355 |
+
</React.Fragment>
|
| 356 |
+
))}
|
| 357 |
+
</tr>
|
| 358 |
+
</thead>
|
| 359 |
+
<tbody>
|
| 360 |
+
{/* First render each group */}
|
| 361 |
+
{Object.entries(groups).map(([group, subGroups]) => {
|
| 362 |
+
// Skip the "Overall" group completely
|
| 363 |
+
if (group === 'Overall') return null
|
| 364 |
+
|
| 365 |
+
// Get all metrics for this group
|
| 366 |
+
const allGroupMetrics = Object.values(subGroups).flat()
|
| 367 |
+
// Filter to only include selected metrics
|
| 368 |
+
const visibleGroupMetrics = filterMetricsByGroupAndSubgroup(
|
| 369 |
+
allGroupMetrics,
|
| 370 |
+
group
|
| 371 |
+
)
|
| 372 |
+
|
| 373 |
+
// Skip this group if no metrics are selected
|
| 374 |
+
if (visibleGroupMetrics.length === 0) return null
|
| 375 |
+
|
| 376 |
+
return (
|
| 377 |
+
<React.Fragment key={group}>
|
| 378 |
+
{/* Group row with average stats for the entire group */}
|
| 379 |
+
<tr
|
| 380 |
+
className="bg-base-200 cursor-pointer hover:bg-base-300"
|
| 381 |
+
onClick={() => toggleGroup(group)}
|
| 382 |
+
>
|
| 383 |
+
<td className="sticky left-0 bg-base-200 z-10 font-medium border-gray-700 border">
|
| 384 |
+
{openGroups[group] ? '▼ ' : '▶ '}
|
| 385 |
+
{group}
|
| 386 |
+
</td>
|
| 387 |
+
{/* For each metric column */}
|
| 388 |
+
{overallMetrics
|
| 389 |
+
.filter((metric) => selectedOverallMetrics.has(metric))
|
| 390 |
+
.map((metric) => (
|
| 391 |
+
// Render sub-columns for each model
|
| 392 |
+
<React.Fragment key={`${group}-${metric}`}>
|
| 393 |
+
{tableHeader
|
| 394 |
+
.filter((model) => selectedModels.has(model))
|
| 395 |
+
.map((col) => {
|
| 396 |
+
// Find all metrics in this group that match the current metric name
|
| 397 |
+
const allMetricsWithName = findAllMetricsForName(metric)
|
| 398 |
+
const metricsInGroupForThisMetric =
|
| 399 |
+
visibleGroupMetrics.filter((m) =>
|
| 400 |
+
allMetricsWithName.includes(m)
|
| 401 |
+
)
|
| 402 |
+
const stats = calculateStats(metricsInGroupForThisMetric, col)
|
| 403 |
+
|
| 404 |
+
return (
|
| 405 |
+
<td
|
| 406 |
+
key={`${group}-${metric}-${col}`}
|
| 407 |
+
className="font-medium text-center border-gray-700 border"
|
| 408 |
+
>
|
| 409 |
+
{!isNaN(stats.avg)
|
| 410 |
+
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
| 411 |
+
: 'N/A'}
|
| 412 |
+
</td>
|
| 413 |
+
)
|
| 414 |
+
})}
|
| 415 |
+
</React.Fragment>
|
| 416 |
+
))}
|
| 417 |
+
</tr>
|
| 418 |
+
|
| 419 |
+
{/* Only render subgroups if group is open */}
|
| 420 |
+
{openGroups[group] &&
|
| 421 |
+
Object.entries(subGroups).map(([subGroup, metrics]) => {
|
| 422 |
+
// Filter to only include selected metrics in this subgroup
|
| 423 |
+
const visibleSubgroupMetrics = filterMetricsByGroupAndSubgroup(
|
| 424 |
+
metrics,
|
| 425 |
+
group,
|
| 426 |
+
subGroup
|
| 427 |
+
)
|
| 428 |
+
|
| 429 |
+
// Skip this subgroup if no metrics are selected
|
| 430 |
+
if (visibleSubgroupMetrics.length === 0) return null
|
| 431 |
+
|
| 432 |
+
return (
|
| 433 |
+
<React.Fragment key={`${group}-${subGroup}`}>
|
| 434 |
+
{/* Subgroup row with average stats for the subgroup */}
|
| 435 |
+
<tr
|
| 436 |
+
className="bg-base-100 cursor-pointer hover:bg-base-200"
|
| 437 |
+
onClick={() => toggleSubGroup(group, subGroup)}
|
| 438 |
+
>
|
| 439 |
+
<td className="sticky left-0 bg-base-100 z-10 pl-6 font-medium border-gray-700 border">
|
| 440 |
+
{openSubGroups[group]?.[subGroup] ? '▼ ' : '▶ '}
|
| 441 |
+
{subGroup}
|
| 442 |
+
</td>
|
| 443 |
+
{/* For each metric column */}
|
| 444 |
+
{overallMetrics
|
| 445 |
+
.filter((metric) => selectedOverallMetrics.has(metric))
|
| 446 |
+
.map((metric) => (
|
| 447 |
+
// Render sub-columns for each model
|
| 448 |
+
<React.Fragment key={`${group}-${subGroup}-${metric}`}>
|
| 449 |
+
{tableHeader
|
| 450 |
+
.filter((model) => selectedModels.has(model))
|
| 451 |
+
.map((col) => {
|
| 452 |
+
// Find all metrics in this subgroup that match the current metric name
|
| 453 |
+
const allMetricsWithName =
|
| 454 |
+
findAllMetricsForName(metric)
|
| 455 |
+
const metricsInSubgroupForThisMetric =
|
| 456 |
+
visibleSubgroupMetrics.filter((m) =>
|
| 457 |
+
allMetricsWithName.includes(m)
|
| 458 |
+
)
|
| 459 |
+
const stats = calculateStats(
|
| 460 |
+
metricsInSubgroupForThisMetric,
|
| 461 |
+
col
|
| 462 |
+
)
|
| 463 |
+
|
| 464 |
+
return (
|
| 465 |
+
<td
|
| 466 |
+
key={`${group}-${subGroup}-${metric}-${col}`}
|
| 467 |
+
className="font-medium text-center border-gray-700 border"
|
| 468 |
+
>
|
| 469 |
+
{!isNaN(stats.avg)
|
| 470 |
+
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
| 471 |
+
: 'N/A'}
|
| 472 |
+
</td>
|
| 473 |
+
)
|
| 474 |
+
})}
|
| 475 |
+
</React.Fragment>
|
| 476 |
+
))}
|
| 477 |
+
</tr>
|
| 478 |
+
|
| 479 |
+
{/* Individual metric rows */}
|
| 480 |
+
{openSubGroups[group]?.[subGroup] &&
|
| 481 |
+
// Sort visibleSubgroupMetrics alphabetically by the clean metric name
|
| 482 |
+
[...visibleSubgroupMetrics]
|
| 483 |
+
.sort((a, b) => {
|
| 484 |
+
// For metrics with format {category}_{strength}_{overall_metric_name},
|
| 485 |
+
// First sort by category, then by overall_metric_name, then by strength
|
| 486 |
+
|
| 487 |
+
// First extract the overall metric group
|
| 488 |
+
const getOverallMetricGroup = (metric: string) => {
|
| 489 |
+
for (const overall of overallMetrics) {
|
| 490 |
+
if (
|
| 491 |
+
metric.endsWith(`_${overall}`) ||
|
| 492 |
+
metric === overall
|
| 493 |
+
) {
|
| 494 |
+
return overall
|
| 495 |
+
}
|
| 496 |
+
}
|
| 497 |
+
return ''
|
| 498 |
+
}
|
| 499 |
+
|
| 500 |
+
const overallA = getOverallMetricGroup(a)
|
| 501 |
+
const overallB = getOverallMetricGroup(b)
|
| 502 |
+
|
| 503 |
+
// Extract the strength (last part before the overall metric)
|
| 504 |
+
const stripOverall = (metric: string, overall: string) => {
|
| 505 |
+
if (metric.endsWith(`_${overall}`)) {
|
| 506 |
+
// Remove the overall metric group and any preceding underscore
|
| 507 |
+
const stripped = metric.slice(
|
| 508 |
+
0,
|
| 509 |
+
metric.length - overall.length - 1
|
| 510 |
+
)
|
| 511 |
+
const parts = stripped.split('_')
|
| 512 |
+
return parts.length > 0 ? parts[parts.length - 1] : ''
|
| 513 |
+
}
|
| 514 |
+
return metric
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
// Extract the category (what remains after removing strength and overall_metric_name)
|
| 518 |
+
const getCategory = (metric: string, overall: string) => {
|
| 519 |
+
if (metric.endsWith(`_${overall}`)) {
|
| 520 |
+
const stripped = metric.slice(
|
| 521 |
+
0,
|
| 522 |
+
metric.length - overall.length - 1
|
| 523 |
+
)
|
| 524 |
+
const parts = stripped.split('_')
|
| 525 |
+
// Remove the last part (strength) and join the rest (category)
|
| 526 |
+
return parts.length > 1
|
| 527 |
+
? parts.slice(0, parts.length - 1).join('_')
|
| 528 |
+
: ''
|
| 529 |
+
}
|
| 530 |
+
return metric
|
| 531 |
+
}
|
| 532 |
+
|
| 533 |
+
const categoryA = getCategory(a, overallA)
|
| 534 |
+
const categoryB = getCategory(b, overallB)
|
| 535 |
+
|
| 536 |
+
// First sort by category
|
| 537 |
+
if (categoryA !== categoryB) {
|
| 538 |
+
return categoryA.localeCompare(categoryB)
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
// Then sort by overall metric name
|
| 542 |
+
if (overallA !== overallB) {
|
| 543 |
+
return overallA.localeCompare(overallB)
|
| 544 |
+
}
|
| 545 |
+
|
| 546 |
+
// Finally sort by strength
|
| 547 |
+
const subA = stripOverall(a, overallA)
|
| 548 |
+
const subB = stripOverall(b, overallB)
|
| 549 |
+
|
| 550 |
+
// Try to parse subA and subB as numbers, handling k/m/b suffixes
|
| 551 |
+
const parseNumber = (str: string) => {
|
| 552 |
+
const match = str.match(/^(\d+(?:\.\d+)?)([kKmMbB]?)$/)
|
| 553 |
+
if (!match) return NaN
|
| 554 |
+
let [_, num, suffix] = match
|
| 555 |
+
let value = parseFloat(num)
|
| 556 |
+
switch (suffix.toLowerCase()) {
|
| 557 |
+
case 'k':
|
| 558 |
+
value *= 1e3
|
| 559 |
+
break
|
| 560 |
+
case 'm':
|
| 561 |
+
value *= 1e6
|
| 562 |
+
break
|
| 563 |
+
case 'b':
|
| 564 |
+
value *= 1e9
|
| 565 |
+
break
|
| 566 |
+
}
|
| 567 |
+
return value
|
| 568 |
+
}
|
| 569 |
+
|
| 570 |
+
const numA = parseNumber(subA)
|
| 571 |
+
const numB = parseNumber(subB)
|
| 572 |
+
|
| 573 |
+
if (!isNaN(numA) && !isNaN(numB)) {
|
| 574 |
+
return numA - numB
|
| 575 |
+
}
|
| 576 |
+
// Fallback to string comparison if not both numbers
|
| 577 |
+
return subA.localeCompare(subB)
|
| 578 |
+
})
|
| 579 |
+
.map((metric) => {
|
| 580 |
+
const row = tableRows.find((r) => r.metric === metric)
|
| 581 |
+
if (!row) return null
|
| 582 |
+
|
| 583 |
+
// Extract the metric name (after the underscore)
|
| 584 |
+
const metricName = metric.includes('_')
|
| 585 |
+
? metric.split('_').slice(1).join('_')
|
| 586 |
+
: metric
|
| 587 |
+
|
| 588 |
+
return (
|
| 589 |
+
<tr key={metric} className="hover:bg-base-100">
|
| 590 |
+
<td className="sticky left-0 bg-base-100 z-10 pl-10 border-gray-700 border">
|
| 591 |
+
{metric}
|
| 592 |
+
</td>
|
| 593 |
+
{/* For each metric column */}
|
| 594 |
+
{overallMetrics
|
| 595 |
+
.filter((oMetric) =>
|
| 596 |
+
selectedOverallMetrics.has(oMetric)
|
| 597 |
+
)
|
| 598 |
+
.map((oMetric) => {
|
| 599 |
+
// Only show values for the matching metric
|
| 600 |
+
const isMatchingMetric =
|
| 601 |
+
findAllMetricsForName(oMetric).includes(metric)
|
| 602 |
+
|
| 603 |
+
if (!isMatchingMetric) {
|
| 604 |
+
// Fill empty cells for non-matching metrics
|
| 605 |
+
return (
|
| 606 |
+
<React.Fragment key={`${metric}-${oMetric}`}>
|
| 607 |
+
{tableHeader
|
| 608 |
+
.filter((model) =>
|
| 609 |
+
selectedModels.has(model)
|
| 610 |
+
)
|
| 611 |
+
.map((col) => (
|
| 612 |
+
<td
|
| 613 |
+
key={`${metric}-${oMetric}-${col}`}
|
| 614 |
+
className="text-center border-gray-700 border"
|
| 615 |
+
></td>
|
| 616 |
+
))}
|
| 617 |
+
</React.Fragment>
|
| 618 |
+
)
|
| 619 |
+
}
|
| 620 |
+
return (
|
| 621 |
+
<React.Fragment key={`${metric}-${oMetric}`}>
|
| 622 |
+
{tableHeader
|
| 623 |
+
.filter((model) => selectedModels.has(model))
|
| 624 |
+
.map((col) => {
|
| 625 |
+
const cell = row[col]
|
| 626 |
+
return (
|
| 627 |
+
<td
|
| 628 |
+
key={`${metric}-${oMetric}-${col}`}
|
| 629 |
+
className="text-center border-gray-700 border"
|
| 630 |
+
>
|
| 631 |
+
{!isNaN(Number(cell))
|
| 632 |
+
? Number(Number(cell).toFixed(3))
|
| 633 |
+
: cell}
|
| 634 |
+
</td>
|
| 635 |
+
)
|
| 636 |
+
})}
|
| 637 |
+
</React.Fragment>
|
| 638 |
+
)
|
| 639 |
+
})}
|
| 640 |
+
</tr>
|
| 641 |
+
)
|
| 642 |
+
})}
|
| 643 |
+
</React.Fragment>
|
| 644 |
+
)
|
| 645 |
+
})}
|
| 646 |
+
</React.Fragment>
|
| 647 |
+
)
|
| 648 |
+
})}
|
| 649 |
+
</tbody>
|
| 650 |
+
</table>
|
| 651 |
+
</div>
|
| 652 |
</>
|
| 653 |
)}
|
| 654 |
</div>
|
frontend/src/components/LoadingSpinner.tsx
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import React from 'react'
|
| 2 |
+
|
| 3 |
+
/** Props accepted by {@link LoadingSpinner}. */
interface LoadingSpinnerProps {
  /**
   * Minimum height of the spinner container, as any valid CSS length
   * (e.g. `'300px'`, `'50vh'`). Defaults to `'300px'`.
   */
  minHeight?: string
}

/**
 * Renders a spinner centered horizontally and vertically inside a flex
 * container that is at least `minHeight` tall.
 */
const LoadingSpinner: React.FC<LoadingSpinnerProps> = ({ minHeight = '300px' }) => {
  // The height is applied through an inline style rather than an interpolated
  // Tailwind class (`min-h-[${minHeight}]`): Tailwind only generates CSS for
  // class names that appear as complete literal strings in the source, so a
  // class assembled at runtime would silently produce no rule.
  return (
    <div className="flex items-center justify-center" style={{ minHeight }}>
      <span className="loading loading-spinner loading-lg text-primary"></span>
    </div>
  )
}
|
| 14 |
+
|
| 15 |
+
export default LoadingSpinner
|
frontend/src/components/ModelFilter.tsx
CHANGED
|
@@ -19,7 +19,7 @@ const ModelFilter: React.FC<ModelFilterProps> = ({ models, selectedModels, setSe
|
|
| 19 |
|
| 20 |
return (
|
| 21 |
<div className="w-full mb-4">
|
| 22 |
-
<fieldset className="fieldset w-full p-4 rounded border">
|
| 23 |
<legend className="fieldset-legend font-semibold">
|
| 24 |
Models ({selectedModels.size}/{models.length})
|
| 25 |
</legend>
|
|
|
|
| 19 |
|
| 20 |
return (
|
| 21 |
<div className="w-full mb-4">
|
| 22 |
+
<fieldset className="fieldset w-full p-4 rounded border border-gray-700">
|
| 23 |
<legend className="fieldset-legend font-semibold">
|
| 24 |
Models ({selectedModels.size}/{models.length})
|
| 25 |
</legend>
|