import pandas as pd
import gradio as gr


class Leaderboard:
    """Build leaderboard tables from a CSV file of model performance results.

    The CSV must contain at least the columns ``model``, ``dataset``,
    ``emotion`` and ``wavlm``; any further columns are kept as additional
    selectable features. A row whose ``emotion`` (or ``dataset``) value is
    ``'All'`` is treated as the aggregated entry and is displayed under the
    ``'Average'`` column.
    """

    def __init__(self, data_path='models_performance.csv'):
        """Load the performance data and derive the selectable value lists.

        Args:
            data_path (str): Path of the CSV file with the models performance.
        """
        self.data_path = data_path
        self.models_performance_dataframe = self.initialize_models_performance_dataframe()
        self.link_dict = self.initialize_link_dict()
        self.dataset_list = sorted(set(self.models_performance_dataframe['dataset']))
        self.emotional_dataset_list = sorted(self.initialize_emotional_datasets())
        self.emotion_list = sorted(set(self.models_performance_dataframe['emotion']))

    def get_dataset_list(self) -> list:
        """Return the sorted list of all dataset names."""
        return self.dataset_list

    def get_emotional_dataset_list(self) -> list:
        """Return the sorted list of datasets that have per-emotion rows."""
        return self.emotional_dataset_list

    def get_emotion_list(self) -> list:
        """Return the sorted list of all emotion values."""
        return self.emotion_list

    def get_models_performance_dataframe_column(self) -> pd.Index:
        """Return the column labels of the models performance dataframe."""
        return self.models_performance_dataframe.columns

    def initialize_models_performance_dataframe(self) -> pd.DataFrame:
        """Load the models performance dataframe from the CSV file.

        Returns:
            pd.DataFrame: The CSV content with 'model', 'dataset', 'emotion'
            and 'wavlm' moved to the front; the remaining columns keep their
            original relative order.

        Raises:
            KeyError: If one of the four leading columns is missing.
        """
        model_performance_dataframe = pd.read_csv(self.data_path)
        initial_columns = ['model', 'dataset', 'emotion', 'wavlm']
        other_columns = [
            column for column in model_performance_dataframe.columns
            if column not in initial_columns
        ]
        return model_performance_dataframe[initial_columns + other_columns]

    def initialize_emotional_datasets(self) -> list:
        """Return the datasets having at least one row whose emotion is not 'All'."""
        performance = self.models_performance_dataframe
        has_specific_emotion = performance['emotion'] != 'All'
        return performance.loc[has_specific_emotion, 'dataset'].unique().tolist()

    def initialize_link_dict(self) -> dict:
        """Return the mapping from model names to their repository links."""
        return {
            'WhisperSpeech': 'https://github.com/collabora/WhisperSpeech',
            'SpeechT5': 'https://github.com/microsoft/SpeechT5',
            'VALL-E-X': 'https://github.com/Plachtaa/VALL-E-X',
            'XTTS-v2': 'https://huggingface.co/coqui/XTTS-v2',
            'OuteTTS': 'https://huggingface.co/OuteAI/OuteTTS-0.2-500M'
        }

    def _format_model_link(self, model) -> str:
        """Return the model name as a Markdown link, or as plain text if no link is known."""
        link = self.link_dict.get(model)
        # An unknown model would otherwise be rendered as the empty link "[model]()".
        return f"[{model}]({link})" if link else model

    def create_leaderboard_data(self, selected_emotion_or_dataset, selected_feature,
                                emotion_or_dataset) -> pd.DataFrame:
        """Create leaderboard data.

        Args:
            selected_emotion_or_dataset (str): The selected emotion or dataset to filter the leaderboard data.
            selected_feature (str): The selected feature to display in the leaderboard.
            emotion_or_dataset (str): Indicates whether the selection is based on 'emotion' or 'dataset'.

        Returns:
            pd.DataFrame: One row per model. The columns are 'Model', 'Average',
            'LibriSpeech Test Clean' (only when present), then the remaining
            values of the opposite dimension in sorted order. Columns without
            data (for example 'Average' when there is no 'All' row, or every
            column when the selection matches nothing) are filled with NaN.
        """
        # The dimension that is NOT filtered on becomes the columns of the table.
        opposite_emotion_or_dataset = 'dataset' if emotion_or_dataset == 'emotion' else 'emotion'

        # Boolean indexing returns a new frame; the stored dataframe is never modified.
        performance = self.models_performance_dataframe
        selected_rows = performance[performance[emotion_or_dataset] == selected_emotion_or_dataset]

        # Aggregate the selected feature per model; for repeated (model, column)
        # pairs the last row wins.
        leaderboard_data = {}
        for model, column, value in zip(selected_rows['model'],
                                        selected_rows[opposite_emotion_or_dataset],
                                        selected_rows[selected_feature]):
            if column == 'All':
                column = 'Average'
            leaderboard_data.setdefault(model, {})[column] = value

        records = [{'Model': model, **scores} for model, scores in leaderboard_data.items()]
        leaderboard_dataframe = pd.DataFrame(records)

        # Ensure specific columns appear first, followed by the others in sorted order.
        initial_columns = ['Model', 'Average']
        if 'LibriSpeech Test Clean' in leaderboard_dataframe.columns:
            initial_columns.append('LibriSpeech Test Clean')
        other_columns = sorted(
            column for column in leaderboard_dataframe.columns
            if column not in initial_columns
        )
        # reindex (unlike plain column selection) tolerates missing leading
        # columns and returns a fresh frame that is safe to assign into.
        leaderboard_dataframe = leaderboard_dataframe.reindex(columns=initial_columns + other_columns)

        # Map model names to hyperlinks using the link_dict.
        leaderboard_dataframe['Model'] = leaderboard_dataframe['Model'].map(self._format_model_link)
        return leaderboard_dataframe

    def update_leaderboard_data_in_emotion_section(self, selected_emotion_or_dataset,
                                                   emotion_or_dataset, leaderboard_table) -> tuple:
        """Update leaderboard data based on selected emotion or dataset in the emotion section.

        Args:
            selected_emotion_or_dataset (str): The selected emotion or dataset to filter the leaderboard data.
            emotion_or_dataset (str): Indicates whether the selection is based on 'emotion' or 'dataset'.
            leaderboard_table (pd.DataFrame): Previous leaderboard data.

        Returns:
            tuple: ``(component_update, leaderboard)``. When a selection is
            given, the first item is ``gr.update(value=None)`` and the second
            is the new 'wavlm' leaderboard restricted to emotional datasets;
            otherwise an empty ``gr.update()`` and the previous table are
            returned unchanged.
        """
        if selected_emotion_or_dataset is None:
            return gr.update(), leaderboard_table

        leaderboard_dataframe = self.create_leaderboard_data(
            selected_emotion_or_dataset, 'wavlm', emotion_or_dataset)

        # Drop the columns of datasets that have no emotion-based division.
        emotional_datasets = set(self.emotional_dataset_list)
        non_emotional_columns = [
            dataset for dataset in self.dataset_list
            if dataset in leaderboard_dataframe.columns and dataset not in emotional_datasets
        ]
        leaderboard_dataframe = leaderboard_dataframe.drop(columns=non_emotional_columns)

        # NOTE(review): value=None presumably clears the sibling selector
        # component - confirm against the UI wiring.
        return gr.update(value=None), leaderboard_dataframe

    def update_leaderboard_data_in_feature_section(self, selected_emotion_or_dataset, selected_feature,
                                                   emotion_or_dataset, leaderboard_table) -> tuple:
        """Update leaderboard data based on selected emotion or dataset in the feature section.

        Args:
            selected_emotion_or_dataset (str): The selected emotion or dataset to filter the leaderboard data.
            selected_feature (str): The selected feature to display in the leaderboard.
            emotion_or_dataset (str): Indicates whether the selection is based on 'emotion' or 'dataset'.
            leaderboard_table (pd.DataFrame): Previous leaderboard data.

        Returns:
            tuple: ``(component_update, leaderboard)``. When a selection is
            given, the first item is ``gr.update(value=None)`` and the second
            is the new leaderboard; otherwise an empty ``gr.update()`` and the
            previous table are returned unchanged.
        """
        if selected_emotion_or_dataset is None:
            return gr.update(), leaderboard_table

        leaderboard_dataframe = self.create_leaderboard_data(
            selected_emotion_or_dataset, selected_feature, emotion_or_dataset)
        return gr.update(value=None), leaderboard_dataframe

    def update_leaderboard_data_by_feature(self, emotion, dataset, selected_feature) -> pd.DataFrame:
        """Update leaderboard data based on the selected feature.

        Args:
            emotion (str): Currently selected emotion to filter the leaderboard data.
            dataset (str): Currently selected dataset to filter the leaderboard data.
            selected_feature (str): The selected feature to display in the leaderboard.

        Returns:
            pd.DataFrame: The leaderboard filtered by ``emotion`` when it is
            not None, otherwise filtered by ``dataset``.
        """
        if emotion is not None:
            return self.create_leaderboard_data(emotion, selected_feature, 'emotion')
        return self.create_leaderboard_data(dataset, selected_feature, 'dataset')