Spaces:
Build error
Build error
"""Streamlit dashboard for MMLU translation review progress.

Loads the results dataset from the Hugging Face Hub and the user list of the
``cohere`` Argilla workspace, then renders the total number of completed
tasks, a per-language table and bar chart, a per-user leaderboard and the
raw records.
"""

import os
from datetime import datetime

import argilla as rg
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from datasets import load_dataset

# Connection settings and credentials come from the environment; each value
# is None when its variable is unset.
ARGILLA_API_URL = os.environ.get("ARGILLA_API_URL")
ARGILLA_API_KEY = os.environ.get("ARGILLA_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")

client = rg.Argilla(
    api_url=ARGILLA_API_URL,
    api_key=ARGILLA_API_KEY,
)
workspace = client.workspaces('cohere')

# Stringified Argilla user id -> username; used to label leaderboard rows.
users_map = {str(user.id): user.username for user in workspace.users}

ds = load_dataset(
    "CohereForAI/mmlu-translations-results",
    split="train",
    token=HF_TOKEN,
)
df = ds.to_pandas()

# NOTE(review): the "π" glyphs look like mis-encoded emoji; confirm the
# intended characters before changing this user-facing string.
st.title("π MMLU Translation Review Progress π")
st.markdown(f"**Total tasks completed:** {len(ds)}")

# NOTE(review): "Last updated" is the number of whole minutes elapsed since
# the top of the current hour (local time), not the time of an actual
# refresh. Presumably the data is refreshed on the hour -- confirm.
now = datetime.now()
top_of_the_hour = now.replace(minute=0, second=0, microsecond=0)
minutes_past = (now - top_of_the_hour).seconds // 60
st.markdown(f"**Last updated:** {minutes_past} minutes ago")

st.header("Progress by Language")

# Pull the language out of each record's metadata into its own column, then
# count records per language.
df['language'] = df['metadata'].apply(lambda x: x.get('language'))
language_counts = df['language'].value_counts()

# Bar chart of the per-language counts.
fig, ax = plt.subplots()
language_counts.plot(kind='bar', ax=ax)
ax.set_title('Number of Completed Tasks for Each Language')
ax.set_xlabel('Language')
ax.set_ylabel('Count')

# Same counts as a two-column table.
language_counts_df = language_counts.reset_index()
language_counts_df.columns = ['Language', 'Count']

st.table(language_counts_df)
st.pyplot(fig)
# Streamlit re-executes this script on every rerun; close the figure so
# figures do not accumulate in pyplot's global registry.
plt.close(fig)

st.header("Leaderboard")

# One entry per individual `is_edit_required` response. `explode()` yields
# NaN for rows whose response list is empty; drop those before subscripting,
# otherwise the user_id lookup raises TypeError.
user_ids = (
    df['responses']
    .apply(lambda x: x['is_edit_required'])
    .explode()
    .dropna()
    .apply(lambda x: x['user_id'])
)
user_id_counts = user_ids.value_counts()

# Replace user ids with usernames. Ids that are not in the workspace keep
# their raw id as the label instead of collapsing to NaN.
user_id_counts.index = user_id_counts.index.map(
    lambda uid: users_map.get(str(uid), uid)
)

user_id_counts_df = user_id_counts.reset_index()
user_id_counts_df.columns = ['Username', 'Count']
st.table(user_id_counts_df)

st.header("Raw Dataset")
# Includes the derived 'language' column added above.
st.dataframe(df)