Spaces:

a-ghorbani
/

ai-phone-leaderboard

Running

App Files Community

agh123 committed on Dec 29, 2024

Commit

d2c5913

1 Parent(s): 9543568

update the code based on data format change

Browse files

Files changed (6) hide show

.gitattributes +1 -0
main.py +8 -76
src/app.py +51 -12
src/components/filters.py +192 -36
src/components/visualizations.py +236 -53
src/services/firebase.py +85 -42

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+src/static/images/Bench.gif filter=lfs diff=lfs merge=lfs -text

main.py CHANGED Viewed

@@ -1,79 +1,11 @@
-import streamlit as st
-import asyncio
-from src.core.styles import CUSTOM_CSS
-from src.components.header import render_header
-from src.components.filters import render_table_filters, render_plot_filters
-from src.components.visualizations import (
-    render_performance_plots,
-    render_leaderboard_table,
-)
-from src.services.firebase import fetch_leaderboard_data
-# Configure the page
-st.set_page_config(
-    page_title="AI-Phone Leaderboard",
-    page_icon="src/static/images/favicon.png",
-    layout="wide",
-    initial_sidebar_state="expanded",
-)
-# Apply custom CSS
-st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
-async def main():
-    # Render header
-    render_header()
-    # Fetch initial data
-    full_df = await fetch_leaderboard_data()
-    if full_df.empty:
-        st.info("No benchmark data available yet!")
-        return
-    # Get unique values for filters
-    models = sorted(full_df["Model"].unique())
-    benchmarks = sorted(full_df["Benchmark"].unique())
-    platforms = sorted(full_df["Platform"].unique())
-    devices = sorted(full_df["Normalized Device ID"].unique())
-    # Render table filters and get selections
-    (
-        selected_model_table,
-        selected_benchmark_table,
-        selected_platform_table,
-        selected_device_table,
-    ) = render_table_filters(models, benchmarks, platforms, devices)
-    # Filter data for table
-    table_df = full_df.copy()
-    if selected_model_table != "All":
-        table_df = table_df[table_df["Model"] == selected_model_table]
-    if selected_benchmark_table != "All":
-        table_df = table_df[table_df["Benchmark"] == selected_benchmark_table]
-    if selected_platform_table != "All":
-        table_df = table_df[table_df["Platform"] == selected_platform_table]
-    if selected_device_table != "All":
-        table_df = table_df[table_df["Normalized Device ID"] == selected_device_table]
-    # Render leaderboard table
-    render_leaderboard_table(table_df)
-    # Performance plots section
-    st.subheader("Performance Comparison")
-    # Render plot filters and get selections
-    selected_model_plot, selected_benchmark_plot = render_plot_filters(
-        models, benchmarks
-    )
-    # Filter data for plots
-    plot_df = full_df[
-        (full_df["Model"] == selected_model_plot)
-        & (full_df["Benchmark"] == selected_benchmark_plot)
-    ]
-    # Render performance plots
-    render_performance_plots(plot_df, selected_model_plot)
 if __name__ == "__main__":
-    asyncio.run(main())

+"""
+Main module for the frontend application.
+This file serves as a module init file.
+"""
+import asyncio
+import streamlit as st
+from src.app import main
 if __name__ == "__main__":
+    asyncio.run(main())

src/app.py CHANGED Viewed

@@ -1,15 +1,54 @@
 import asyncio
-from typing import Optional
 import pandas as pd
-async def fetch_and_filter_data(
-    model_name: Optional[str] = None,
-    benchmark_label: Optional[str] = None
-) -> pd.DataFrame:
-    """Fetch and filter data based on parameters"""
-    from .services.firebase import fetch_leaderboard_data
-    return await fetch_leaderboard_data(
-        model_name=model_name,
-        benchmark_label=benchmark_label
-    )

 import asyncio
+import streamlit as st
 import pandas as pd
+from typing import Optional, List, Set
+from .components.filters import render_table_filters, render_plot_filters
+from .components.visualizations import (
+    render_leaderboard_table,
+    render_performance_plots,
+)
+from .services.firebase import fetch_leaderboard_data
+def get_unique_values(df: pd.DataFrame) -> tuple[List[str], List[str], List[str]]:
+    """Get unique values for filters"""
+    models = sorted(df["Model ID"].unique().tolist())
+    platforms = sorted(df["Platform"].unique().tolist())
+    devices = sorted(df["Device"].unique().tolist())
+    return models, platforms, devices
+async def main():
+    """Main application entry point"""
+    st.set_page_config(
+        page_title="AI Phone Benchmark Leaderboard",
+        page_icon="📱",
+        layout="wide",
+    )
+    # Fetch initial data
+    df = await fetch_leaderboard_data()
+    if df.empty:
+        st.error("No data available. Please check your connection and try again.")
+        return
+    # Get unique values for filters
+    models, platforms, devices = get_unique_values(df)
+    # Render table filters in sidebar
+    table_filters = render_table_filters(models, platforms, devices)
+    # Render the main leaderboard table
+    st.title("📱 AI Phone Benchmark Leaderboard")
+    render_leaderboard_table(df, table_filters)
+    # Render plot section
+    st.title("📊 Performance Comparison")
+    plot_filters = render_plot_filters(models, platforms, devices)
+    render_performance_plots(df, plot_filters)
+if __name__ == "__main__":
+    asyncio.run(main())

src/components/filters.py CHANGED Viewed

@@ -1,50 +1,206 @@
 import streamlit as st
-from typing import List, Tuple
-def render_table_filters(
-    models: List[str],
-    benchmarks: List[str],
-    platforms: List[str],
-    devices: List[str]
-) -> Tuple[str, str, str, str]:
-    """Render and handle table filters"""
-    table_filters = st.container()
-    with table_filters:
-        t1, t2, t3, t4 = st.columns(4)
-        with t1:
-            selected_model = st.selectbox(
-                "Model", ["All"] + list(models), key="table_model"
             )
-        with t2:
-            selected_benchmark = st.selectbox(
-                "Benchmark", ["All"] + list(benchmarks), key="table_benchmark"
             )
-        with t3:
-            selected_platform = st.selectbox(
-                "Platform", ["All"] + list(platforms), key="table_platform"
             )
-        with t4:
-            selected_device = st.selectbox(
-                "Device", ["All"] + list(devices), key="table_device"
             )
-    return selected_model, selected_benchmark, selected_platform, selected_device
 def render_plot_filters(
-    models: List[str],
-    benchmarks: List[str]
-) -> Tuple[str, str]:
     """Render and handle plot filters"""
     plot_filters = st.container()
     with plot_filters:
-        p1, p2 = st.columns(2)
         with p1:
-            selected_model = st.selectbox(
-                "Model for Comparison", models, key="plot_model"
-            )
         with p2:
-            selected_benchmark = st.selectbox(
-                "Benchmark for Comparison", benchmarks, key="plot_benchmark"
             )
-    return selected_model, selected_benchmark

 import streamlit as st
+from typing import List, Tuple, Dict, Set
+def render_grouping_options() -> List[str]:
+    """Render grouping options selector"""
+    available_groups = [
+        "Model ID",
+        "Device",
+        "Platform",
+        "n_threads",
+        "flash_attn",
+        "cache_type_k",
+        "cache_type_v",
+        "PP Value",
+        "TG Value",
+    ]
+    default_groups = ["Model ID", "Device", "Platform"]
+    selected_groups = st.multiselect(
+        "Group Results By",
+        options=available_groups,
+        default=default_groups,
+        help="Select columns to group the results by",
+    )
+    return selected_groups
+def render_column_visibility() -> Set[str]:
+    """Render column visibility selector"""
+    column_categories = {
+        "Device Info": [
+            "Device",
+            "Platform",
+            "CPU Cores",
+            "Total Memory (GB)",
+            "Memory Usage (%)",
+        ],
+        "Benchmark Info": [
+            "PP Value",
+            "TG Value",
+            "Prompt Processing",
+            "Token Generation",
+        ],
+        "Model Info": [
+            "Model",
+            "Model Size",
+            "Model ID",
+        ],
+        "Advanced": [
+            "n_threads",
+            "flash_attn",
+            "cache_type_k",
+            "cache_type_v",
+        ],
+    }
+    # Default visible columns
+    default_columns = {
+        "Device",
+        "Platform",
+        "Model",
+        "Model Size",
+        "Prompt Processing",
+        "Token Generation",
+    }
+    with st.expander("Column Visibility", expanded=False):
+        selected_columns = set()
+        for category, columns in column_categories.items():
+            st.subheader(category)
+            for col in columns:
+                if st.checkbox(col, value=col in default_columns):
+                    selected_columns.add(col)
+    return selected_columns
+def render_benchmark_filters() -> Dict:
+    """Render advanced benchmark configuration filters"""
+    with st.expander("Benchmark Configuration", expanded=False):
+        use_custom_config = st.checkbox("Use Custom PP/TG Values", value=False)
+        if use_custom_config:
+            col1, col2 = st.columns(2)
+            with col1:
+                pp_min = st.number_input("Min PP", value=0, step=32)
+                pp_max = st.number_input("Max PP", value=1024, step=32)
+            with col2:
+                tg_min = st.number_input("Min TG", value=0, step=32)
+                tg_max = st.number_input("Max TG", value=512, step=32)
+        else:
+            pp_min = pp_max = tg_min = tg_max = None
+        return {
+            "use_custom_config": use_custom_config,
+            "pp_range": (pp_min, pp_max),
+            "tg_range": (tg_min, tg_max),
+        }
+def render_advanced_filters() -> Dict:
+    """Render advanced settings filters"""
+    with st.expander("Advanced Settings", expanded=False):
+        col1, col2 = st.columns(2)
+        with col1:
+            n_threads = st.multiselect(
+                "Number of Threads", options=[1, 2, 4, 8, 16], default=None
             )
+            flash_attn = st.multiselect(
+                "Flash Attention", options=[True, False], default=None
             )
+        with col2:
+            cache_type = st.multiselect(
+                "Cache Type", options=["f16", "f32"], default=None
             )
+            memory_usage = st.slider(
+                "Max Memory Usage (%)", min_value=0, max_value=100, value=100
             )
+        return {
+            "n_threads": n_threads,
+            "flash_attn": flash_attn,
+            "cache_type": cache_type,
+            "max_memory_usage": memory_usage,
+        }
 def render_plot_filters(
+    models: List[str], platforms: List[str], devices: List[str]
+) -> Dict:
     """Render and handle plot filters"""
     plot_filters = st.container()
     with plot_filters:
+        p1, p2, p3 = st.columns(3)
         with p1:
+            selected_model = st.selectbox("Model for Plot", models, key="plot_model")
         with p2:
+            selected_platform = st.selectbox(
+                "Platform for Plot", ["All"] + list(platforms), key="plot_platform"
             )
+        with p3:
+            selected_device = st.selectbox(
+                "Device for Plot", ["All"] + list(devices), key="plot_device"
+            )
+    # Use the same benchmark and advanced filters as the table
+    benchmark_config = render_benchmark_filters()
+    advanced_settings = render_advanced_filters()
+    return {
+        "basic_filters": {
+            "model": selected_model,
+            "platform": selected_platform,
+            "device": selected_device,
+        },
+        "benchmark_config": benchmark_config,
+        "advanced_settings": advanced_settings,
+    }
+def render_table_filters(
+    models: List[str], platforms: List[str], devices: List[str]
+) -> Dict:
+    """Render and handle all table filters"""
+    st.sidebar.title("Filters")
+    # Basic filters
+    selected_model = st.sidebar.selectbox(
+        "Model", ["All"] + list(models), key="table_model"
+    )
+    selected_platform = st.sidebar.selectbox(
+        "Platform", ["All"] + list(platforms), key="table_platform"
+    )
+    selected_device = st.sidebar.selectbox(
+        "Device", ["All"] + list(devices), key="table_device"
+    )
+    # Grouping options
+    st.sidebar.title("Display Options")
+    grouping = render_grouping_options()
+    # Column visibility
+    visible_columns = render_column_visibility()
+    # Benchmark configuration
+    benchmark_config = render_benchmark_filters()
+    # Advanced settings
+    advanced_settings = render_advanced_filters()
+    return {
+        "basic_filters": {
+            "model": selected_model,
+            "platform": selected_platform,
+            "device": selected_device,
+        },
+        "grouping": grouping,
+        "visible_columns": visible_columns,
+        "benchmark_config": benchmark_config,
+        "advanced_settings": advanced_settings,
+    }

src/components/visualizations.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import streamlit as st
 import plotly.express as px
 import pandas as pd
-from typing import Optional
 def create_performance_plot(df: pd.DataFrame, metric: str, title: str):
     """Create a performance comparison plot"""
@@ -27,93 +28,275 @@ def create_performance_plot(df: pd.DataFrame, metric: str, title: str):
     )
     return fig
-def render_performance_plots(plot_df: pd.DataFrame, model_name: str):
     """Render performance comparison plots"""
-    if plot_df.empty:
-        st.warning(
-            "No data available for the selected model and benchmark combination."
-        )
         return
     col1, col2 = st.columns(2)
     with col1:
         fig1 = create_performance_plot(
-            plot_df,
             "Prompt Processing",
-            f"Prompt Processing Time - {model_name}",
         )
         if fig1:
             st.plotly_chart(fig1, use_container_width=True)
     with col2:
         fig2 = create_performance_plot(
-            plot_df,
             "Token Generation",
-            f"Token Generation Time - {model_name}",
         )
         if fig2:
             st.plotly_chart(fig2, use_container_width=True)
-def render_leaderboard_table(df: pd.DataFrame):
     """Render the leaderboard table with grouped and formatted data"""
-    # Group and average the results
-    grouped_df = (
-        df.groupby(
-            ["Model ID", "Benchmark", "Normalized Device ID", "Platform", "Device", "Model Size", "CPU Cores"]
-        )
-        .agg(
-            {
-                "Prompt Processing": ["mean", "count", "std"],
-                "Token Generation": ["mean", "std"],
-            }
-        )
-        .reset_index()
     )
     # Flatten column names
     grouped_df.columns = [
         col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
     ]
     # Round numeric columns
     numeric_cols = [
-        "Prompt Processing (mean)",
-        "Prompt Processing (std)",
-        "Token Generation (mean)",
-        "Token Generation (std)",
     ]
     grouped_df[numeric_cols] = grouped_df[numeric_cols].round(2)
     # Rename columns for display
-    grouped_df = grouped_df.rename(
-        columns={
-            "Prompt Processing (mean)": "PP Avg (s)",
-            "Prompt Processing (std)": "PP Std",
-            "Prompt Processing (count)": "Runs",
-            "Token Generation (mean)": "TG Avg (s)",
-            "Token Generation (std)": "TG Std",
-        }
-    )
-    # Reorder columns for display
-    display_cols = [
-        "Platform",
-        "Device",
-        "Model ID",
-        "Model Size",
-        "Benchmark",
-        "TG Avg (s)",
-        "TG Std",
-        "PP Avg (s)",
-        "PP Std",
-    ]
     # Display the filtered and grouped table
     st.dataframe(
-        grouped_df[display_cols].sort_values(
-            ["Model Size", "Benchmark", "TG Avg (s)"],
-            ascending=[False, True, True],
-        ),
         use_container_width=True,
         height=400,
-    )

 import streamlit as st
 import plotly.express as px
 import pandas as pd
+from typing import Optional, Dict, List, Set
 def create_performance_plot(df: pd.DataFrame, metric: str, title: str):
     """Create a performance comparison plot"""
     )
     return fig
+def filter_dataframe(df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
+    """Apply all filters to the dataframe"""
+    if df.empty:
+        return df
+    # Basic filters
+    basic_filters = filters["basic_filters"]
+    if basic_filters["model"] != "All":
+        df = df[df["Model ID"] == basic_filters["model"]]
+    if basic_filters["platform"] != "All":
+        df = df[df["Platform"] == basic_filters["platform"]]
+    if basic_filters["device"] != "All":
+        df = df[df["Device"] == basic_filters["device"]]
+    # Benchmark configuration filters
+    benchmark_config = filters["benchmark_config"]
+    if benchmark_config["use_custom_config"]:
+        pp_min, pp_max = benchmark_config["pp_range"]
+        tg_min, tg_max = benchmark_config["tg_range"]
+        # Extract PP/TG values if not already present
+        if "PP Value" not in df.columns:
+            df["PP Value"] = df["Benchmark"].apply(
+                lambda x: int(x.split("pp: ")[1].split(",")[0])
+            )
+        if "TG Value" not in df.columns:
+            df["TG Value"] = df["Benchmark"].apply(
+                lambda x: int(x.split("tg: ")[1].split(")")[0])
+            )
+        df = df[
+            (df["PP Value"] >= pp_min)
+            & (df["PP Value"] <= pp_max)
+            & (df["TG Value"] >= tg_min)
+            & (df["TG Value"] <= tg_max)
+        ]
+    # Advanced settings filters
+    advanced = filters["advanced_settings"]
+    if advanced["n_threads"]:
+        df["n_threads"] = df["initSettings"].apply(lambda x: x.get("n_threads"))
+        df = df[df["n_threads"].isin(advanced["n_threads"])]
+    if advanced["flash_attn"]:
+        df["flash_attn"] = df["initSettings"].apply(lambda x: x.get("flash_attn"))
+        df = df[df["flash_attn"].isin(advanced["flash_attn"])]
+    if advanced["cache_type"]:
+        df["cache_type_k"] = df["initSettings"].apply(lambda x: x.get("cache_type_k"))
+        df["cache_type_v"] = df["initSettings"].apply(lambda x: x.get("cache_type_v"))
+        df = df[
+            (df["cache_type_k"].isin(advanced["cache_type"]))
+            & (df["cache_type_v"].isin(advanced["cache_type"]))
+        ]
+    if advanced["max_memory_usage"] < 100:
+        df = df[df["Memory Usage (%)"] <= advanced["max_memory_usage"]]
+    return df
+def render_performance_plots(df: pd.DataFrame, filters: Dict):
     """Render performance comparison plots"""
+    if df.empty:
+        st.warning("No data available for plotting.")
         return
+    # Apply filters
+    filtered_df = filter_dataframe(df, filters)
+    if filtered_df.empty:
+        st.warning("No data matches the selected filters for plotting.")
+        return
+    # Extract PP/TG values if not already present
+    if "PP Value" not in filtered_df.columns:
+        filtered_df["PP Value"] = filtered_df["Benchmark"].apply(
+            lambda x: int(x.split("pp: ")[1].split(",")[0])
+        )
+    if "TG Value" not in filtered_df.columns:
+        filtered_df["TG Value"] = filtered_df["Benchmark"].apply(
+            lambda x: int(x.split("tg: ")[1].split(")")[0])
+        )
+    # Extract initSettings if not already present
+    if "n_threads" not in filtered_df.columns:
+        filtered_df["n_threads"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("n_threads")
+        )
+        filtered_df["flash_attn"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("flash_attn")
+        )
+        filtered_df["cache_type_k"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("cache_type_k")
+        )
+        filtered_df["cache_type_v"] = filtered_df["initSettings"].apply(
+            lambda x: x.get("cache_type_v")
+        )
+    # Group by device and platform for plotting
+    plot_group = (
+        filtered_df.groupby(["Device", "Platform"])
+        .agg(
+            {
+                "Prompt Processing": "mean",
+                "Token Generation": "mean",
+                "Memory Usage (%)": "mean",
+                "Memory Usage (GB)": "mean",
+                "CPU Cores": "first",
+                "Model Size": "first",
+                "PP Value": "first",
+                "TG Value": "first",
+            }
+        )
+        .reset_index()
+    )
     col1, col2 = st.columns(2)
     with col1:
         fig1 = create_performance_plot(
+            plot_group,
             "Prompt Processing",
+            f"Prompt Processing Time (PP: {plot_group['PP Value'].iloc[0]})",
         )
         if fig1:
             st.plotly_chart(fig1, use_container_width=True)
     with col2:
         fig2 = create_performance_plot(
+            plot_group,
             "Token Generation",
+            f"Token Generation Time (TG: {plot_group['TG Value'].iloc[0]})",
         )
         if fig2:
             st.plotly_chart(fig2, use_container_width=True)
+def render_leaderboard_table(df: pd.DataFrame, filters: Dict):
     """Render the leaderboard table with grouped and formatted data"""
+    if df.empty:
+        st.warning("No data available for the selected filters.")
+        return
+    # Apply filters
+    filtered_df = filter_dataframe(df, filters)
+    if filtered_df.empty:
+        st.warning("No data matches the selected filters.")
+        return
+    # Extract settings from benchmark results
+    filtered_df["PP Value"] = filtered_df["Benchmark"].apply(
+        lambda x: int(x.split("pp: ")[1].split(",")[0])
+    )
+    filtered_df["TG Value"] = filtered_df["Benchmark"].apply(
+        lambda x: int(x.split("tg: ")[1].split(")")[0])
     )
+    # Extract initSettings
+    filtered_df["n_threads"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("n_threads")
+    )
+    filtered_df["flash_attn"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("flash_attn")
+    )
+    filtered_df["cache_type_k"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("cache_type_k")
+    )
+    filtered_df["cache_type_v"] = filtered_df["initSettings"].apply(
+        lambda x: x.get("cache_type_v")
+    )
+    # Group by selected columns
+    grouping_cols = filters["grouping"]
+    if not grouping_cols:
+        grouping_cols = ["Model ID", "Device", "Platform"]  # Default grouping
+    agg_dict = {
+        "Prompt Processing": ["mean", "count", "std"],
+        "Token Generation": ["mean", "std"],
+        "Memory Usage (%)": "mean",
+        "Memory Usage (GB)": "mean",
+        "Total Memory (GB)": "first",
+        "CPU Cores": "first",
+        "Model Size": "first",
+        "PP Value": "first",
+        "TG Value": "first",
+        "n_threads": "first",
+        "flash_attn": "first",
+        "cache_type_k": "first",
+        "cache_type_v": "first",
+    }
+    grouped_df = filtered_df.groupby(grouping_cols).agg(agg_dict).reset_index()
     # Flatten column names
     grouped_df.columns = [
         col[0] if col[1] == "" else f"{col[0]} ({col[1]})" for col in grouped_df.columns
     ]
+    # Sort by Model Size, PP Value, and TG time
+    grouped_df = grouped_df.sort_values(
+        by=["Model Size (first)", "PP Value (first)", "Token Generation (mean)"],
+        ascending=[False, True, True],
+    )
     # Round numeric columns
     numeric_cols = [
+        col
+        for col in grouped_df.columns
+        if any(x in col for x in ["mean", "std", "Memory", "Size"])
     ]
     grouped_df[numeric_cols] = grouped_df[numeric_cols].round(2)
     # Rename columns for display
+    column_mapping = {
+        "Prompt Processing (mean)": "PP Avg (ms)",
+        "Prompt Processing (std)": "PP Std",
+        "Prompt Processing (count)": "Runs",
+        "Token Generation (mean)": "TG Avg (ms)",
+        "Token Generation (std)": "TG Std",
+        "Memory Usage (%) (mean)": "Memory Usage (%)",
+        "Memory Usage (GB) (mean)": "Memory Usage (GB)",
+        "PP Value (first)": "PP Value",
+        "TG Value (first)": "TG Value",
+    }
+    grouped_df = grouped_df.rename(columns=column_mapping)
+    # Filter visible columns
+    visible_cols = filters["visible_columns"]
+    if visible_cols:
+        # Map the user-friendly names to actual column names
+        column_name_mapping = {
+            "Device": "Device",
+            "Platform": "Platform",
+            "CPU Cores": "CPU Cores (first)",
+            "Total Memory (GB)": "Total Memory (GB) (first)",
+            "Memory Usage (%)": "Memory Usage (%)",
+            "PP Value": "PP Value",
+            "TG Value": "TG Value",
+            "Prompt Processing": "PP Avg (ms)",
+            "Token Generation": "TG Avg (ms)",
+            "Model": "Model ID",
+            "Model Size": "Model Size (first)",
+            "Model ID": "Model ID",
+            "n_threads": "n_threads (first)",
+            "flash_attn": "flash_attn (first)",
+            "cache_type_k": "cache_type_k (first)",
+            "cache_type_v": "cache_type_v (first)",
+        }
+        display_cols = [
+            column_name_mapping[col]
+            for col in visible_cols
+            if col in column_name_mapping
+        ]
+    else:
+        # Default columns if none selected
+        display_cols = [
+            "Device",
+            "Platform",
+            "Model ID",
+            "Model Size (first)",
+            "PP Avg (ms)",
+            "TG Avg (ms)",
+            "Memory Usage (%)",
+        ]
     # Display the filtered and grouped table
     st.dataframe(
+        grouped_df[display_cols],
         use_container_width=True,
         height=400,
+    )

src/services/firebase.py CHANGED Viewed

@@ -5,6 +5,7 @@ import pandas as pd
 import streamlit as st
 import json
 def initialize_firebase():
     """Initialize Firebase with credentials"""
     try:
@@ -16,17 +17,20 @@ def initialize_firebase():
         firebase_admin.initialize_app(cred)
     return firestore.client()
 db = initialize_firebase()
 def normalize_device_id(device_info: dict) -> str:
     """Normalize device identifier for aggregation"""
     emulator = "/Emulator" if device_info["isEmulator"] else ""
     if device_info["systemName"].lower() == "ios":
         return f"iOS/{device_info['model']}{emulator}"
     memory_tier = f"{device_info['totalMemory'] // (1024**3)}GB"
     return f"{device_info['brand']}/{device_info['model']}/{memory_tier}{emulator}"
 def format_params_in_b(params: int) -> float:
     """Format number of parameters in billions"""
     b_value = params / 1e9
@@ -37,78 +41,117 @@ def format_params_in_b(params: int) -> float:
     else:
         return round(b_value, 3)
 def format_leaderboard_data(submissions: List[dict]) -> pd.DataFrame:
     """Format submissions for leaderboard display.

     Builds one row per submission from its ``benchmarkResult`` and
     ``deviceInfo`` entries and returns the rows as a DataFrame.
     Submissions where either entry is missing or empty are skipped. If
     building a row raises, the error is shown with ``st.warning`` and that
     submission is skipped as well.

     Args:
         submissions: Submission dicts to convert.

     Returns:
         A DataFrame with one row per successfully formatted submission.
     """
     formatted_data = []
     for sub in submissions:
         try:
-            benchmark_result = sub.get('benchmarkResult', {})
-            device_info = sub.get('deviceInfo', {})
             # Both sections are needed to build a row.
             if not benchmark_result or not device_info:
                 continue
-            formatted_data.append({
-                "Device": f"{device_info.get('model', 'Unknown')} [Emulator]" if device_info.get('isEmulator') else device_info.get('model', 'Unknown'),
-                "Platform": device_info.get('systemName', 'Unknown'),
-                "Benchmark": f"{benchmark_result.get('config', {}).get('label', 'Unknown')} (pp: {benchmark_result.get('config', {}).get('pp', 'N/A')}, tg: {benchmark_result.get('config', {}).get('tg', 'N/A')})",
-                "Model": benchmark_result.get('modelName', 'Unknown'),
-                "Model Size": format_params_in_b(benchmark_result.get('modelNParams', 0)),
-                "Prompt Processing": round(benchmark_result.get('ppAvg', 0), 2),
-                "Token Generation": round(benchmark_result.get('tgAvg', 0), 2),
-                "Memory Usage (%)": benchmark_result.get('peakMemoryUsage', {}).get('percentage'),
-                "Memory Usage (GB)": round(benchmark_result.get('peakMemoryUsage', {}).get('used', 0) / (1024**3), 2) if benchmark_result.get('peakMemoryUsage', {}).get('used') else None,
-                "Total Memory (GB)": round(device_info.get('totalMemory', 0) / (1024**3), 2),
-                "CPU Cores": device_info.get('cpuDetails', {}).get('cores', 'Unknown'),
-                "Normalized Device ID": normalize_device_id(device_info),
-                "Timestamp": benchmark_result.get('timestamp', 'Unknown'),
-                "Model ID": benchmark_result.get('modelId', 'Unknown'),
-                "OID": benchmark_result.get('oid'),
-            })
         except Exception as e:
             # Best effort: report the bad submission and keep processing the rest.
             st.warning(f"Error processing submission: {str(e)}")
             continue
     return pd.DataFrame(formatted_data)
 async def fetch_leaderboard_data(
-    model_name: Optional[str] = None,
-    benchmark_label: Optional[str] = None
 ) -> pd.DataFrame:
     """Fetch and process leaderboard data from Firestore.

     Reads every document of the ``benchmarks/v1/submissions`` collection,
     filters them in memory and hands the remaining submissions to
     ``format_leaderboard_data``.

     Args:
         model_name: Keep only submissions whose ``benchmarkResult.modelName``
             equals this value. ``None``, an empty string or ``"All"``
             disables the filter.
         benchmark_label: Keep only submissions whose
             ``benchmarkResult.config.label`` equals this value. ``None``, an
             empty string or ``"All"`` disables the filter.

     Returns:
         The formatted leaderboard DataFrame. An empty DataFrame is returned
         when the collection has no documents or when any exception occurs
         (the error is shown with ``st.error``).
     """
     try:
         # Navigate to the correct collection path: benchmarks/v1/submissions
-        submissions_ref = db.collection('benchmarks').document('v1').collection('submissions')
         # Get all documents (materialized so emptiness can be checked up front)
         docs = submissions_ref.stream()
         all_docs = list(docs)
         if len(all_docs) == 0:
             return pd.DataFrame()
         # Process documents and filter in memory
         submissions = []
         for doc in all_docs:
             data = doc.to_dict()
-            if not data or 'benchmarkResult' not in data:
                 continue
-            benchmark_result = data['benchmarkResult']
             # Apply filters; a filter only applies when set and not "All"
-            if model_name and model_name != "All" and benchmark_result.get('modelName') != model_name:
                 continue
-            if benchmark_label and benchmark_label != "All" and benchmark_result.get('config', {}).get('label') != benchmark_label:
                 continue
             submissions.append(data)
         return format_leaderboard_data(submissions)
     except Exception as e:
         st.error(f"Error fetching data from Firestore: {str(e)}")
-        return pd.DataFrame()

 import streamlit as st
 import json
 def initialize_firebase():
     """Initialize Firebase with credentials"""
     try:
         firebase_admin.initialize_app(cred)
     return firestore.client()
 # Module-level Firestore client: created once when this module is imported
 # and used by fetch_leaderboard_data() to reach the submissions collection.
 db = initialize_firebase()
 def normalize_device_id(device_info: dict) -> str:
     """Build a normalized device identifier used for aggregation.

     iOS devices (``systemName`` compared case-insensitively) are identified
     as ``iOS/<model>``; every other platform as ``<brand>/<model>/<N>GB``,
     where ``N`` is ``totalMemory`` floor-divided by 1024**3. ``/Emulator``
     is appended when ``isEmulator`` is truthy.

     Missing or ``None`` fields fall back to defaults instead of raising, in
     line with how the rest of this module reads the same dict via
     ``.get(..., default)``: absent ``model``/``brand`` become ``"Unknown"``,
     absent ``totalMemory`` counts as 0 and absent ``isEmulator`` means
     "not an emulator".

     Args:
         device_info: Device metadata of a submission.

     Returns:
         The normalized identifier string.
     """
     emulator = "/Emulator" if device_info.get("isEmulator") else ""
     model = device_info.get("model", "Unknown")

     # ``or ""`` also covers an explicit None stored for systemName.
     system_name = str(device_info.get("systemName") or "")
     if system_name.lower() == "ios":
         return f"iOS/{model}{emulator}"

     brand = device_info.get("brand", "Unknown")
     total_memory = device_info.get("totalMemory") or 0
     # int() keeps the tier label integral ("7GB") even for float input.
     memory_tier = f"{int(total_memory // (1024**3))}GB"
     return f"{brand}/{model}/{memory_tier}{emulator}"
 def format_params_in_b(params: int) -> float:
     """Format number of parameters in billions"""
     b_value = params / 1e9
     else:
         return round(b_value, 3)
 def format_leaderboard_data(submissions: List[dict]) -> pd.DataFrame:
     """Convert raw submissions into a DataFrame of leaderboard rows.

     A submission becomes a row only when it has non-empty
     ``benchmarkResult`` and ``deviceInfo`` entries, its benchmark result
     contains ``initSettings``, and the device is not flagged as an emulator.
     Any exception raised while building a row is reported with
     ``st.warning`` and that submission is skipped.

     Args:
         submissions: Submission dicts to convert.

     Returns:
         A DataFrame with one row per accepted submission.
     """
     bytes_per_gb = 1024**3
     rows = []
     for sub in submissions:
         try:
             benchmark_result = sub.get("benchmarkResult", {})
             device_info = sub.get("deviceInfo", {})

             # Both sections are required to build a row.
             if not (benchmark_result and device_info):
                 continue
             # Results without init settings are not shown.
             if "initSettings" not in benchmark_result:
                 continue
             # Emulator runs are excluded from the leaderboard.
             if device_info.get("isEmulator", False):
                 continue

             config = benchmark_result.get("config", {})
             peak_memory = benchmark_result.get("peakMemoryUsage", {})

             row = {
                 "Device": device_info.get("model", "Unknown"),
                 "Platform": device_info.get("systemName", "Unknown"),
                 "Benchmark": (
                     f"{config.get('label', 'Unknown')} "
                     f"(pp: {config.get('pp', 'N/A')}, tg: {config.get('tg', 'N/A')})"
                 ),
                 "Model": benchmark_result.get("modelName", "Unknown"),
                 "Model Size": format_params_in_b(
                     benchmark_result.get("modelNParams", 0)
                 ),
                 "Prompt Processing": round(benchmark_result.get("ppAvg", 0), 2),
                 "Token Generation": round(benchmark_result.get("tgAvg", 0), 2),
                 "Memory Usage (%)": peak_memory.get("percentage"),
                 # Only report a GB figure when a non-zero "used" value exists.
                 "Memory Usage (GB)": (
                     round(peak_memory.get("used", 0) / bytes_per_gb, 2)
                     if peak_memory.get("used")
                     else None
                 ),
                 "Total Memory (GB)": round(
                     device_info.get("totalMemory", 0) / bytes_per_gb, 2
                 ),
                 "CPU Cores": device_info.get("cpuDetails", {}).get(
                     "cores", "Unknown"
                 ),
                 "Normalized Device ID": normalize_device_id(device_info),
                 "Timestamp": benchmark_result.get("timestamp", "Unknown"),
                 "Model ID": benchmark_result.get("modelId", "Unknown"),
                 "OID": benchmark_result.get("oid"),
                 "initSettings": benchmark_result.get("initSettings"),
             }
             rows.append(row)
         except Exception as e:
             # Best effort: report the bad submission and keep going.
             st.warning(f"Error processing submission: {str(e)}")
             continue
     return pd.DataFrame(rows)
 async def fetch_leaderboard_data(
     model_name: Optional[str] = None, benchmark_label: Optional[str] = None
 ) -> pd.DataFrame:
     """Load submissions from Firestore and return them as a leaderboard table.

     Reads every document of the ``benchmarks/v1/submissions`` collection,
     filters them in memory and passes the remaining submissions to
     ``format_leaderboard_data``.

     Args:
         model_name: Keep only submissions whose ``benchmarkResult.modelName``
             equals this value. ``None``, an empty string or ``"All"``
             disables the filter.
         benchmark_label: Keep only submissions whose
             ``benchmarkResult.config.label`` equals this value. ``None``, an
             empty string or ``"All"`` disables the filter.

     Returns:
         The formatted leaderboard DataFrame. An empty DataFrame is returned
         when the collection has no documents or when any exception occurs
         (the error is shown with ``st.error``).
     """
     try:
         # Collection path: benchmarks/v1/submissions
         benchmarks = db.collection("benchmarks")
         collection = benchmarks.document("v1").collection("submissions")

         # Materialize the stream so emptiness can be checked up front.
         snapshots = list(collection.stream())
         if not snapshots:
             return pd.DataFrame()

         # A filter is active only when a value is given and it is not "All".
         filter_by_model = bool(model_name) and model_name != "All"
         filter_by_label = bool(benchmark_label) and benchmark_label != "All"

         matching = []
         for snapshot in snapshots:
             payload = snapshot.to_dict()
             if not payload or "benchmarkResult" not in payload:
                 continue
             result = payload["benchmarkResult"]
             if filter_by_model and result.get("modelName") != model_name:
                 continue
             if (
                 filter_by_label
                 and result.get("config", {}).get("label") != benchmark_label
             ):
                 continue
             matching.append(payload)
         return format_leaderboard_data(matching)
     except Exception as e:
         st.error(f"Error fetching data from Firestore: {str(e)}")
         return pd.DataFrame()