Spaces:
Sleeping
Sleeping
add support for additional backends (#33)
Browse files
- add support for more backends (e37cee2506e5bdffbf68513d4e57d076c2c64c6f)
- remove unnecessary warnings (0732974d483476a9eb15d1367abcb8f17069f57d)
- app.py +37 -14
- hardware.yml → hardware.yaml +6 -4
- requirements.txt +3 -2
- src/content.py +6 -6
- src/dependency.py +3 -0
- src/hardware.py +9 -9
- src/llm_perf.py +33 -14
- src/panel.py +67 -22
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
|
|
|
|
| 3 |
from src.assets import custom_css
|
| 4 |
|
| 5 |
# from src.attention import create_attn_plots
|
|
@@ -14,10 +15,13 @@ from src.panel import (
|
|
| 14 |
create_select_callback,
|
| 15 |
)
|
| 16 |
|
| 17 |
-
configs = load_hardware_configs("hardware.
|
| 18 |
|
| 19 |
|
| 20 |
-
demo = gr.Blocks(
|
|
|
|
|
|
|
|
|
|
| 21 |
with demo:
|
| 22 |
gr.HTML(LOGO, elem_classes="logo")
|
| 23 |
gr.HTML(TITLE, elem_classes="title")
|
|
@@ -29,13 +33,13 @@ with demo:
|
|
| 29 |
if config.detail:
|
| 30 |
gr.Markdown(config.detail, elem_classes="descriptive-text")
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
# ####################### CONTROL PANEL #######################
|
| 35 |
(
|
| 36 |
filter_button,
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
| 39 |
score_slider,
|
| 40 |
memory_slider,
|
| 41 |
backend_checkboxes,
|
|
@@ -43,18 +47,33 @@ with demo:
|
|
| 43 |
optimization_checkboxes,
|
| 44 |
quantization_checkboxes,
|
| 45 |
kernels_checkboxes,
|
| 46 |
-
) = create_control_panel(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
####################### HARDWARE SUBTABS #######################
|
| 48 |
with gr.Tabs(elem_classes="subtabs"):
|
| 49 |
-
open_llm_perf_df = get_llm_perf_df(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
####################### LEADERBOARD TAB #######################
|
| 51 |
with gr.TabItem("Leaderboard π
", id=0):
|
| 52 |
search_bar, columns_checkboxes, leaderboard_table = (
|
| 53 |
create_leaderboard_table(open_llm_perf_df)
|
| 54 |
)
|
| 55 |
-
if
|
|
|
|
|
|
|
| 56 |
with gr.TabItem("Find Your Best Model π§", id=1):
|
| 57 |
-
lat_score_mem_plot = create_lat_score_mem_plot(
|
|
|
|
|
|
|
| 58 |
###################### ATTENTIONS SPEEDUP TAB #######################
|
| 59 |
# with gr.TabItem("Attention π", id=2):
|
| 60 |
# attn_prefill_plot, attn_decode_plot = create_attn_plots(
|
|
@@ -70,8 +89,10 @@ with demo:
|
|
| 70 |
create_control_callback(
|
| 71 |
filter_button,
|
| 72 |
# inputs
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
| 75 |
score_slider,
|
| 76 |
memory_slider,
|
| 77 |
backend_checkboxes,
|
|
@@ -93,8 +114,10 @@ with demo:
|
|
| 93 |
|
| 94 |
create_select_callback(
|
| 95 |
# inputs
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
| 98 |
# interactive
|
| 99 |
columns_checkboxes,
|
| 100 |
search_bar,
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
|
| 3 |
+
import src.dependency # noqa
|
| 4 |
from src.assets import custom_css
|
| 5 |
|
| 6 |
# from src.attention import create_attn_plots
|
|
|
|
| 15 |
create_select_callback,
|
| 16 |
)
|
| 17 |
|
| 18 |
+
configs = load_hardware_configs("hardware.yaml")
|
| 19 |
|
| 20 |
|
| 21 |
+
demo = gr.Blocks(
|
| 22 |
+
css=custom_css,
|
| 23 |
+
theme=gr.themes.Default(primary_hue="indigo", secondary_hue="indigo"),
|
| 24 |
+
)
|
| 25 |
with demo:
|
| 26 |
gr.HTML(LOGO, elem_classes="logo")
|
| 27 |
gr.HTML(TITLE, elem_classes="title")
|
|
|
|
| 33 |
if config.detail:
|
| 34 |
gr.Markdown(config.detail, elem_classes="descriptive-text")
|
| 35 |
|
|
|
|
|
|
|
| 36 |
# ####################### CONTROL PANEL #######################
|
| 37 |
(
|
| 38 |
filter_button,
|
| 39 |
+
machine_value,
|
| 40 |
+
subsets_value,
|
| 41 |
+
backends_value,
|
| 42 |
+
hardware_type_value,
|
| 43 |
score_slider,
|
| 44 |
memory_slider,
|
| 45 |
backend_checkboxes,
|
|
|
|
| 47 |
optimization_checkboxes,
|
| 48 |
quantization_checkboxes,
|
| 49 |
kernels_checkboxes,
|
| 50 |
+
) = create_control_panel(
|
| 51 |
+
machine=config.machine,
|
| 52 |
+
subsets=config.subsets,
|
| 53 |
+
backends=config.backends,
|
| 54 |
+
hardware_type=config.hardware_type,
|
| 55 |
+
hardware_provider=config.hardware_provider,
|
| 56 |
+
)
|
| 57 |
####################### HARDWARE SUBTABS #######################
|
| 58 |
with gr.Tabs(elem_classes="subtabs"):
|
| 59 |
+
open_llm_perf_df = get_llm_perf_df(
|
| 60 |
+
machine=config.machine,
|
| 61 |
+
subsets=config.subsets,
|
| 62 |
+
backends=config.backends,
|
| 63 |
+
hardware_type=config.hardware_type,
|
| 64 |
+
)
|
| 65 |
####################### LEADERBOARD TAB #######################
|
| 66 |
with gr.TabItem("Leaderboard π
", id=0):
|
| 67 |
search_bar, columns_checkboxes, leaderboard_table = (
|
| 68 |
create_leaderboard_table(open_llm_perf_df)
|
| 69 |
)
|
| 70 |
+
if (
|
| 71 |
+
config.hardware_provider != "intel"
|
| 72 |
+
): # TODO intel CPU does not measure the memory requirements correctly, so disable the graph feature until we fix the underlying issue
|
| 73 |
with gr.TabItem("Find Your Best Model π§", id=1):
|
| 74 |
+
lat_score_mem_plot = create_lat_score_mem_plot(
|
| 75 |
+
open_llm_perf_df
|
| 76 |
+
)
|
| 77 |
###################### ATTENTIONS SPEEDUP TAB #######################
|
| 78 |
# with gr.TabItem("Attention π", id=2):
|
| 79 |
# attn_prefill_plot, attn_decode_plot = create_attn_plots(
|
|
|
|
| 89 |
create_control_callback(
|
| 90 |
filter_button,
|
| 91 |
# inputs
|
| 92 |
+
machine_value,
|
| 93 |
+
subsets_value,
|
| 94 |
+
backends_value,
|
| 95 |
+
hardware_type_value,
|
| 96 |
score_slider,
|
| 97 |
memory_slider,
|
| 98 |
backend_checkboxes,
|
|
|
|
| 114 |
|
| 115 |
create_select_callback(
|
| 116 |
# inputs
|
| 117 |
+
machine_value,
|
| 118 |
+
subsets_value,
|
| 119 |
+
backends_value,
|
| 120 |
+
hardware_type_value,
|
| 121 |
# interactive
|
| 122 |
columns_checkboxes,
|
| 123 |
search_bar,
|
hardware.yml → hardware.yaml
RENAMED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
- machine: 1xA10
|
| 2 |
description: A10-24GB-150W π₯οΈ
|
| 3 |
hardware_provider: nvidia
|
| 4 |
-
hardware_type:
|
| 5 |
subsets:
|
| 6 |
- unquantized
|
| 7 |
- awq
|
|
@@ -13,7 +13,7 @@
|
|
| 13 |
- machine: 1xA100
|
| 14 |
description: A100-80GB-275W π₯οΈ
|
| 15 |
hardware_provider: nvidia
|
| 16 |
-
hardware_type:
|
| 17 |
subsets:
|
| 18 |
- unquantized
|
| 19 |
- awq
|
|
@@ -25,7 +25,7 @@
|
|
| 25 |
- machine: 1xT4
|
| 26 |
description: T4-16GB-70W π₯οΈ
|
| 27 |
hardware_provider: nvidia
|
| 28 |
-
hardware_type:
|
| 29 |
subsets:
|
| 30 |
- unquantized
|
| 31 |
- awq
|
|
@@ -43,4 +43,6 @@
|
|
| 43 |
subsets:
|
| 44 |
- unquantized
|
| 45 |
backends:
|
| 46 |
-
- pytorch
|
|
|
|
|
|
|
|
|
| 1 |
- machine: 1xA10
|
| 2 |
description: A10-24GB-150W π₯οΈ
|
| 3 |
hardware_provider: nvidia
|
| 4 |
+
hardware_type: cuda
|
| 5 |
subsets:
|
| 6 |
- unquantized
|
| 7 |
- awq
|
|
|
|
| 13 |
- machine: 1xA100
|
| 14 |
description: A100-80GB-275W π₯οΈ
|
| 15 |
hardware_provider: nvidia
|
| 16 |
+
hardware_type: cuda
|
| 17 |
subsets:
|
| 18 |
- unquantized
|
| 19 |
- awq
|
|
|
|
| 25 |
- machine: 1xT4
|
| 26 |
description: T4-16GB-70W π₯οΈ
|
| 27 |
hardware_provider: nvidia
|
| 28 |
+
hardware_type: cuda
|
| 29 |
subsets:
|
| 30 |
- unquantized
|
| 31 |
- awq
|
|
|
|
| 43 |
subsets:
|
| 44 |
- unquantized
|
| 45 |
backends:
|
| 46 |
+
- pytorch
|
| 47 |
+
- openvino
|
| 48 |
+
- onnxruntime
|
requirements.txt
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
huggingface_hub
|
| 2 |
transformers
|
| 3 |
-
gradio
|
| 4 |
plotly
|
| 5 |
-
pandas
|
|
|
|
|
|
| 1 |
huggingface_hub
|
| 2 |
transformers
|
| 3 |
+
gradio>=5.0.0
|
| 4 |
plotly
|
| 5 |
+
pandas
|
| 6 |
+
ruff
|
src/content.py
CHANGED
|
@@ -5,18 +5,18 @@ TITLE = """<h1 align="center" id="space-title">π€ LLM-Perf Leaderboard ποΈ
|
|
| 5 |
ABOUT = """
|
| 6 |
## π About
|
| 7 |
The 🤗 LLM-Perf Leaderboard 🏋️ is a leaderboard at the intersection of quality and performance.
|
| 8 |
-
Its aim is to benchmark the performance (latency, throughput, memory & energy)
|
| 9 |
-
of Large Language Models (LLMs) with different hardwares, backends and optimizations
|
| 10 |
using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark).
|
| 11 |
|
| 12 |
-
Anyone from the community can request a new base model or hardware/backend/optimization
|
| 13 |
configuration for automated benchmarking:
|
| 14 |
|
| 15 |
-
- Model evaluation requests should be made in the
|
| 16 |
[🤗 Open LLM Leaderboard 🏅](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) ;
|
| 17 |
we scrape the [list of canonical base models](https://github.com/huggingface/optimum-benchmark/blob/main/llm_perf/utils.py) from there.
|
| 18 |
-
- Hardware/Backend/Optimization configuration requests should be made in the
|
| 19 |
-
[🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) or
|
| 20 |
[Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) repository (where the code is hosted).
|
| 21 |
|
| 22 |
## βοΈ Details
|
|
|
|
| 5 |
ABOUT = """
|
| 6 |
## π About
|
| 7 |
The 🤗 LLM-Perf Leaderboard 🏋️ is a leaderboard at the intersection of quality and performance.
|
| 8 |
+
Its aim is to benchmark the performance (latency, throughput, memory & energy)
|
| 9 |
+
of Large Language Models (LLMs) with different hardwares, backends and optimizations
|
| 10 |
using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark).
|
| 11 |
|
| 12 |
+
Anyone from the community can request a new base model or hardware/backend/optimization
|
| 13 |
configuration for automated benchmarking:
|
| 14 |
|
| 15 |
+
- Model evaluation requests should be made in the
|
| 16 |
[🤗 Open LLM Leaderboard 🏅](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) ;
|
| 17 |
we scrape the [list of canonical base models](https://github.com/huggingface/optimum-benchmark/blob/main/llm_perf/utils.py) from there.
|
| 18 |
+
- Hardware/Backend/Optimization configuration requests should be made in the
|
| 19 |
+
[🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) or
|
| 20 |
[Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) repository (where the code is hosted).
|
| 21 |
|
| 22 |
## βοΈ Details
|
src/dependency.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
|
src/hardware.py
CHANGED
|
@@ -1,19 +1,19 @@
|
|
| 1 |
-
from typing import Any, Dict, List
|
| 2 |
|
| 3 |
import yaml
|
| 4 |
|
| 5 |
|
| 6 |
class HardwareConfig:
|
| 7 |
def __init__(self, data: Dict[str, Any]):
|
| 8 |
-
self.machine = data["machine"]
|
| 9 |
-
self.description = data["description"]
|
| 10 |
-
self.hardware_provider = data["hardware_provider"]
|
| 11 |
-
self.hardware_type = data["hardware_type"]
|
| 12 |
-
self.subsets = data["subsets"]
|
| 13 |
-
self.backends = data["backends"]
|
| 14 |
-
self.detail = data.get("detail", None)
|
| 15 |
|
| 16 |
-
def __repr__(self):
|
| 17 |
return (
|
| 18 |
f"HardwareConfig(machine='{self.machine}', description='{self.description}', "
|
| 19 |
f"hardware_provider={self.hardware_provider}, hardware_type={self.hardware_type}, subsets={self.subsets}, backends={self.backends})"
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Optional
|
| 2 |
|
| 3 |
import yaml
|
| 4 |
|
| 5 |
|
| 6 |
class HardwareConfig:
|
| 7 |
def __init__(self, data: Dict[str, Any]):
|
| 8 |
+
self.machine: str = data["machine"]
|
| 9 |
+
self.description: str = data["description"]
|
| 10 |
+
self.hardware_provider: str = data["hardware_provider"]
|
| 11 |
+
self.hardware_type: str = data["hardware_type"]
|
| 12 |
+
self.subsets: List[str] = data["subsets"]
|
| 13 |
+
self.backends: List[str] = data["backends"]
|
| 14 |
+
self.detail: Optional[str] = data.get("detail", None)
|
| 15 |
|
| 16 |
+
def __repr__(self) -> str:
|
| 17 |
return (
|
| 18 |
f"HardwareConfig(machine='{self.machine}', description='{self.description}', "
|
| 19 |
f"hardware_provider={self.hardware_provider}, hardware_type={self.hardware_type}, subsets={self.subsets}, backends={self.backends})"
|
src/llm_perf.py
CHANGED
|
@@ -3,8 +3,6 @@ from typing import List
|
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
-
from src.hardware import HardwareConfig
|
| 7 |
-
|
| 8 |
from .utils import process_kernels, process_quantizations
|
| 9 |
|
| 10 |
DATASET_DIRECTORY = "dataset"
|
|
@@ -34,17 +32,31 @@ SORTING_COLUMNS = ["Open LLM Score (%)", "Decode (tokens/s)", "Prefill (s)"]
|
|
| 34 |
SORTING_ASCENDING = [False, True, False]
|
| 35 |
|
| 36 |
|
| 37 |
-
def get_raw_llm_perf_df(
|
|
|
|
|
|
|
| 38 |
dfs = []
|
| 39 |
for subset in subsets:
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
)
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
perf_df = pd.concat(dfs)
|
| 50 |
llm_df = pd.read_csv(
|
|
@@ -112,15 +124,22 @@ def processed_llm_perf_df(llm_perf_df):
|
|
| 112 |
return llm_perf_df
|
| 113 |
|
| 114 |
|
| 115 |
-
def get_llm_perf_df(
|
|
|
|
|
|
|
| 116 |
if not os.path.exists(DATASET_DIRECTORY):
|
| 117 |
os.makedirs(DATASET_DIRECTORY)
|
| 118 |
|
| 119 |
if os.path.exists(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"):
|
| 120 |
-
llm_perf_df = pd.read_csv(
|
|
|
|
|
|
|
| 121 |
else:
|
| 122 |
-
|
|
|
|
| 123 |
llm_perf_df = processed_llm_perf_df(llm_perf_df)
|
| 124 |
-
llm_perf_df.to_csv(
|
|
|
|
|
|
|
| 125 |
|
| 126 |
return llm_perf_df
|
|
|
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
|
|
|
|
|
|
|
| 6 |
from .utils import process_kernels, process_quantizations
|
| 7 |
|
| 8 |
DATASET_DIRECTORY = "dataset"
|
|
|
|
| 32 |
SORTING_ASCENDING = [False, True, False]
|
| 33 |
|
| 34 |
|
| 35 |
+
def get_raw_llm_perf_df(
|
| 36 |
+
machine: str, subsets: List[str], backends: List[str], hardware_type: str
|
| 37 |
+
):
|
| 38 |
dfs = []
|
| 39 |
for subset in subsets:
|
| 40 |
+
for backend in backends:
|
| 41 |
+
try:
|
| 42 |
+
dfs.append(
|
| 43 |
+
pd.read_csv(
|
| 44 |
+
f"hf://datasets/optimum-benchmark/llm-perf-leaderboard/perf-df-{backend}-{hardware_type}-{subset}-{machine}.csv"
|
| 45 |
+
)
|
| 46 |
)
|
| 47 |
+
except Exception:
|
| 48 |
+
print("Dataset not found for:")
|
| 49 |
+
print(f" β’ Backend: {backend}")
|
| 50 |
+
print(f" β’ Subset: {subset}")
|
| 51 |
+
print(f" β’ Machine: {machine}")
|
| 52 |
+
print(f" β’ Hardware Type: {hardware_type}")
|
| 53 |
+
url = f"https://huggingface.co/datasets/optimum-benchmark/llm-perf-leaderboard/blob/main/perf-df-{backend}-{hardware_type}-{subset}-{machine}.csv"
|
| 54 |
+
print(f" β’ URL: {url}")
|
| 55 |
+
|
| 56 |
+
if len(dfs) == 0:
|
| 57 |
+
raise ValueError(
|
| 58 |
+
f"No datasets found for machine {machine}, check your hardware.yml config file or your datatset on huggingface"
|
| 59 |
+
)
|
| 60 |
|
| 61 |
perf_df = pd.concat(dfs)
|
| 62 |
llm_df = pd.read_csv(
|
|
|
|
| 124 |
return llm_perf_df
|
| 125 |
|
| 126 |
|
| 127 |
+
def get_llm_perf_df(
|
| 128 |
+
machine: str, subsets: List[str], backends: List[str], hardware_type: str
|
| 129 |
+
):
|
| 130 |
if not os.path.exists(DATASET_DIRECTORY):
|
| 131 |
os.makedirs(DATASET_DIRECTORY)
|
| 132 |
|
| 133 |
if os.path.exists(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"):
|
| 134 |
+
llm_perf_df = pd.read_csv(
|
| 135 |
+
f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"
|
| 136 |
+
)
|
| 137 |
else:
|
| 138 |
+
print(f"Dataset machine {machine} not found, downloading...")
|
| 139 |
+
llm_perf_df = get_raw_llm_perf_df(machine, subsets, backends, hardware_type)
|
| 140 |
llm_perf_df = processed_llm_perf_df(llm_perf_df)
|
| 141 |
+
llm_perf_df.to_csv(
|
| 142 |
+
f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv", index=False
|
| 143 |
+
)
|
| 144 |
|
| 145 |
return llm_perf_df
|
src/panel.py
CHANGED
|
@@ -10,17 +10,30 @@ from src.llm_perf import get_llm_perf_df
|
|
| 10 |
from src.map import get_lat_score_mem_fig
|
| 11 |
|
| 12 |
|
| 13 |
-
def create_control_panel(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# controls
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
| 18 |
|
| 19 |
if hardware_provider == "nvidia":
|
| 20 |
backends = ["pytorch"]
|
| 21 |
attention_implementations = ["Eager", "SDPA", "FAv2"]
|
| 22 |
quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit"]
|
| 23 |
-
kernels = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
elif hardware_provider == "intel":
|
| 25 |
backends = ["pytorch", "onnxruntime", "openvino"]
|
| 26 |
attention_implementations = ["Eager"]
|
|
@@ -29,7 +42,6 @@ def create_control_panel(machine: str, subsets: List[str], hardware_provider: st
|
|
| 29 |
else:
|
| 30 |
raise ValueError(f"Unknown hardware provider: {hardware_provider}")
|
| 31 |
|
| 32 |
-
|
| 33 |
with gr.Accordion("Control Panel ποΈ", open=False, elem_id="control-panel"):
|
| 34 |
with gr.Row():
|
| 35 |
with gr.Column(scale=2, variant="panel"):
|
|
@@ -101,8 +113,10 @@ def create_control_panel(machine: str, subsets: List[str], hardware_provider: st
|
|
| 101 |
|
| 102 |
return (
|
| 103 |
filter_button,
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
| 106 |
score_slider,
|
| 107 |
memory_slider,
|
| 108 |
backend_checkboxes,
|
|
@@ -116,10 +130,12 @@ def create_control_panel(machine: str, subsets: List[str], hardware_provider: st
|
|
| 116 |
def filter_rows_fn(
|
| 117 |
machine,
|
| 118 |
subsets,
|
|
|
|
|
|
|
| 119 |
# inputs
|
| 120 |
score,
|
| 121 |
memory,
|
| 122 |
-
|
| 123 |
precisions,
|
| 124 |
attentions,
|
| 125 |
quantizations,
|
|
@@ -128,12 +144,14 @@ def filter_rows_fn(
|
|
| 128 |
columns,
|
| 129 |
search,
|
| 130 |
):
|
| 131 |
-
llm_perf_df = get_llm_perf_df(
|
|
|
|
|
|
|
| 132 |
# print(attentions)
|
| 133 |
# print(llm_perf_df["Attention ποΈ"].unique())
|
| 134 |
filtered_llm_perf_df = llm_perf_df[
|
| 135 |
llm_perf_df["Model π€"].str.contains(search, case=False)
|
| 136 |
-
& llm_perf_df["Backend π"].isin(
|
| 137 |
& llm_perf_df["Precision π₯"].isin(precisions)
|
| 138 |
& llm_perf_df["Attention ποΈ"].isin(attentions)
|
| 139 |
& llm_perf_df["Quantization ποΈ"].isin(quantizations)
|
|
@@ -142,7 +160,7 @@ def filter_rows_fn(
|
|
| 142 |
& (llm_perf_df["Memory (MB)"] <= memory)
|
| 143 |
]
|
| 144 |
selected_filtered_llm_perf_df = select_columns_fn(
|
| 145 |
-
machine, subsets, columns, search, filtered_llm_perf_df
|
| 146 |
)
|
| 147 |
selected_filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_llm_perf_df)
|
| 148 |
# filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
|
|
@@ -168,8 +186,10 @@ def create_control_callback(
|
|
| 168 |
# button
|
| 169 |
filter_button,
|
| 170 |
# fixed
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
| 173 |
# inputs
|
| 174 |
score_slider,
|
| 175 |
memory_slider,
|
|
@@ -195,8 +215,10 @@ def create_control_callback(
|
|
| 195 |
fn=filter_rows_fn,
|
| 196 |
inputs=[
|
| 197 |
# fixed
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
| 200 |
# inputs
|
| 201 |
score_slider,
|
| 202 |
memory_slider,
|
|
@@ -222,9 +244,16 @@ def create_control_callback(
|
|
| 222 |
)
|
| 223 |
|
| 224 |
|
| 225 |
-
def select_columns_fn(
|
|
|
|
|
|
|
| 226 |
if llm_perf_df is None:
|
| 227 |
-
llm_perf_df = get_llm_perf_df(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
selected_leaderboard_df = get_leaderboard_df(llm_perf_df)
|
| 230 |
selected_leaderboard_df = selected_leaderboard_df[
|
|
@@ -237,8 +266,10 @@ def select_columns_fn(machine, subsets, columns, search, llm_perf_df=None):
|
|
| 237 |
|
| 238 |
def create_select_callback(
|
| 239 |
# fixed
|
| 240 |
-
|
| 241 |
-
|
|
|
|
|
|
|
| 242 |
# interactive
|
| 243 |
columns_checkboxes,
|
| 244 |
search_bar,
|
|
@@ -247,11 +278,25 @@ def create_select_callback(
|
|
| 247 |
):
|
| 248 |
columns_checkboxes.change(
|
| 249 |
fn=select_columns_fn,
|
| 250 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
outputs=[leaderboard_table],
|
| 252 |
)
|
| 253 |
search_bar.change(
|
| 254 |
fn=select_columns_fn,
|
| 255 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
outputs=[leaderboard_table],
|
| 257 |
)
|
|
|
|
| 10 |
from src.map import get_lat_score_mem_fig
|
| 11 |
|
| 12 |
|
| 13 |
+
def create_control_panel(
|
| 14 |
+
machine: str,
|
| 15 |
+
subsets: List[str],
|
| 16 |
+
backends: List[str],
|
| 17 |
+
hardware_provider: str,
|
| 18 |
+
hardware_type: str,
|
| 19 |
+
):
|
| 20 |
# controls
|
| 21 |
+
machine_value = gr.State(value=machine)
|
| 22 |
+
subsets_value = gr.State(value=subsets)
|
| 23 |
+
backends_value = gr.State(value=backends)
|
| 24 |
+
hardware_type_value = gr.State(value=hardware_type)
|
| 25 |
|
| 26 |
if hardware_provider == "nvidia":
|
| 27 |
backends = ["pytorch"]
|
| 28 |
attention_implementations = ["Eager", "SDPA", "FAv2"]
|
| 29 |
quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit"]
|
| 30 |
+
kernels = [
|
| 31 |
+
"No Kernel",
|
| 32 |
+
"GPTQ.ExllamaV1",
|
| 33 |
+
"GPTQ.ExllamaV2",
|
| 34 |
+
"AWQ.GEMM",
|
| 35 |
+
"AWQ.GEMV",
|
| 36 |
+
]
|
| 37 |
elif hardware_provider == "intel":
|
| 38 |
backends = ["pytorch", "onnxruntime", "openvino"]
|
| 39 |
attention_implementations = ["Eager"]
|
|
|
|
| 42 |
else:
|
| 43 |
raise ValueError(f"Unknown hardware provider: {hardware_provider}")
|
| 44 |
|
|
|
|
| 45 |
with gr.Accordion("Control Panel ποΈ", open=False, elem_id="control-panel"):
|
| 46 |
with gr.Row():
|
| 47 |
with gr.Column(scale=2, variant="panel"):
|
|
|
|
| 113 |
|
| 114 |
return (
|
| 115 |
filter_button,
|
| 116 |
+
machine_value,
|
| 117 |
+
backends_value,
|
| 118 |
+
hardware_type_value,
|
| 119 |
+
subsets_value,
|
| 120 |
score_slider,
|
| 121 |
memory_slider,
|
| 122 |
backend_checkboxes,
|
|
|
|
| 130 |
def filter_rows_fn(
|
| 131 |
machine,
|
| 132 |
subsets,
|
| 133 |
+
backends,
|
| 134 |
+
hardware_type,
|
| 135 |
# inputs
|
| 136 |
score,
|
| 137 |
memory,
|
| 138 |
+
backend_checkboxes,
|
| 139 |
precisions,
|
| 140 |
attentions,
|
| 141 |
quantizations,
|
|
|
|
| 144 |
columns,
|
| 145 |
search,
|
| 146 |
):
|
| 147 |
+
llm_perf_df = get_llm_perf_df(
|
| 148 |
+
machine=machine, subsets=subsets, backends=backends, hardware_type=hardware_type
|
| 149 |
+
)
|
| 150 |
# print(attentions)
|
| 151 |
# print(llm_perf_df["Attention ποΈ"].unique())
|
| 152 |
filtered_llm_perf_df = llm_perf_df[
|
| 153 |
llm_perf_df["Model π€"].str.contains(search, case=False)
|
| 154 |
+
& llm_perf_df["Backend π"].isin(backend_checkboxes)
|
| 155 |
& llm_perf_df["Precision π₯"].isin(precisions)
|
| 156 |
& llm_perf_df["Attention ποΈ"].isin(attentions)
|
| 157 |
& llm_perf_df["Quantization ποΈ"].isin(quantizations)
|
|
|
|
| 160 |
& (llm_perf_df["Memory (MB)"] <= memory)
|
| 161 |
]
|
| 162 |
selected_filtered_llm_perf_df = select_columns_fn(
|
| 163 |
+
machine, subsets, backends, hardware_type, columns, search, filtered_llm_perf_df
|
| 164 |
)
|
| 165 |
selected_filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_llm_perf_df)
|
| 166 |
# filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
|
|
|
|
| 186 |
# button
|
| 187 |
filter_button,
|
| 188 |
# fixed
|
| 189 |
+
machine_value,
|
| 190 |
+
subsets_value,
|
| 191 |
+
backends_value,
|
| 192 |
+
hardware_type_value,
|
| 193 |
# inputs
|
| 194 |
score_slider,
|
| 195 |
memory_slider,
|
|
|
|
| 215 |
fn=filter_rows_fn,
|
| 216 |
inputs=[
|
| 217 |
# fixed
|
| 218 |
+
machine_value,
|
| 219 |
+
subsets_value,
|
| 220 |
+
backends_value,
|
| 221 |
+
hardware_type_value,
|
| 222 |
# inputs
|
| 223 |
score_slider,
|
| 224 |
memory_slider,
|
|
|
|
| 244 |
)
|
| 245 |
|
| 246 |
|
| 247 |
+
def select_columns_fn(
|
| 248 |
+
machine, subsets, backends, hardware_type, columns, search, llm_perf_df=None
|
| 249 |
+
):
|
| 250 |
if llm_perf_df is None:
|
| 251 |
+
llm_perf_df = get_llm_perf_df(
|
| 252 |
+
machine=machine,
|
| 253 |
+
subsets=subsets,
|
| 254 |
+
backends=backends,
|
| 255 |
+
hardware_type=hardware_type,
|
| 256 |
+
)
|
| 257 |
|
| 258 |
selected_leaderboard_df = get_leaderboard_df(llm_perf_df)
|
| 259 |
selected_leaderboard_df = selected_leaderboard_df[
|
|
|
|
| 266 |
|
| 267 |
def create_select_callback(
|
| 268 |
# fixed
|
| 269 |
+
machine_value,
|
| 270 |
+
subsets_value,
|
| 271 |
+
backends_value,
|
| 272 |
+
hardware_type_value,
|
| 273 |
# interactive
|
| 274 |
columns_checkboxes,
|
| 275 |
search_bar,
|
|
|
|
| 278 |
):
|
| 279 |
columns_checkboxes.change(
|
| 280 |
fn=select_columns_fn,
|
| 281 |
+
inputs=[
|
| 282 |
+
machine_value,
|
| 283 |
+
subsets_value,
|
| 284 |
+
backends_value,
|
| 285 |
+
hardware_type_value,
|
| 286 |
+
columns_checkboxes,
|
| 287 |
+
search_bar,
|
| 288 |
+
],
|
| 289 |
outputs=[leaderboard_table],
|
| 290 |
)
|
| 291 |
search_bar.change(
|
| 292 |
fn=select_columns_fn,
|
| 293 |
+
inputs=[
|
| 294 |
+
machine_value,
|
| 295 |
+
subsets_value,
|
| 296 |
+
backends_value,
|
| 297 |
+
hardware_type_value,
|
| 298 |
+
columns_checkboxes,
|
| 299 |
+
search_bar,
|
| 300 |
+
],
|
| 301 |
outputs=[leaderboard_table],
|
| 302 |
)
|