HeChuan1 committed
Commit ae15977 · verified · 1 Parent(s): ca3577b

Update app.py

Files changed (1)
  app.py +30 -206
app.py CHANGED
@@ -1,207 +1,31 @@
- from transformers import pipeline
  import gradio as gr
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
- import pandas as pd
- from apscheduler.schedulers.background import BackgroundScheduler
- from huggingface_hub import snapshot_download
-
- from src.about import (
-     CITATION_BUTTON_LABEL,
-     CITATION_BUTTON_TEXT,
-     EVALUATION_QUEUE_TEXT,
-     INTRODUCTION_TEXT,
-     LLM_BENCHMARKS_TEXT,
-     TITLE,
- )
- from src.display.css_html_js import custom_css
- from src.display.utils import (
-     BENCHMARK_COLS,
-     COLS,
-     EVAL_COLS,
-     EVAL_TYPES,
-     AutoEvalColumn,
-     ModelType,
-     fields,
-     WeightType,
-     Precision
- )
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
- from src.submission.submit import add_new_eval
- model1 = pipeline("text-generation", model="gpt2")
-
-
-
- def restart_space():
-     API.restart_space(repo_id=REPO_ID)
-
- ### Space initialisation
- try:
-     print(EVAL_REQUESTS_PATH)
-     snapshot_download(
-         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-     )
- except Exception:
-     restart_space()
- try:
-     print(EVAL_RESULTS_PATH)
-     snapshot_download(
-         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-     )
- except Exception:
-     restart_space()
-
-
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-
- (
-     finished_eval_queue_df,
-     running_eval_queue_df,
-     pending_eval_queue_df,
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-
- def init_leaderboard(dataframe):
-     if dataframe is None or dataframe.empty:
-         raise ValueError("Leaderboard DataFrame is empty or None.")
-     return Leaderboard(
-         value=dataframe,
-         datatype=[c.type for c in fields(AutoEvalColumn)],
-         select_columns=SelectColumns(
-             default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-             cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-             label="Select Columns to Display:",
-         ),
-         search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-         hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-         filter_columns=[
-             ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-             ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-             ColumnFilter(
-                 AutoEvalColumn.params.name,
-                 type="slider",
-                 min=0.01,
-                 max=150,
-                 label="Select the number of parameters (B)",
-             ),
-             ColumnFilter(
-                 AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-             ),
-         ],
-         bool_checkboxgroup_label="Hide models",
-         interactive=False,
-     )
-
-
- demo = gr.Blocks(css=custom_css)
- with demo:
-     gr.HTML(TITLE)
-     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-             leaderboard = init_leaderboard(LEADERBOARD_DF)
-
-         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-             with gr.Column():
-                 with gr.Row():
-                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                 with gr.Column():
-                     with gr.Accordion(
-                         f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             finished_eval_table = gr.components.Dataframe(
-                                 value=finished_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                             )
-                     with gr.Accordion(
-                         f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             running_eval_table = gr.components.Dataframe(
-                                 value=running_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                             )
-
-                     with gr.Accordion(
-                         f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             pending_eval_table = gr.components.Dataframe(
-                                 value=pending_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                             )
-             with gr.Row():
-                 gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-             with gr.Row():
-                 with gr.Column():
-                     model_name_textbox = gr.Textbox(label="Model name")
-                     revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                     model_type = gr.Dropdown(
-                         choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                         label="Model type",
-                         multiselect=False,
-                         value=None,
-                         interactive=True,
-                     )
-
-                 with gr.Column():
-                     precision = gr.Dropdown(
-                         choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                         label="Precision",
-                         multiselect=False,
-                         value="float16",
-                         interactive=True,
-                     )
-                     weight_type = gr.Dropdown(
-                         choices=[i.value.name for i in WeightType],
-                         label="Weights type",
-                         multiselect=False,
-                         value="Original",
-                         interactive=True,
-                     )
-                     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-             submit_button = gr.Button("Submit Eval")
-             submission_result = gr.Markdown()
-             submit_button.click(
-                 add_new_eval,
-                 [
-                     model_name_textbox,
-                     base_model_name_textbox,
-                     revision_name_textbox,
-                     precision,
-                     weight_type,
-                     model_type,
-                 ],
-                 submission_result,
-             )
-
-     with gr.Row():
-         with gr.Accordion("📙 Citation", open=False):
-             citation_button = gr.Textbox(
-                 value=CITATION_BUTTON_TEXT,
-                 label=CITATION_BUTTON_LABEL,
-                 lines=20,
-                 elem_id="citation-button",
-                 show_copy_button=True,
-             )
-
- scheduler = BackgroundScheduler()
- scheduler.add_job(restart_space, "interval", seconds=1800)
- scheduler.start()
- demo.queue(default_concurrency_limit=40).launch()
 
 
  import gradio as gr
+ from utils.grace_plot import plot_radar
+ from model_wrappers.model_a import run_model_a
+ from model_wrappers.model_b import run_model_b
+
+ def run_all_models(prompt):
+     return run_model_a(prompt), run_model_b(prompt)
+
+ with gr.Blocks() as demo:
+     with gr.Tab("LLM Benchmark"):
+         gr.Markdown("## ✨ 模型 GRACE 维度雷达图")
+         with gr.Row():
+             plot_btn = gr.Button("生成 GRACE 雷达图")
+             radar_output = gr.Plot()
+         plot_btn.click(fn=plot_radar, inputs=[], outputs=radar_output)
+
+     with gr.Tab("Arena"):
+         gr.Markdown("## 🤖 模型竞技场:同一输入比拼")
+         prompt = gr.Textbox(label="请输入 Prompt")
+         run_btn = gr.Button("运行所有模型")
+         output_a = gr.Textbox(label="Model A 输出")
+         output_b = gr.Textbox(label="Model B 输出")
+         run_btn.click(fn=run_all_models, inputs=[prompt], outputs=[output_a, output_b])
+
+     with gr.Tab("Report"):
+         with open("report.md", encoding="utf-8") as f:
+             report_md = f.read()
+         gr.Markdown(report_md)
+
+ if __name__ == "__main__":
+     demo.launch()
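
Note: the rewritten app.py imports plot_radar from utils.grace_plot (wired to the "LLM Benchmark" tab, where a button labelled "Generate GRACE radar chart" renders the models' GRACE dimension scores into a gr.Plot), but that module is not part of this commit. As a rough, hypothetical sketch only, plot_radar simply needs to return a figure that gr.Plot can display; every dimension name and score below is a placeholder, not the project's data:

# utils/grace_plot.py -- hypothetical sketch, not included in this commit
import matplotlib
matplotlib.use("Agg")  # headless backend for a hosted Space
import matplotlib.pyplot as plt
import numpy as np

# Placeholder GRACE dimension labels and per-model scores; the real values
# would come from the project's own evaluation results.
DIMENSIONS = ["G", "R", "A", "C", "E"]
SCORES = {
    "Model A": [0.7, 0.6, 0.8, 0.5, 0.9],
    "Model B": [0.6, 0.7, 0.5, 0.8, 0.7],
}

def plot_radar():
    """Return a matplotlib Figure with one closed radar trace per model."""
    angles = np.linspace(0, 2 * np.pi, len(DIMENSIONS), endpoint=False).tolist()
    angles += angles[:1]  # repeat the first angle to close the polygon
    fig, ax = plt.subplots(subplot_kw={"projection": "polar"})
    for name, scores in SCORES.items():
        values = scores + scores[:1]
        ax.plot(angles, values, label=name)
        ax.fill(angles, values, alpha=0.1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(DIMENSIONS)
    ax.set_ylim(0, 1)
    ax.legend(loc="upper right")
    return fig

Because the click handler is registered with inputs=[], the chart is recomputed from whatever the module loads at call time rather than from any user input.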
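Likewise, run_model_a and run_model_b come from model_wrappers, which this diff does not show; the "Arena" tab runs the same prompt through both and displays the two outputs side by side. A minimal sketch of one wrapper, assuming a Hugging Face transformers text-generation pipeline (the checkpoint name is illustrative, not the project's actual choice):

# model_wrappers/model_a.py -- hypothetical sketch, not included in this commit
from transformers import pipeline

# Loaded once at import time so repeated Arena requests reuse the same model.
_generator = pipeline("text-generation", model="gpt2")

def run_model_a(prompt: str) -> str:
    """Generate a short continuation of the prompt and return it as plain text."""
    outputs = _generator(prompt, max_new_tokens=64, do_sample=True)
    return outputs[0]["generated_text"]

run_model_b would follow the same shape with a different checkpoint, so run_all_models(prompt) in app.py can return the two strings directly into the Arena tab's output textboxes.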