UI changes (11 Nov)

app.py CHANGED

@@ -311,8 +311,8 @@ def regenerate_prompt(model_a, model_b, eval_prompt, *variable_values):
         score_b,  # score_b textbox
         critique_b,  # critique_b textbox
         gr.update(visible=True),  # action_buttons_row
-        gr.update(value="*Model:
-        gr.update(value="*Model:
+        gr.update(value="*Model: Hidden*"),  # model_name_a
+        gr.update(value="*Model: Hidden*"),  # model_name_b
         model1,  # model_a_state
         model2,  # model_b_state
     )
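Context for the hunk above: in Gradio, an event handler can return gr.update(...) patches that are matched positionally against its outputs list. A minimal, self-contained sketch of that pattern, with illustrative component and handler names rather than the real ones from app.py:

```python
import gradio as gr

# Sketch only: demonstrates gr.update() patching output components positionally.
# Component and handler names are illustrative, not taken from app.py.
with gr.Blocks() as demo:
    model_name_a = gr.Markdown("*Model: Hidden*")
    model_name_b = gr.Markdown("*Model: Hidden*")
    regen_btn = gr.Button("Regenerate with different models")

    def regenerate():
        # Re-mask both model labels so voting stays blind after a regenerate.
        return (
            gr.update(value="*Model: Hidden*"),  # applied to model_name_a
            gr.update(value="*Model: Hidden*"),  # applied to model_name_b
        )

    regen_btn.click(fn=regenerate, outputs=[model_name_a, model_name_b])

if __name__ == "__main__":
    demo.launch()
```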
@@ -457,7 +457,6 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
     with gr.Row():
         with gr.Column():
             gr.Markdown(BATTLE_RULES)
-            gr.Markdown(EVAL_DESCRIPTION)

     # Add Example Metrics Section
     with gr.Accordion("Evaluator Prompt Templates", open=False):
@@ -473,29 +472,40 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
     with gr.Row():
         # Left column - Eval Prompt
         with gr.Column(scale=1):
+            gr.Markdown("### Evaluator Prompt")
             eval_prompt = gr.TextArea(
-                label="
+                label="",
                 lines=1,
-                value=
+                value=EXAMPLE_METRICS["Hallucination"]["prompt"],
                 placeholder="Type your eval prompt here... denote variables in {{curly brackets}} to be populated on the right.",
                 show_label=True,
             )

         # Right column - Variable Mapping
         with gr.Column(scale=1):
-            gr.Markdown("### Sample to
+            gr.Markdown("### Sample to evaluate")
             # Create inputs for up to 5 variables, with first two visible by default
             variable_rows = []
             for i in range(5):
-                initial_visibility = True if i <
+                initial_visibility = True if i < 3 else False
                 with gr.Group(visible=initial_visibility) as var_row:
-                    # Set default labels
+                    # Set default labels and values from Hallucination example
                     default_label = (
-                        "input" if i == 0
+                        "input" if i == 0
+                        else "ground_truth" if i == 1
+                        else "response" if i == 2
+                        else ""
+                    )
+                    default_value = (
+                        EXAMPLE_METRICS["Hallucination"]["input"] if i == 0
+                        else EXAMPLE_METRICS["Hallucination"]["ground_truth"] if i == 1
+                        else EXAMPLE_METRICS["Hallucination"]["response"] if i == 2
+                        else ""
                     )
                     var_input = gr.Textbox(
                         container=True,
+                        label=default_label,
+                        value=default_value
                     )
                     variable_rows.append((var_row, var_input))
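The new defaults above read from an EXAMPLE_METRICS dict defined elsewhere in app.py. Only the keys visible in this diff are certain; the sketch below is a hedged reconstruction with placeholder values:

```python
# Hedged reconstruction of the EXAMPLE_METRICS shape this hunk depends on.
# The "Hallucination" entry and its "prompt"/"input"/"ground_truth"/"response"
# keys appear verbatim in the diff; all string values here are placeholders.
EXAMPLE_METRICS = {
    "Hallucination": {
        # Evaluator prompt template; {{variables}} are filled from the
        # variable textboxes in the right-hand column.
        "prompt": "Given {{input}} and {{ground_truth}}, does {{response}} hallucinate? ...",
        "input": "...",         # sample user query
        "ground_truth": "...",  # reference answer
        "response": "...",      # model output to be judged
    },
    # ...plus the other templates listed in the "Evaluator Prompt Templates" accordion.
}
```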
@@ -530,6 +540,11 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
         "Regenerate with different models", variant="secondary", visible=False
     )

+    gr.Markdown("<br>")
+
+    # Add evaluation tips
+    gr.Markdown(EVAL_DESCRIPTION)
+
     # Add spacing and acknowledgements at the bottom
     gr.Markdown(ACKNOWLEDGEMENTS)
@@ -823,7 +838,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:

     # Set default metric at startup
     demo.load(
-        fn=lambda: set_example_metric("
+        fn=lambda: set_example_metric("Hallucination"),
         outputs=[eval_prompt] + [var_input for _, var_input in variable_rows],
     )
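demo.load wires six outputs here (the prompt textarea plus the five variable textboxes), so set_example_metric presumably returns six values. One plausible implementation, sketched as a guess rather than the actual function from app.py:

```python
import gradio as gr

def set_example_metric(name):
    # Hypothetical sketch: one return value per output component, matching
    # outputs=[eval_prompt] + [var_input for _, var_input in variable_rows].
    metric = EXAMPLE_METRICS[name]
    filled = [
        gr.update(label="input", value=metric["input"]),
        gr.update(label="ground_truth", value=metric["ground_truth"]),
        gr.update(label="response", value=metric["response"]),
    ]
    # Blank out the two variable rows this metric does not use.
    empty = [gr.update(label="", value="") for _ in range(2)]
    return [metric["prompt"]] + filled + empty
```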