Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -669,7 +669,10 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 669 |
]
|
| 670 |
)
|
| 671 |
|
| 672 |
-
#
|
|
|
|
|
|
|
|
|
|
| 673 |
def submit_and_store(
|
| 674 |
use_reference,
|
| 675 |
eval_criteria_text_input,
|
|
@@ -681,7 +684,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 681 |
score3_description,
|
| 682 |
score4_description,
|
| 683 |
score5_description,
|
| 684 |
-
is_first_game
|
| 685 |
):
|
| 686 |
# Build prompt data dictionary
|
| 687 |
prompt_data = {
|
|
@@ -698,35 +701,31 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 698 |
|
| 699 |
# Get list of active models only for matches
|
| 700 |
active_models = [name for name, info in model_data.items()
|
| 701 |
-
if info.get("active", True)]
|
| 702 |
|
| 703 |
-
# Modified model selection logic
|
| 704 |
atla_model = "Atla-8B-preview-2024-01-08"
|
| 705 |
|
| 706 |
if is_first_game:
|
| 707 |
-
# For the first game, ensure
|
| 708 |
other_models = [m for m in active_models if m != atla_model]
|
| 709 |
other_model = random.choice(other_models)
|
| 710 |
|
| 711 |
-
# Randomly assign
|
| 712 |
if random.random() < 0.5:
|
| 713 |
model_a, model_b = atla_model, other_model
|
| 714 |
else:
|
| 715 |
model_a, model_b = other_model, atla_model
|
| 716 |
else:
|
| 717 |
-
# For subsequent games,
|
| 718 |
if random.random() < 0.3:
|
| 719 |
-
# Include Atla in this battle
|
| 720 |
other_models = [m for m in active_models if m != atla_model]
|
| 721 |
other_model = random.choice(other_models)
|
| 722 |
|
| 723 |
-
# Randomly assign Atla to either position A or B
|
| 724 |
if random.random() < 0.5:
|
| 725 |
model_a, model_b = atla_model, other_model
|
| 726 |
else:
|
| 727 |
model_a, model_b = other_model, atla_model
|
| 728 |
else:
|
| 729 |
-
# Battle between two non-Atla models
|
| 730 |
non_atla_models = [m for m in active_models if m != atla_model]
|
| 731 |
model1, model2 = random.sample(non_atla_models, 2)
|
| 732 |
model_a, model_b = (model1, model2) if random.random() < 0.5 else (model2, model1)
|
|
@@ -786,6 +785,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 786 |
gr.update(value="*Model: Hidden*"),
|
| 787 |
gr.update(value="Regenerate judges", variant="secondary", interactive=True),
|
| 788 |
gr.update(value="🎲"), # random_btn
|
|
|
|
| 789 |
)
|
| 790 |
|
| 791 |
# Update the click handler to use False for is_first_game after first submission
|
|
@@ -802,7 +802,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 802 |
|
| 803 |
# Update the send_btn click handler
|
| 804 |
send_btn.click(
|
| 805 |
-
fn=
|
| 806 |
inputs=[
|
| 807 |
use_reference_toggle,
|
| 808 |
eval_criteria_text,
|
|
@@ -814,6 +814,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 814 |
score3_description,
|
| 815 |
score4_description,
|
| 816 |
score5_description,
|
|
|
|
| 817 |
],
|
| 818 |
outputs=[
|
| 819 |
score_a,
|
|
@@ -830,6 +831,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 830 |
model_name_b,
|
| 831 |
send_btn,
|
| 832 |
random_btn,
|
|
|
|
| 833 |
],
|
| 834 |
)
|
| 835 |
|
|
|
|
| 669 |
]
|
| 670 |
)
|
| 671 |
|
| 672 |
+
# Add a new state variable to track first game
|
| 673 |
+
first_game_state = gr.State(True) # Initialize as True
|
| 674 |
+
|
| 675 |
+
# Update the submit function to use the state variable
|
| 676 |
def submit_and_store(
|
| 677 |
use_reference,
|
| 678 |
eval_criteria_text_input,
|
|
|
|
| 684 |
score3_description,
|
| 685 |
score4_description,
|
| 686 |
score5_description,
|
| 687 |
+
is_first_game, # Add state variable as input
|
| 688 |
):
|
| 689 |
# Build prompt data dictionary
|
| 690 |
prompt_data = {
|
|
|
|
| 701 |
|
| 702 |
# Get list of active models only for matches
|
| 703 |
active_models = [name for name, info in model_data.items()
|
| 704 |
+
if info.get("active", True)]
|
| 705 |
|
|
|
|
| 706 |
atla_model = "Atla-8B-preview-2024-01-08"
|
| 707 |
|
| 708 |
if is_first_game:
|
| 709 |
+
# For the first game, ensure the new model is one of the two competing models so it can catch up on votes
|
| 710 |
other_models = [m for m in active_models if m != atla_model]
|
| 711 |
other_model = random.choice(other_models)
|
| 712 |
|
| 713 |
+
# Randomly assign the new model to either position A or B
|
| 714 |
if random.random() < 0.5:
|
| 715 |
model_a, model_b = atla_model, other_model
|
| 716 |
else:
|
| 717 |
model_a, model_b = other_model, atla_model
|
| 718 |
else:
|
| 719 |
+
# For subsequent games, the new model appears 30% of the time
|
| 720 |
if random.random() < 0.3:
|
|
|
|
| 721 |
other_models = [m for m in active_models if m != atla_model]
|
| 722 |
other_model = random.choice(other_models)
|
| 723 |
|
|
|
|
| 724 |
if random.random() < 0.5:
|
| 725 |
model_a, model_b = atla_model, other_model
|
| 726 |
else:
|
| 727 |
model_a, model_b = other_model, atla_model
|
| 728 |
else:
|
|
|
|
| 729 |
non_atla_models = [m for m in active_models if m != atla_model]
|
| 730 |
model1, model2 = random.sample(non_atla_models, 2)
|
| 731 |
model_a, model_b = (model1, model2) if random.random() < 0.5 else (model2, model1)
|
|
|
|
| 785 |
gr.update(value="*Model: Hidden*"),
|
| 786 |
gr.update(value="Regenerate judges", variant="secondary", interactive=True),
|
| 787 |
gr.update(value="🎲"), # random_btn
|
| 788 |
+
False, # Set first_game_state to False after first submission
|
| 789 |
)
|
| 790 |
|
| 791 |
# Update the click handler to use False for is_first_game after first submission
|
|
|
|
| 802 |
|
| 803 |
# Update the send_btn click handler
|
| 804 |
send_btn.click(
|
| 805 |
+
fn=submit_and_store,
|
| 806 |
inputs=[
|
| 807 |
use_reference_toggle,
|
| 808 |
eval_criteria_text,
|
|
|
|
| 814 |
score3_description,
|
| 815 |
score4_description,
|
| 816 |
score5_description,
|
| 817 |
+
first_game_state, # Add first_game_state as input
|
| 818 |
],
|
| 819 |
outputs=[
|
| 820 |
score_a,
|
|
|
|
| 831 |
model_name_b,
|
| 832 |
send_btn,
|
| 833 |
random_btn,
|
| 834 |
+
first_game_state, # Add first_game_state as output
|
| 835 |
],
|
| 836 |
)
|
| 837 |
|