Spaces:
Running
Running
New models appear more often
Browse files
app.py
CHANGED
|
@@ -4,18 +4,17 @@ import random
|
|
| 4 |
from collections import defaultdict
|
| 5 |
from datetime import datetime
|
| 6 |
import hashlib
|
|
|
|
| 7 |
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
-
|
| 10 |
load_dotenv()
|
| 11 |
|
| 12 |
-
import gradio as gr
|
| 13 |
from gen_api_answer import (
|
| 14 |
get_model_response,
|
| 15 |
parse_model_response,
|
| 16 |
prometheus_parse_model_response,
|
| 17 |
atla_parse_model_response,
|
| 18 |
-
flow_judge_parse_model_response
|
| 19 |
)
|
| 20 |
|
| 21 |
from random_sample_generation import (
|
|
@@ -24,7 +23,9 @@ from random_sample_generation import (
|
|
| 24 |
generate_ai_response
|
| 25 |
)
|
| 26 |
from db import add_vote, create_db_connection, get_votes
|
|
|
|
| 27 |
from utils import Vote
|
|
|
|
| 28 |
from common import (
|
| 29 |
POLICY_CONTENT,
|
| 30 |
ACKNOWLEDGEMENTS,
|
|
@@ -717,18 +718,21 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 717 |
else:
|
| 718 |
model_a, model_b = other_model, atla_model
|
| 719 |
else:
|
| 720 |
-
# For subsequent games, new
|
| 721 |
-
if random.random() < 0.
|
| 722 |
-
|
|
|
|
|
|
|
| 723 |
other_model = random.choice(other_models)
|
| 724 |
|
| 725 |
if random.random() < 0.5:
|
| 726 |
-
model_a, model_b =
|
| 727 |
else:
|
| 728 |
-
model_a, model_b = other_model,
|
| 729 |
else:
|
| 730 |
-
|
| 731 |
-
|
|
|
|
| 732 |
model_a, model_b = (model1, model2) if random.random() < 0.5 else (model2, model1)
|
| 733 |
|
| 734 |
# Get responses from models
|
|
@@ -750,9 +754,8 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
| 750 |
is_prometheus_b = (model_data.get(model_b)['organization'] == 'Prometheus')
|
| 751 |
is_atla_a = (model_data.get(model_a)['organization'] == 'Atla')
|
| 752 |
is_atla_b = (model_data.get(model_b)['organization'] == 'Atla')
|
| 753 |
-
|
| 754 |
is_flow_judge_a = (model_data.get(model_a)['organization'] == 'Flow AI')
|
| 755 |
-
is_flow_judge_b = (model_data.get(model_b)['organization'] == 'Flow AI')
|
| 756 |
|
| 757 |
if is_prometheus_a:
|
| 758 |
score_a_val, critique_a_val = prometheus_parse_model_response(response_a)
|
|
|
|
| 4 |
from collections import defaultdict
|
| 5 |
from datetime import datetime
|
| 6 |
import hashlib
|
| 7 |
+
import gradio as gr
|
| 8 |
|
| 9 |
from dotenv import load_dotenv
|
|
|
|
| 10 |
load_dotenv()
|
| 11 |
|
|
|
|
| 12 |
from gen_api_answer import (
|
| 13 |
get_model_response,
|
| 14 |
parse_model_response,
|
| 15 |
prometheus_parse_model_response,
|
| 16 |
atla_parse_model_response,
|
| 17 |
+
flow_judge_parse_model_response
|
| 18 |
)
|
| 19 |
|
| 20 |
from random_sample_generation import (
|
|
|
|
| 23 |
generate_ai_response
|
| 24 |
)
|
| 25 |
from db import add_vote, create_db_connection, get_votes
|
| 26 |
+
|
| 27 |
from utils import Vote
|
| 28 |
+
|
| 29 |
from common import (
|
| 30 |
POLICY_CONTENT,
|
| 31 |
ACKNOWLEDGEMENTS,
|
|
|
|
| 718 |
else:
|
| 719 |
model_a, model_b = other_model, atla_model
|
| 720 |
else:
|
| 721 |
+
# For subsequent games, new models appear 40% of the time
|
| 722 |
+
if random.random() < 0.4:
|
| 723 |
+
# Randomly choose between new models
|
| 724 |
+
new_model = random.choice(["Atla-8B-preview", "Flow-Judge-1.0"])
|
| 725 |
+
other_models = [m for m in active_models if m not in [new_model, "Atla-8B-preview", "Flow-Judge-1.0"]]
|
| 726 |
other_model = random.choice(other_models)
|
| 727 |
|
| 728 |
if random.random() < 0.5:
|
| 729 |
+
model_a, model_b = new_model, other_model
|
| 730 |
else:
|
| 731 |
+
model_a, model_b = other_model, new_model
|
| 732 |
else:
|
| 733 |
+
# For other cases, exclude both Atla and Flow-Judge
|
| 734 |
+
non_special_models = [m for m in active_models if m not in ["Atla-8B-preview", "Flow-Judge-1.0"]]
|
| 735 |
+
model1, model2 = random.sample(non_special_models, 2)
|
| 736 |
model_a, model_b = (model1, model2) if random.random() < 0.5 else (model2, model1)
|
| 737 |
|
| 738 |
# Get responses from models
|
|
|
|
| 754 |
is_prometheus_b = (model_data.get(model_b)['organization'] == 'Prometheus')
|
| 755 |
is_atla_a = (model_data.get(model_a)['organization'] == 'Atla')
|
| 756 |
is_atla_b = (model_data.get(model_b)['organization'] == 'Atla')
|
|
|
|
| 757 |
is_flow_judge_a = (model_data.get(model_a)['organization'] == 'Flow AI')
|
| 758 |
+
is_flow_judge_b = (model_data.get(model_b)['organization'] == 'Flow AI')
|
| 759 |
|
| 760 |
if is_prometheus_a:
|
| 761 |
score_a_val, critique_a_val = prometheus_parse_model_response(response_a)
|