import gradio as gr
import json
from datetime import datetime
import os
import logging
import random

# Logger setup (unchanged)
def _setup_logger():
    log_format = logging.Formatter("[%(asctime)s %(levelname)s] %(message)s")
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(log_format)
    logger.handlers = [console_handler]
    return logger

logger = _setup_logger()

DATA_DIR = "annotations_data2"
os.makedirs(DATA_DIR, exist_ok=True)

# Load questions from JSON (unchanged)
with open("test_pairs2.json", "r") as f:
    response_pairs = json.load(f)

# Function to generate assignments ensuring each question gets 7 labels
def generate_assignments(num_questions=120, num_annotators=30, labels_per_question=7, questions_per_annotator=28):
    assignments = {f"annotator_{i+1}": [] for i in range(num_annotators)}
    question_assignments = {i: [] for i in range(num_questions)}
    annotator_capacities = [questions_per_annotator] * num_annotators
    for q in range(num_questions):
        available_annotators = [(a, annotator_capacities[a]) for a in range(num_annotators) if annotator_capacities[a] > 0]
        if len(available_annotators) < labels_per_question:
            raise ValueError(f"Not enough annotators with capacity for question {q}")
        available_annotators.sort(key=lambda x: x[1], reverse=True)
        selected_annotators = [a for a, _ in available_annotators[:labels_per_question]]
        for a in selected_annotators:
            assignments[f"annotator_{a+1}"].append(q)
            question_assignments[q].append(a)
            annotator_capacities[a] -= 1
    return assignments, question_assignments
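
# Illustrative sanity check (not run by the app): with the default parameters,
# 120 questions * 7 labels == 30 annotators * 28 questions, so every question
# should receive exactly 7 annotators and no annotator more than 28 questions.
#
#   _assignments, _question_assignments = generate_assignments()
#   assert all(len(v) == 7 for v in _question_assignments.values())
#   assert all(len(v) <= 28 for v in _assignments.values())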

custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');
body { font-family: 'Roboto', sans-serif !important; line-height: 1.6; }
.panel { border: 1px solid #e5e7eb !important; border-radius: 12px !important; padding: 20px !important; }
button { font-weight: 500 !important; transition: all 0.2s ease !important; font-family: 'Roboto', sans-serif !important; }
button:hover { transform: translateY(-1px); }
.progress { color: #4f46e5; font-weight: 500; }
textarea { border-radius: 8px !important; padding: 12px !important; font-family: 'Roboto', sans-serif !important; }
.selected-response { border: 2px solid #4f46e5 !important; background-color: #f5f3ff; }
.instruction-panel { background: #f8f9fa !important; border: 1px solid #e0e0e0 !important; border-radius: 12px !important; padding: 25px !important; margin-bottom: 25px !important; }
.criteria-list { margin-left: 20px !important; list-style-type: none !important; }
.criteria-item { padding: 8px 0 !important; }
.highlight { color: #4f46e5; font-weight: 500; }
"""

# Updated State class to include selected_indices, form_responses, and forms_completed
class State:
    def __init__(self):
        self.current_idx = 0
        self.prolific_id = ""
        self.assignment_key = ""       # Annotator slot key, e.g. "annotator_1" (set on ID submit)
        self.selected_indices = []     # List of 28 question indices for this user
        self.annotations = []          # Annotations for the 28 questions
        self.form_responses = {}       # Responses to post-test forms
        self.forms_completed = False   # Flag for form completion
        self.start_time = datetime.now()

state = State()

ASSIGNED_FILE = "assigned.json"

def load_assigned():
    if os.path.exists(ASSIGNED_FILE):
        with open(ASSIGNED_FILE, "r") as f:
            return json.load(f)
    return {}

def save_assigned(assigned):
    with open(ASSIGNED_FILE, "w") as f:
        json.dump(assigned, f, indent=2)

def get_next_available_assignment(assigned, total_assignments=30):
    for i in range(1, total_assignments + 1):
        key = f"annotator_{i}"
        if key not in assigned.values():
            return key
    return None
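
# Illustrative shape of assigned.json (assumed from the helpers above): it maps
# each Prolific ID to the annotator slot it was given, e.g.
#   {"PID_ABC": "annotator_1", "PID_XYZ": "annotator_2"}
# get_next_available_assignment() returns the first "annotator_N" key that does
# not yet appear among those values, or None once all slots are taken.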

# Updated save_annotations to include new fields
def save_annotations():
    if not state.prolific_id:
        return
    filename = f"{state.prolific_id}_latest.json"
    filepath = os.path.join(DATA_DIR, filename)
    data = {
        "prolific_id": state.prolific_id,
        "assignment_key": state.assignment_key,
        "selected_indices": state.selected_indices,
        "duration": (datetime.now() - state.start_time).total_seconds(),
        "current_idx": state.current_idx,
        "annotations": state.annotations,
        "form_responses": state.form_responses,
        "forms_completed": state.forms_completed
    }
    with open(filepath, "w") as f:
        json.dump(data, f, indent=2)
    logger.info(f"Saved annotations to {filepath}")
    return filepath
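
# Illustrative on-disk record written by save_annotations() (values are made up;
# field names mirror the `data` dict above):
#   {
#     "prolific_id": "PID_ABC",
#     "assignment_key": "annotator_1",
#     "selected_indices": [3, 17, 42],
#     "duration": 512.4,
#     "current_idx": 5,
#     "annotations": [{"selected": "A", "confidence": 4, ...}],
#     "form_responses": {},
#     "forms_completed": false
#   }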

# Updated load_latest_data to load new fields
def load_latest_data(prolific_id):
    filename = f"{prolific_id}_latest.json"
    filepath = os.path.join(DATA_DIR, filename)
    if os.path.exists(filepath):
        try:
            with open(filepath, "r") as f:
                data = json.load(f)
            state.selected_indices = data.get("selected_indices", [])
            state.annotations = data.get("annotations", [])
            state.form_responses = data.get("form_responses", {})
            state.forms_completed = data.get("forms_completed", False)
            state.current_idx = min(max(data.get("current_idx", 0), 0), 27)
            return data
        except Exception as e:
            logger.error(f"Error loading {filepath}: {e}")
    return None

INSTRUCTION = """
### Welcome!

In this task, you'll act as a judge comparing two AI chatbot responses. Your goal is to determine which response is better based on specific criteria.

### Task Overview:

- You'll evaluate multiple questions (prompts), each with two responses (Response A and B)
- Select the better response for each question based on the criteria below
- Your progress will be tracked
- After completing all questions, you'll answer a few post-test forms

### Evaluation Criteria:

1. **Perceived Usefulness**
   - Does the answer address the question effectively and provide relevant information?
2. **Social Presence**
   - Does the answer create "the feeling of being there with a 'real' person"?

### Getting Started:

1. Input your Prolific ID to begin
2. Read the question carefully
3. Compare both responses side-by-side
4. Select the better response using the radio buttons
5. Provide optional feedback and a confidence rating
6. Click "Next" to continue or "Previous" to review

**Note:** You need to select a response and a confidence level before proceeding to the next question.

*Thank you for contributing to our research! Your input is valuable.*
"""

MINI_INSTRUCTION = """You'll compare two AI chatbot answers for different questions and pick the better one. Read the question, then look at Response A and Response B. Choose the one that's better based on: Perceived Usefulness (answers well, gives useful info) and Social Presence (understands feelings, fits the situation).

*Select your choice and rate your confidence. Click "Next" to move on or "Previous" to go back. You must pick a response and a confidence level to continue. Thanks for helping with our research!*
"""

# Define post-test form questions (placeholders; replace with actual questions if available)
forms_questions = {
    "Neuro-QoL Cognition Function": [
        {"question": "In the past 7 days, I had to read something several times to understand it.", "options": ["Never", "Rarely", "Sometimes", "Often", "Very Often"]},
        {"question": "In the past 7 days, I had to work really hard to pay attention or I would make a mistake.", "options": ["Never", "Rarely", "Sometimes", "Often", "Very Often"]},
        {"question": "In the past 7 days, I had trouble concentrating.", "options": ["Never", "Rarely", "Sometimes", "Often", "Very Often"]},
        {"question": "In the past 7 days, I had trouble remembering things.", "options": ["Never", "Rarely", "Sometimes", "Often", "Very Often"]}
    ],
    "Wong and Law Emotional Intelligence Scale (WLEIS)": [
        # Self-emotion appraisal (SEA)
        {"question": "I have a good sense of why I have certain feelings most of the time.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I have good understanding of my own emotions.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I really understand what I feel.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I always know whether I am happy or not.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        # Others' emotion appraisal (OEA)
        {"question": "I always know my friends' emotions from their behavior.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I am a good observer of others' emotions.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I am sensitive to the feelings and emotions of others.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I have good understanding of the emotions of people around me.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        # Use of emotion (UOE)
        {"question": "I always set goals for myself and then try my best to achieve them.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I always tell myself I am a competent person.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I am a self-motivated person.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I would always encourage myself to try my best.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        # Regulation of emotion (ROE)
        {"question": "I am able to control my temper and handle difficulties rationally.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I can always calm down quickly when I am very angry.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I have good control of my own emotions.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I can always stay calm in stressful situations.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]}
    ],
    "Algorithmic Aversion": [
        # Trust in LLM
        {"question": "I trust the answers provided by AI chatbots (e.g., ChatGPT) to be accurate.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I feel confident relying on an AI chatbot for important tasks.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I worry that AI chatbots might give me incorrect information.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        # Preference for Human vs. LLM
        {"question": "I prefer asking a human expert over an AI chatbot for advice.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I would rather use a human-written article than one generated by an AI chatbot.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I find human interaction more valuable than interacting with an AI chatbot.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        # Willingness to Use LLM
        {"question": "I would avoid using an AI chatbot if I had other options.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I am willing to use an AI chatbot for daily tasks (e.g., writing, research).", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I would recommend an AI chatbot to others.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]}
    ],
    "Demographics": [
        {"question": "What is your highest level of education?", "options": [
            "Less than high school",
            "High school diploma",
            "Some college",
            "Associate's degree",
            "Bachelor's degree",
            "Master's degree",
            "Doctoral degree"
        ]},
    ]
}
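
# Note on ordering (design choice implied by the code): dict insertion order
# matters here. The flat list of radio components built in create_interface()
# and the key construction in handle_forms_submit() both iterate
# forms_questions.items() in the same order, so responses line up with
# questions positionally.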

def create_interface():
    with gr.Blocks(theme=gr.themes.Ocean(), title="AI Response Evaluation", css=custom_css) as demo:

        # User ID Section (unchanged layout)
        with gr.Column(visible=True, elem_id="id_section") as id_section:
            with gr.Column(elem_classes="instruction-panel"):
                gr.Markdown(INSTRUCTION)
            gr.Markdown("---")
            gr.Markdown("## Prolific ID Verification")
            prolific_id = gr.Textbox(label="Enter your Prolific ID")
            id_submit_btn = gr.Button("Submit", variant="primary")
            id_message = gr.Markdown("", visible=False)

        # Main Interface (updated for 28 questions)
        with gr.Column(visible=False, elem_id="main_interface") as main_interface:
            progress_md = gr.Markdown("**Progress:** 0% (0/28)", elem_classes="progress")
            gr.HTML('<style>.prompt-highlight { background-color: #e6f7ff; padding: 10px; border: 1px solid #91d5ff; border-radius: 5px; }</style>')
            gr.Markdown(MINI_INSTRUCTION)
            gr.Markdown("---")
            gr.Markdown("### Current Question From a User")
            prompt_box = gr.Markdown(elem_classes="prompt-highlight")
            with gr.Row():
                with gr.Column(variant="panel"):
                    gr.Markdown("### Response A")
                    response_a = gr.Markdown(height='200px')
                with gr.Column(variant="panel"):
                    gr.Markdown("### Response B")
                    response_b = gr.Markdown(height='200px')
            selection_radio = gr.Radio(
                choices=[("Response A", "A"), ("Response B", "B")],
                label="Select the better response",
            )
            feedback = gr.Textbox(label="Additional Feedback (optional)", lines=1)
            confidence = gr.Radio(
                choices=[("1 - Not confident", 1), ("2", 2), ("3", 3), ("4", 4), ("5 - Very confident", 5)],
                label="Confidence Rating",
            )
            with gr.Row():
                prev_btn = gr.Button("Previous", variant="secondary")
                next_btn = gr.Button("Next", variant="primary")

        # New Forms Section
        with gr.Column(visible=False, elem_id="forms_section") as forms_section:
            gr.Markdown("## Pre-Test Questions")
            gr.Markdown("Please answer the following questions to complete the study.")
            form_radios = []
            for form_name, questions in forms_questions.items():
                for q in questions:
                    radio = gr.Radio(choices=q["options"], label=q["question"])
                    form_radios.append(radio)
            with gr.Row():
                back_to_questions_btn = gr.Button("Back to Questions", variant="secondary")
                submit_forms_btn = gr.Button("Submit Forms", variant="primary")

        # Completion Section (unchanged layout)
        with gr.Column(visible=False, elem_id="completion") as completion_section:
            gr.Markdown("# Thank You!")
            gr.Markdown("### Completion code: `CA7IOI65`")
            completion_md = gr.Markdown("Your annotations and form responses have been saved.")
            gr.HTML("""
                <p>Click <a href="https://app.prolific.com/researcher/submissions/complete?cc=CA7IOI65" target="_blank">here</a> to complete the task.</p>
            """)

        # Updated handle_id_submit to assign the 28 questions from the annotator's slot
        def handle_id_submit(prolific_id_val):
            if not prolific_id_val.strip():
                raise gr.Error("Please enter a valid Prolific ID")
            prolific_id = prolific_id_val.strip()
            assigned = load_assigned()
            if prolific_id in assigned:
                assignment_key = assigned[prolific_id]
            else:
                next_key = get_next_available_assignment(assigned)
                if next_key is None:
                    return {
                        id_section: gr.update(visible=True),
                        forms_section: gr.update(visible=False),
                        main_interface: gr.update(visible=False),
                        completion_section: gr.update(visible=False),
                        id_message: gr.update(value="The study is full. Thank you for your interest.", visible=True)
                    }
                assigned[prolific_id] = next_key
                save_assigned(assigned)
                assignment_key = next_key
            state.prolific_id = prolific_id
            state.assignment_key = assignment_key
            state.selected_indices = assignments[assignment_key]
            data = load_latest_data(prolific_id)
            if data:
                if not state.forms_completed:
                    return {
                        id_section: gr.update(visible=False),
                        forms_section: gr.update(visible=True),
                        main_interface: gr.update(visible=False),
                        completion_section: gr.update(visible=False),
                        id_message: gr.update(value="", visible=False)
                    }
                elif state.current_idx < 28:
                    return {
                        id_section: gr.update(visible=False),
                        forms_section: gr.update(visible=False),
                        main_interface: gr.update(visible=True),
                        completion_section: gr.update(visible=False),
                        id_message: gr.update(value="", visible=False),
                        **update_interface(state.current_idx)
                    }
                else:
                    return {
                        id_section: gr.update(visible=False),
                        forms_section: gr.update(visible=False),
                        main_interface: gr.update(visible=False),
                        completion_section: gr.update(visible=True),
                        id_message: gr.update(value="", visible=False)
                    }
            else:
                state.annotations = [None] * 28
                state.current_idx = 0
                state.forms_completed = False
                state.form_responses = {}
                return {
                    id_section: gr.update(visible=False),
                    forms_section: gr.update(visible=True),
                    main_interface: gr.update(visible=False),
                    completion_section: gr.update(visible=False),
                    id_message: gr.update(value="", visible=False)
                }

        # Updated update_interface to use selected_indices
        def update_interface(current_idx):
            if current_idx >= 28:
                current_idx = 27
            actual_idx = state.selected_indices[current_idx]
            current_data = response_pairs[actual_idx]
            progress = f"**Progress:** {current_idx/28:.0%} ({min(current_idx, 28)}/28)"
            annotation = state.annotations[current_idx] if current_idx < len(state.annotations) else None
            return {
                prompt_box: current_data.get("prompt", ""),
                response_a: current_data.get("responseA", ""),
                response_b: current_data.get("responseB", ""),
                progress_md: progress,
                feedback: annotation["feedback"] if annotation else "",
                confidence: annotation["confidence"] if annotation else None,
                selection_radio: annotation["selected"] if annotation else None
            }
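
        # Note: update_interface() returns a plain {component: value} dict. The
        # handlers below merge it into their own return dicts with **, relying on
        # Gradio's support for returning a dict keyed by output components (any
        # listed output missing from the dict is simply left unchanged).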

        # Updated handle_navigation: after the 28th question it switches to the completion section
        def handle_navigation(direction, selection, confidence_val, feedback_val):
            error_msg = None
            if direction == "next":
                if not selection:
                    error_msg = "Please select a response before proceeding."
                if not confidence_val:
                    error_msg = "Please select a confidence level before proceeding."
                if error_msg:
                    gr.Warning(error_msg)
                    return {
                        main_interface: gr.update(visible=True),
                        completion_section: gr.update(visible=False),
                        **update_interface(state.current_idx)
                    }
            if selection and confidence_val:
                actual_idx = state.selected_indices[state.current_idx]
                annotation = {
                    "id": response_pairs[actual_idx]["id"],
                    "prompt": response_pairs[actual_idx]["prompt"],
                    "selected": selection,
                    "confidence": confidence_val,
                    "feedback": feedback_val,
                    "timestamp": datetime.now().isoformat()
                }
                state.annotations[state.current_idx] = annotation
            if direction == "next":
                new_idx = min(state.current_idx + 1, 28)
            else:
                new_idx = max(0, state.current_idx - 1)
            state.current_idx = new_idx
            save_annotations()
            if new_idx >= 28:
                return {
                    main_interface: gr.update(visible=False),
                    completion_section: gr.update(visible=True),
                    **update_interface(27)
                }
            else:
                return {
                    main_interface: gr.update(visible=True),
                    completion_section: gr.update(visible=False),
                    **update_interface(new_idx)
                }

        # New function to handle returning to questions from forms
        def handle_back_to_questions():
            state.current_idx = 27
            save_annotations()
            return {
                main_interface: gr.update(visible=True),
                forms_section: gr.update(visible=False),
                completion_section: gr.update(visible=False),
                **update_interface(27)
            }

        # New function to handle form submission
        def handle_forms_submit(*form_inputs):
            if any(input_val is None for input_val in form_inputs):
                gr.Warning("Please answer all questions before proceeding.")
                return {
                    forms_section: gr.update(visible=True),
                    main_interface: gr.update(visible=False),
                    completion_section: gr.update(visible=False)
                }
            state.form_responses = {}
            idx = 0
            for form_name, questions in forms_questions.items():
                for q in questions:
                    key = f"{form_name}_{q['question']}"
                    state.form_responses[key] = form_inputs[idx]
                    idx += 1
            state.forms_completed = True
            save_annotations()
            state.current_idx = 0
            return {
                forms_section: gr.update(visible=False),
                main_interface: gr.update(visible=True),
                completion_section: gr.update(visible=False),
                **update_interface(0)
            }
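
        # Illustrative form_responses key, derived from the loop in
        # handle_forms_submit() (form name + "_" + question text), e.g.
        #   "Neuro-QoL Cognition Function_In the past 7 days, I had trouble concentrating."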

        # Event bindings
        id_submit_btn.click(
            handle_id_submit,
            inputs=prolific_id,
            outputs=[id_section, forms_section, main_interface, completion_section, id_message, prompt_box,
                     response_a, response_b, progress_md, feedback, confidence, selection_radio]
        )
        prev_btn.click(
            handle_navigation,
            inputs=[gr.State("prev"), selection_radio, confidence, feedback],
            outputs=[main_interface, completion_section, prompt_box, response_a,
                     response_b, progress_md, feedback, confidence, selection_radio]
        )
        next_btn.click(
            handle_navigation,
            inputs=[gr.State("next"), selection_radio, confidence, feedback],
            outputs=[main_interface, completion_section, prompt_box, response_a,
                     response_b, progress_md, feedback, confidence, selection_radio]
        )
        back_to_questions_btn.click(
            handle_back_to_questions,
            inputs=[],
            outputs=[main_interface, forms_section, completion_section, prompt_box, response_a,
                     response_b, progress_md, feedback, confidence, selection_radio]
        )
        submit_forms_btn.click(
            handle_forms_submit,
            inputs=form_radios,
            outputs=[forms_section, main_interface, completion_section, prompt_box, response_a,
                     response_b, progress_md, feedback, confidence, selection_radio]
        )

    return demo

if __name__ == "__main__":
    if not os.path.exists("assignments.json"):
        assignments, _ = generate_assignments()
        print("Assignments generated.")
        with open("assignments.json", "w") as f:
            json.dump(assignments, f, indent=2)
    else:
        with open("assignments.json", "r") as f:
            assignments = json.load(f)
        print("Assignments loaded.")

    app = create_interface()
    app.launch()