import gradio as gr
import json
from datetime import datetime
import os
import logging
import random

# Logger setup (unchanged)
def _setup_logger():
    log_format = logging.Formatter("[%(asctime)s %(levelname)s] %(message)s")
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(log_format)
    logger.handlers = [console_handler]
    return logger

logger = _setup_logger()

DATA_DIR = "annotations_data2"
os.makedirs(DATA_DIR, exist_ok=True)

# Load questions from JSON (unchanged)
with open("test_pairs2.json", "r") as f:
    response_pairs = json.load(f)

# Function to generate assignments ensuring each question gets 7 labels
def generate_assignments(num_questions=120, num_annotators=30, labels_per_question=7, questions_per_annotator=28):
    assignments = {f"annotator_{i+1}": [] for i in range(num_annotators)}
    question_assignments = {i: [] for i in range(num_questions)}
    annotator_capacities = [questions_per_annotator] * num_annotators
    for q in range(num_questions):
        available_annotators = [(a, annotator_capacities[a]) for a in range(num_annotators) if annotator_capacities[a] > 0]
        if len(available_annotators) < labels_per_question:
            raise ValueError(f"Not enough annotators with capacity for question {q}")
        available_annotators.sort(key=lambda x: x[1], reverse=True)
        selected_annotators = [a for a, _ in available_annotators[:labels_per_question]]
        for a in selected_annotators:
            assignments[f"annotator_{a+1}"].append(q)
            question_assignments[q].append(a)
            annotator_capacities[a] -= 1
    return assignments, question_assignments
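
# Illustrative sanity check (not run by the app): with the default parameters,
# 120 questions * 7 labels == 30 annotators * 28 questions, so every question
# should receive exactly 7 annotators and no annotator more than 28 questions.
#
#   _assignments, _question_assignments = generate_assignments()
#   assert all(len(v) == 7 for v in _question_assignments.values())
#   assert all(len(v) <= 28 for v in _assignments.values())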

custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');
body { font-family: 'Roboto', sans-serif !important; line-height: 1.6; }
.panel { border: 1px solid #e5e7eb !important; border-radius: 12px !important; padding: 20px !important; }
button { font-weight: 500 !important; transition: all 0.2s ease !important; font-family: 'Roboto', sans-serif !important; }
button:hover { transform: translateY(-1px); }
.progress { color: #4f46e5; font-weight: 500; }
textarea { border-radius: 8px !important; padding: 12px !important; font-family: 'Roboto', sans-serif !important; }
.selected-response { border: 2px solid #4f46e5 !important; background-color: #f5f3ff; }
.instruction-panel { background: #f8f9fa !important; border: 1px solid #e0e0e0 !important; border-radius: 12px !important; padding: 25px !important; margin-bottom: 25px !important; }
.criteria-list { margin-left: 20px !important; list-style-type: none !important; }
.criteria-item { padding: 8px 0 !important; }
.highlight { color: #4f46e5; font-weight: 500; }
"""

# Updated State class to include selected_indices, form_responses, and forms_completed
class State:
    def __init__(self):
        self.current_idx = 0
        self.prolific_id = ""
        self.assignment_key = ""       # Annotator slot key, e.g. "annotator_1" (set on ID submit)
        self.selected_indices = []     # List of 28 question indices for this user
        self.annotations = []          # Annotations for the 28 questions
        self.form_responses = {}       # Responses to post-test forms
        self.forms_completed = False   # Flag for form completion
        self.start_time = datetime.now()

state = State()

ASSIGNED_FILE = "assigned.json"

def load_assigned():
    if os.path.exists(ASSIGNED_FILE):
        with open(ASSIGNED_FILE, "r") as f:
            return json.load(f)
    return {}

def save_assigned(assigned):
    with open(ASSIGNED_FILE, "w") as f:
        json.dump(assigned, f, indent=2)

def get_next_available_assignment(assigned, total_assignments=30):
    for i in range(1, total_assignments + 1):
        key = f"annotator_{i}"
        if key not in assigned.values():
            return key
    return None
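
# Illustrative shape of assigned.json (assumed from the helpers above): it maps
# each Prolific ID to the annotator slot it was given, e.g.
#   {"PID_ABC": "annotator_1", "PID_XYZ": "annotator_2"}
# get_next_available_assignment() returns the first "annotator_N" key that does
# not yet appear among those values, or None once all slots are taken.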

# Updated save_annotations to include new fields
def save_annotations():
    if not state.prolific_id:
        return
    filename = f"{state.prolific_id}_latest.json"
    filepath = os.path.join(DATA_DIR, filename)
    data = {
        "prolific_id": state.prolific_id,
        "assignment_key": state.assignment_key,
        "selected_indices": state.selected_indices,
        "duration": (datetime.now() - state.start_time).total_seconds(),
        "current_idx": state.current_idx,
        "annotations": state.annotations,
        "form_responses": state.form_responses,
        "forms_completed": state.forms_completed
    }
    with open(filepath, "w") as f:
        json.dump(data, f, indent=2)
    logger.info(f"Saved annotations to {filepath}")
    return filepath
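
# Illustrative on-disk record written by save_annotations() (values are made up;
# field names mirror the `data` dict above):
#   {
#     "prolific_id": "PID_ABC",
#     "assignment_key": "annotator_1",
#     "selected_indices": [3, 17, 42],
#     "duration": 512.4,
#     "current_idx": 5,
#     "annotations": [{"selected": "A", "confidence": 4, ...}],
#     "form_responses": {},
#     "forms_completed": false
#   }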

# Updated load_latest_data to load new fields
def load_latest_data(prolific_id):
    filename = f"{prolific_id}_latest.json"
    filepath = os.path.join(DATA_DIR, filename)
    if os.path.exists(filepath):
        try:
            with open(filepath, "r") as f:
                data = json.load(f)
            state.selected_indices = data.get("selected_indices", [])
            state.annotations = data.get("annotations", [])
            state.form_responses = data.get("form_responses", {})
            state.forms_completed = data.get("forms_completed", False)
            state.current_idx = min(max(data.get("current_idx", 0), 0), 27)
            return data
        except Exception as e:
            logger.error(f"Error loading {filepath}: {e}")
    return None

INSTRUCTION = """
### Welcome!

In this task, you'll act as a judge comparing two AI chatbot responses. Your goal is to determine which response is better based on specific criteria.

### Task Overview:

- You'll evaluate multiple questions (prompts), each with two responses (Response A and B)
- Select the better response for each question based on the criteria below
- Your progress will be tracked
- After completing all questions, you'll answer a few post-test forms

### Evaluation Criteria:

1. **Perceived Usefulness**
   - Does the answer address the question effectively and provide relevant information?
2. **Social Presence**
   - Does the answer create "the feeling of being there with a 'real' person"?

### Getting Started:

1. Input your Prolific ID to begin
2. Read the question carefully
3. Compare both responses side-by-side
4. Select the better response using the radio buttons
5. Provide optional feedback and a confidence rating
6. Click "Next" to continue or "Previous" to review

**Note:** You need to select a response and a confidence level before proceeding to the next question.

*Thank you for contributing to our research! Your input is valuable.*
"""

MINI_INSTRUCTION = """You'll compare two AI chatbot answers for different questions and pick the better one. Read the question, then look at Response A and Response B. Choose the one that's better based on: Perceived Usefulness (answers well, gives useful info) and Social Presence (understands feelings, fits the situation).

*Select your choice and rate your confidence. Click "Next" to move on or "Previous" to go back. You must pick a response and a confidence level to continue. Thanks for helping with our research!*
"""

# Define post-test form questions (placeholders; replace with actual questions if available)
forms_questions = {
    "Neuro-QoL Cognition Function": [
        {"question": "In the past 7 days, I had to read something several times to understand it.", "options": ["Never", "Rarely", "Sometimes", "Often", "Very Often"]},
        {"question": "In the past 7 days, I had to work really hard to pay attention or I would make a mistake.", "options": ["Never", "Rarely", "Sometimes", "Often", "Very Often"]},
        {"question": "In the past 7 days, I had trouble concentrating.", "options": ["Never", "Rarely", "Sometimes", "Often", "Very Often"]},
        {"question": "In the past 7 days, I had trouble remembering things.", "options": ["Never", "Rarely", "Sometimes", "Often", "Very Often"]}
    ],
    "Wong and Law Emotional Intelligence Scale (WLEIS)": [
        # Self-emotion appraisal (SEA)
        {"question": "I have a good sense of why I have certain feelings most of the time.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I have good understanding of my own emotions.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I really understand what I feel.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I always know whether I am happy or not.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        # Others' emotion appraisal (OEA)
        {"question": "I always know my friends' emotions from their behavior.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I am a good observer of others' emotions.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I am sensitive to the feelings and emotions of others.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I have good understanding of the emotions of people around me.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        # Use of emotion (UOE)
        {"question": "I always set goals for myself and then try my best to achieve them.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I always tell myself I am a competent person.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I am a self-motivated person.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I would always encourage myself to try my best.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        # Regulation of emotion (ROE)
        {"question": "I am able to control my temper and handle difficulties rationally.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I can always calm down quickly when I am very angry.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I have good control of my own emotions.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I can always stay calm in stressful situations.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]}
    ],
    "Algorithmic Aversion": [
        # Trust in LLM
        {"question": "I trust the answers provided by AI chatbots (e.g., ChatGPT) to be accurate.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I feel confident relying on an AI chatbot for important tasks.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I worry that AI chatbots might give me incorrect information.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        # Preference for Human vs. LLM
        {"question": "I prefer asking a human expert over an AI chatbot for advice.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I would rather use a human-written article than one generated by an AI chatbot.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I find human interaction more valuable than interacting with an AI chatbot.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        # Willingness to Use LLM
        {"question": "I would avoid using an AI chatbot if I had other options.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I am willing to use an AI chatbot for daily tasks (e.g., writing, research).", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]},
        {"question": "I would recommend an AI chatbot to others.", "options": ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]}
    ],
    "Demographics": [
        {"question": "What is your highest level of education?", "options": [
            "Less than high school",
            "High school diploma",
            "Some college",
            "Associate's degree",
            "Bachelor's degree",
            "Master's degree",
            "Doctoral degree"
        ]},
    ]
}
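
# Note on ordering (design choice implied by the code): dict insertion order
# matters here. The flat list of radio components built in create_interface()
# and the key construction in handle_forms_submit() both iterate
# forms_questions.items() in the same order, so responses line up with
# questions positionally.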

def create_interface():
    with gr.Blocks(theme=gr.themes.Ocean(), title="AI Response Evaluation", css=custom_css) as demo:

        # User ID Section (unchanged layout)
        with gr.Column(visible=True, elem_id="id_section") as id_section:
            with gr.Column(elem_classes="instruction-panel"):
                gr.Markdown(INSTRUCTION)
            gr.Markdown("---")
            gr.Markdown("## Prolific ID Verification")
            prolific_id = gr.Textbox(label="Enter your Prolific ID")
            id_submit_btn = gr.Button("Submit", variant="primary")
            id_message = gr.Markdown("", visible=False)

        # Main Interface (updated for 28 questions)
        with gr.Column(visible=False, elem_id="main_interface") as main_interface:
            progress_md = gr.Markdown("**Progress:** 0% (0/28)", elem_classes="progress")
            gr.HTML('<style>.prompt-highlight { background-color: #e6f7ff; padding: 10px; border: 1px solid #91d5ff; border-radius: 5px; }</style>')
            gr.Markdown(MINI_INSTRUCTION)
            gr.Markdown("---")
            gr.Markdown("### Current Question From a User")
            prompt_box = gr.Markdown(elem_classes="prompt-highlight")
            with gr.Row():
                with gr.Column(variant="panel"):
                    gr.Markdown("### Response A")
                    response_a = gr.Markdown(height='200px')
                with gr.Column(variant="panel"):
                    gr.Markdown("### Response B")
                    response_b = gr.Markdown(height='200px')
            selection_radio = gr.Radio(
                choices=[("Response A", "A"), ("Response B", "B")],
                label="Select the better response",
            )
            feedback = gr.Textbox(label="Additional Feedback (optional)", lines=1)
            confidence = gr.Radio(
                choices=[("1 - Not confident", 1), ("2", 2), ("3", 3), ("4", 4), ("5 - Very confident", 5)],
                label="Confidence Rating",
            )
            with gr.Row():
                prev_btn = gr.Button("Previous", variant="secondary")
                next_btn = gr.Button("Next", variant="primary")

        # New Forms Section
        with gr.Column(visible=False, elem_id="forms_section") as forms_section:
            gr.Markdown("## Pre-Test Questions")
            gr.Markdown("Please answer the following questions to complete the study.")
            form_radios = []
            for form_name, questions in forms_questions.items():
                for q in questions:
                    radio = gr.Radio(choices=q["options"], label=q["question"])
                    form_radios.append(radio)
            with gr.Row():
                back_to_questions_btn = gr.Button("Back to Questions", variant="secondary")
                submit_forms_btn = gr.Button("Submit Forms", variant="primary")

        # Completion Section (unchanged layout)
        with gr.Column(visible=False, elem_id="completion") as completion_section:
            gr.Markdown("# Thank You!")
            gr.Markdown("### Completion code: `CA7IOI65`")
            completion_md = gr.Markdown("Your annotations and form responses have been saved.")
            gr.HTML("""
                <p>Click <a href="https://app.prolific.com/researcher/submissions/complete?cc=CA7IOI65" target="_blank">here</a> to complete the task.</p>
            """)

        # Updated handle_id_submit to assign the 28 questions from the annotator's slot
        def handle_id_submit(prolific_id_val):
            if not prolific_id_val.strip():
                raise gr.Error("Please enter a valid Prolific ID")
            prolific_id = prolific_id_val.strip()
            assigned = load_assigned()
            if prolific_id in assigned:
                assignment_key = assigned[prolific_id]
            else:
                next_key = get_next_available_assignment(assigned)
                if next_key is None:
                    return {
                        id_section: gr.update(visible=True),
                        forms_section: gr.update(visible=False),
                        main_interface: gr.update(visible=False),
                        completion_section: gr.update(visible=False),
                        id_message: gr.update(value="The study is full. Thank you for your interest.", visible=True)
                    }
                assigned[prolific_id] = next_key
                save_assigned(assigned)
                assignment_key = next_key
            state.prolific_id = prolific_id
            state.assignment_key = assignment_key
            state.selected_indices = assignments[assignment_key]
            data = load_latest_data(prolific_id)
            if data:
                if not state.forms_completed:
                    return {
                        id_section: gr.update(visible=False),
                        forms_section: gr.update(visible=True),
                        main_interface: gr.update(visible=False),
                        completion_section: gr.update(visible=False),
                        id_message: gr.update(value="", visible=False)
                    }
                elif state.current_idx < 28:
                    return {
                        id_section: gr.update(visible=False),
                        forms_section: gr.update(visible=False),
                        main_interface: gr.update(visible=True),
                        completion_section: gr.update(visible=False),
                        id_message: gr.update(value="", visible=False),
                        **update_interface(state.current_idx)
                    }
                else:
                    return {
                        id_section: gr.update(visible=False),
                        forms_section: gr.update(visible=False),
                        main_interface: gr.update(visible=False),
                        completion_section: gr.update(visible=True),
                        id_message: gr.update(value="", visible=False)
                    }
            else:
                state.annotations = [None] * 28
                state.current_idx = 0
                state.forms_completed = False
                state.form_responses = {}
                return {
                    id_section: gr.update(visible=False),
                    forms_section: gr.update(visible=True),
                    main_interface: gr.update(visible=False),
                    completion_section: gr.update(visible=False),
                    id_message: gr.update(value="", visible=False)
                }

        # Updated update_interface to use selected_indices
        def update_interface(current_idx):
            if current_idx >= 28:
                current_idx = 27
            actual_idx = state.selected_indices[current_idx]
            current_data = response_pairs[actual_idx]
            progress = f"**Progress:** {current_idx/28:.0%} ({min(current_idx, 28)}/28)"
            annotation = state.annotations[current_idx] if current_idx < len(state.annotations) else None
            return {
                prompt_box: current_data.get("prompt", ""),
                response_a: current_data.get("responseA", ""),
                response_b: current_data.get("responseB", ""),
                progress_md: progress,
                feedback: annotation["feedback"] if annotation else "",
                confidence: annotation["confidence"] if annotation else None,
                selection_radio: annotation["selected"] if annotation else None
            }
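
        # Note: update_interface() returns a plain {component: value} dict. The
        # handlers below merge it into their own return dicts with **, relying on
        # Gradio's support for returning a dict keyed by output components (any
        # listed output missing from the dict is simply left unchanged).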

        # Updated handle_navigation: after the 28th question it switches to the completion section
        def handle_navigation(direction, selection, confidence_val, feedback_val):
            error_msg = None
            if direction == "next":
                if not selection:
                    error_msg = "Please select a response before proceeding."
                if not confidence_val:
                    error_msg = "Please select a confidence level before proceeding."
                if error_msg:
                    gr.Warning(error_msg)
                    return {
                        main_interface: gr.update(visible=True),
                        completion_section: gr.update(visible=False),
                        **update_interface(state.current_idx)
                    }
            if selection and confidence_val:
                actual_idx = state.selected_indices[state.current_idx]
                annotation = {
                    "id": response_pairs[actual_idx]["id"],
                    "prompt": response_pairs[actual_idx]["prompt"],
                    "selected": selection,
                    "confidence": confidence_val,
                    "feedback": feedback_val,
                    "timestamp": datetime.now().isoformat()
                }
                state.annotations[state.current_idx] = annotation
            if direction == "next":
                new_idx = min(state.current_idx + 1, 28)
            else:
                new_idx = max(0, state.current_idx - 1)
            state.current_idx = new_idx
            save_annotations()
            if new_idx >= 28:
                return {
                    main_interface: gr.update(visible=False),
                    completion_section: gr.update(visible=True),
                    **update_interface(27)
                }
            else:
                return {
                    main_interface: gr.update(visible=True),
                    completion_section: gr.update(visible=False),
                    **update_interface(new_idx)
                }

        # New function to handle returning to questions from forms
        def handle_back_to_questions():
            state.current_idx = 27
            save_annotations()
            return {
                main_interface: gr.update(visible=True),
                forms_section: gr.update(visible=False),
                completion_section: gr.update(visible=False),
                **update_interface(27)
            }

        # New function to handle form submission
        def handle_forms_submit(*form_inputs):
            if any(input_val is None for input_val in form_inputs):
                gr.Warning("Please answer all questions before proceeding.")
                return {
                    forms_section: gr.update(visible=True),
                    main_interface: gr.update(visible=False),
                    completion_section: gr.update(visible=False)
                }
            state.form_responses = {}
            idx = 0
            for form_name, questions in forms_questions.items():
                for q in questions:
                    key = f"{form_name}_{q['question']}"
                    state.form_responses[key] = form_inputs[idx]
                    idx += 1
            state.forms_completed = True
            save_annotations()
            state.current_idx = 0
            return {
                forms_section: gr.update(visible=False),
                main_interface: gr.update(visible=True),
                completion_section: gr.update(visible=False),
                **update_interface(0)
            }
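
        # Illustrative form_responses key, derived from the loop in
        # handle_forms_submit() (form name + "_" + question text), e.g.
        #   "Neuro-QoL Cognition Function_In the past 7 days, I had trouble concentrating."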

        # Event bindings
        id_submit_btn.click(
            handle_id_submit,
            inputs=prolific_id,
            outputs=[id_section, forms_section, main_interface, completion_section, id_message, prompt_box,
                     response_a, response_b, progress_md, feedback, confidence, selection_radio]
        )
        prev_btn.click(
            handle_navigation,
            inputs=[gr.State("prev"), selection_radio, confidence, feedback],
            outputs=[main_interface, completion_section, prompt_box, response_a,
                     response_b, progress_md, feedback, confidence, selection_radio]
        )
        next_btn.click(
            handle_navigation,
            inputs=[gr.State("next"), selection_radio, confidence, feedback],
            outputs=[main_interface, completion_section, prompt_box, response_a,
                     response_b, progress_md, feedback, confidence, selection_radio]
        )
        back_to_questions_btn.click(
            handle_back_to_questions,
            inputs=[],
            outputs=[main_interface, forms_section, completion_section, prompt_box, response_a,
                     response_b, progress_md, feedback, confidence, selection_radio]
        )
        submit_forms_btn.click(
            handle_forms_submit,
            inputs=form_radios,
            outputs=[forms_section, main_interface, completion_section, prompt_box, response_a,
                     response_b, progress_md, feedback, confidence, selection_radio]
        )

    return demo

if __name__ == "__main__":
    if not os.path.exists("assignments.json"):
        assignments, _ = generate_assignments()
        print("Assignments generated.")
        with open("assignments.json", "w") as f:
            json.dump(assignments, f, indent=2)
    else:
        with open("assignments.json", "r") as f:
            assignments = json.load(f)
        print("Assignments loaded.")

    app = create_interface()
    app.launch()