<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1.0"/>
<title>Explanation Evaluation</title>
<link href="https://fonts.googleapis.com/css?family=Roboto:400,500,700&amp;display=swap" rel="stylesheet">

<style>
  /* Page shell: the page is pinned to one viewport height and never scrolls. */
  html, body {
    margin: 0;
    padding: 0;
    background: #fff;
    height: 100vh;
    overflow: hidden;
  }

  .container {
    height: 100vh;
    background: #fff;
    padding: 1rem;
    box-sizing: border-box;
    width: 100%;
    margin: 0;
    border-radius: 0;
    box-shadow: none;
    display: flex;
    flex-direction: column;
  }

  header { text-align: center; border-bottom: 1px solid #dee2e6; }
  header h2 { margin: 0 0 1rem; font-size: 1.5rem; color: #343a40; }

  /* Progress widget */
  #progress-container { margin: 5rem 0 .5rem; text-align: center; }
  progress { width: 100%; height: 20px; border-radius: 10px; appearance: none; }
  progress::-webkit-progress-bar { background: #f1f1f1; }
  progress::-webkit-progress-value { background: #28a745; border-radius: 10px; }
  #progress-text { margin-top: .5rem; font-size: 1.1rem; color: #495057; }

  /* Two-column row: explanation frame on the left, controls on the right. */
  .explain-row {
    display: flex;
    gap: 16px;
    align-items: flex-start;
  }

  iframe {
    width: 85%;
    height: 89vh;
    border: 2px solid #ced4da;
    border-radius: 4px;
    background: #fff;
    transform: scale(1, 1);
    transform-origin: top right;
  }

  .controls {
    flex: 1 1 0;
    display: flex;
    flex-direction: column;
    align-items: center;
    text-align: center;
  }

  .controls p { font-size: 1.2rem; margin: 0.5rem 2rem 0.5rem 0; color: #343a40; }

  button {
    padding: .8rem 1.5rem;
    margin: .5rem;
    font-size: 1rem;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    transition: opacity .3s;
    background: #6c757d;
    color: #fff;
  }
  button:hover { opacity: .9; }

  /* "Which step was wrong?" prompt; hidden until the script sets display:flex. */
  #wrong-box { display: none; margin: 1rem auto; text-align: center; flex-direction: column; align-items: center; }
  #wrong-step { width: 90px; padding: .45rem; text-align: center; font-size: 1rem; margin-top: .4rem; }
  #confirm-wrong { margin-top: .8rem; }

  /*
   * Each of these two buttons used to be declared twice with conflicting
   * `display` values; the later `display:none` always won, so the rules are
   * merged here with the effective values only.
   * NOTE(review): `visibility:hidden` keeps #download-btn invisible even when
   * the script switches its `display`; nothing in this file clears it —
   * confirm that hiding the button is intentional.
   */
  #download-btn { margin: 1rem auto; visibility: hidden; background: #007bff; display: none; }
  #restart-btn { margin: 1rem auto; background: #dc3545; display: none; }

  /* Results panel filled in by the script at the end of the run. */
  #accuracy {
    margin-top: 2rem;
    padding: 1rem;
    border: 1px solid #ced4da;
    border-radius: 4px;
    background: #f8f9fa;
    color: #495057;
    font-size: 1.1rem;
    line-height: 1.6;
    text-align: center;
  }
  #accuracy h2 { margin: 0 0 1rem; }
</style>

</head>
<body>

<div class="container">
  <header><h2>Interactive Code Explanation Experiment</h2></header>

  <div class="explain-row">
    <!-- The script assigns this frame's src at runtime. -->
    <iframe id="explanation-frame" src="" title="Interactive code explanation"></iframe>

    <!-- Starts hidden; the script toggles the inline display value. -->
    <div class="controls" style="display:none">
      <p id="instruction">You <strong>MUST</strong> do the experiment in full screen mode by clicking the button below 👇</p>
      <button id="full_screen" type="button">Enter Full Screen</button>
      <p>Is the final answer correct or incorrect?</p>
      <button id="btn-correct" type="button">Correct</button>
      <button id="btn-wrong" type="button">Incorrect</button>

      <!-- Shown by the script after "Incorrect" is clicked. -->
      <div id="wrong-box">
        <label for="wrong-step">You think the final answer is incorrect! In which step do you think the model was wrong? Step (1 – <span id="max-step">1</span>)</label>
        <input id="wrong-step" type="number" min="1" step="1">
        <button id="confirm-wrong" type="button">Confirm</button>
      </div>

      <div id="progress-container">
        <progress id="progress-bar" value="0" max="10"></progress>
        <p id="progress-text">Question 0 of 10 (Remaining: 10)</p>
      </div>
    </div>
  </div>

  <button id="download-btn" type="button">Download Results</button>
  <button id="restart-btn" type="button">Submit</button>

  <!-- Results summary is injected here by the script. -->
  <div id="accuracy"></div>
</div>

<script>

// Fullscreen prompt: the "Enter Full Screen" button and its instruction text
// are shown while the page is not in fullscreen and hidden while it is.
document.addEventListener("DOMContentLoaded", () => {
  const fullScreenButton = document.getElementById('full_screen');
  const instructionText = document.getElementById('instruction');
  const root = document.documentElement;

  // Standard property first, then the vendor-prefixed variants.
  const isFullscreen = () =>
    document.fullscreenElement ||
    document.webkitFullscreenElement ||
    document.mozFullScreenElement ||
    document.msFullscreenElement;

  // Applies the same CSS `visibility` value to the button and the instruction.
  const setPromptVisibility = (value) => {
    fullScreenButton.style.visibility = value;
    instructionText.style.visibility = value;
  };

  // At startup the prompt is only ever forced visible, never hidden.
  if (!isFullscreen()) {
    setPromptVisibility('visible');
  }

  fullScreenButton.addEventListener('click', () => {
    if (isFullscreen()) return;
    // Try each request method in turn; stops at the first one that exists
    // and returns a truthy value.
    root.requestFullscreen?.() ||
      root.webkitRequestFullscreen?.() ||
      root.mozRequestFullScreen?.() ||
      root.msRequestFullscreen?.();
  });

  const toggleButton = () => {
    setPromptVisibility(isFullscreen() ? 'hidden' : 'visible');
  };

  const changeEvents = [
    'fullscreenchange',
    'webkitfullscreenchange',
    'mozfullscreenchange',
    'MSFullscreenChange',
  ];
  for (const eventName of changeEvents) {
    document.addEventListener(eventName, toggleButton);
  }
});

/**
 * Randomly reorders `a` in place by walking from the last index down to 1 and
 * swapping each element with one at a random index at or below it.
 * Returns the same array instance that was passed in.
 */
const shuffle = (a) => {
  let i = a.length - 1;
  while (i > 0) {
    const j = Math.floor(Math.random() * (i + 1));
    const held = a[i];
    a[i] = a[j];
    a[j] = held;
    i -= 1;
  }
  return a;
};

/** Returns the current time formatted by `Date.prototype.toISOString`. */
const nowISO = () => {
  const current = new Date();
  return current.toISOString();
};

// Participant name sent with the final stats; stays "anonymous" until
// setUserName is called.
// NOTE(review): setUserName is not called anywhere in this file — presumably
// it is invoked from outside (e.g. the framed instructions page); confirm.
let userName = "anonymous";
function setUserName(n) {
  userName = n;
}

// Random identifier generated once per page load and sent with the stats.
const sessionId = crypto.randomUUID();

// Directory that holds the explanation pages shown in the iframe.
const base = "interactive-llm-xai/evaluation/eval_interfaces/interactive_coding_explanations/";

// Pages to review, in presentation order, built from
// [label, category code, numeric suffix] triples.
// NOTE(review): USER_COUNTER is not defined in this file — presumably it is
// injected before this script runs; confirm. The NA and OP entries use fixed
// suffixes (41 and 31) instead of USER_COUNTER.
const files = [
  ['right', 'NA', 41],
  ['right', 'CA', USER_COUNTER],
  ['wrong', 'CO', USER_COUNTER],
  ['wrong', 'CS', USER_COUNTER],
  ['wrong', 'CV', USER_COUNTER],
  ['wrong', 'FC', USER_COUNTER],
  ['wrong', 'HA', USER_COUNTER],
  ['wrong', 'MS', USER_COUNTER],
  ['wrong', 'OP', 31],
  ['wrong', 'UC', USER_COUNTER],
].map(([label, code, suffix]) => `${base}interactive_code_${label}_${code}_${suffix}.html`);

const total = files.length;

// ---- Mutable run state ----
let idx = 0;          // index into `files` of the item currently being judged
let startTime = "";   // replaced with Date.now() each time loadNext shows an item
let firstClick = "";  // ISO time of the first 'xai-click' message for the current item
let clickCounts = { play: 0, stop: 0, next: 0, prev: 0 };  // per-item message counts by key
const samples = [];   // one record per answered item, appended by saveAnswer
let currentMaxStep = 1;  // upper bound accepted for the "wrong step" input

// ---- DOM handles used throughout the script ----
const frame = document.getElementById('explanation-frame');
const controls = document.querySelector('.controls');
const downloadBtn = document.getElementById('download-btn');
const restartBtn = document.getElementById('restart-btn');
const wrongBox = document.getElementById('wrong-box');
const wrongInput = document.getElementById('wrong-step');
const maxStepSpan = document.getElementById('max-step');
const accDiv = document.getElementById('accuracy');

/**
 * Refreshes the progress bar and its caption from `idx` and `total`.
 * While items remain the caption is "Question i of N (Remaining: r)";
 * once idx reaches total it reads "All questions reviewed."
 */
function updateProgress() {
  const bar = document.getElementById('progress-bar');
  // Take the maximum from the file list rather than the value hard-coded in
  // the markup, so the bar stays correct if the number of files changes.
  // Set before `value` because a progress element caps value at max.
  bar.max = total;
  bar.value = idx;
  document.getElementById('progress-text').textContent =
    idx < total
      ? `Question ${idx + 1} of ${total} (Remaining: ${total - idx})`
      : 'All questions reviewed.';
}

// Counts 'xai-click' messages per key for the current item and records when
// the first one arrived.
window.addEventListener('message', ev => {
  // Ignore messages that were not posted by the explanation iframe itself;
  // any other window or extension could otherwise inflate the counters.
  if (ev.source !== frame.contentWindow) return;

  const msg = ev.data;
  if (!msg || msg.type !== 'xai-click') return;
  // Counters are keyed by name; skip payloads without a usable string key.
  if (typeof msg.key !== 'string') return;

  // Keys not pre-seeded in clickCounts start from zero.
  clickCounts[msg.key] = (clickCounts[msg.key] || 0) + 1;
  if (!firstClick) firstClick = nowISO();
});

/**
 * Shows the item at `idx`, or the results screen once every item is answered.
 * Resets the per-item timer, first-click marker and click counters.
 * NOTE(review): within this file loadNext is only called from saveAnswer —
 * presumably the first call comes from outside (e.g. the instructions page);
 * confirm.
 */
function loadNext() {
  const finished = idx >= total;
  if (finished) {
    renderResults();
    return;
  }

  updateProgress();
  frame.src = files[idx];

  // Reveal the answer controls and clear any leftover "wrong step" prompt.
  controls.style.display = 'block';
  downloadBtn.style.display = 'block';
  wrongBox.style.display = 'none';
  wrongInput.value = '';

  // Fresh measurements for the new item.
  startTime = Date.now();
  firstClick = null;
  clickCounts = { play: 0, stop: 0, next: 0, prev: 0 };
}

// Runs whenever the iframe finishes loading a page: hides the answer controls
// while the instructions page is shown, shows them for any other page.
frame.addEventListener('load', () => {
  const hide = frame.src.includes('instructions.html');
  // 'block' here replaces the earlier 'black', which is not a valid CSS
  // display value and was silently ignored by the browser.
  const display = hide ? 'none' : 'block';
  controls.style.display = display;
  downloadBtn.style.display = display;
  restartBtn.style.display = 'none';

  if (!hide) {
    // The step limit is a fixed 10 for every page; it is not read from the
    // loaded document.
    currentMaxStep = 10;
    wrongInput.min = 1;
    wrongInput.max = currentMaxStep;
    maxStepSpan.textContent = currentMaxStep;
  }
});

// "Correct": record the answer immediately, with 0 as the wrong-step value.
document.getElementById('btn-correct').onclick = () => saveAnswer('correct', 0);

// "Incorrect": reveal the step prompt and move focus into its input.
document.getElementById('btn-wrong').onclick = () => {
  wrongBox.style.display = 'flex';
  wrongInput.value = '';
  wrongInput.focus();
};

// "Confirm": validate the entered step, then record the answer.
document.getElementById('confirm-wrong').onclick = () => {
  const raw = wrongInput.value.trim();
  // Number() plus isInteger rejects fractional input such as "2.7" and reads
  // exponent notation such as "1e1" as 10; parseInt would have silently
  // turned these into 2 and 1. An empty field is rejected explicitly.
  const n = raw === '' ? NaN : Number(raw);
  if (!Number.isInteger(n) || n < 1 || n > currentMaxStep) {
    alert(`Enter a valid step number (1 – ${currentMaxStep})`);
    wrongInput.focus();
    return;
  }
  saveAnswer('incorrect', n);
  wrongBox.style.display = 'none';
};

/**
 * Records the participant's verdict for the current item and advances.
 *
 * @param {string} ans                 'correct' or 'incorrect', as passed by the button handlers.
 * @param {number} userInputWrongStep  Step the participant blamed (0 when answering 'correct').
 */
function saveAnswer(ans, userInputWrongStep) {
  // Nothing left to answer; avoids indexing past the end of `files`.
  if (idx >= total) return;

  const elapsed = (Date.now() - startTime) / 1000;
  const file = files[idx];

  // The framed page may expose the true wrong step in a `.wrong-step` element.
  // contentDocument is null when the frame's document is not accessible, so
  // every hop is optional; a missing value parses to NaN.
  const stepText = frame.contentDocument?.querySelector('.wrong-step')?.textContent.trim() ?? '';
  const ActualWrongStep = parseInt(stepText, 10);

  // Short id such as "right_NA_41"; fall back to the full path if the file
  // name does not match the expected pattern instead of throwing.
  const idMatch = file.match(/([^/_]+_[^/_]+_\d+)\.html$/);

  samples.push({
    file,
    id: idMatch ? idMatch[1] : file,
    label: file.includes('right') ? 'correct' : 'wrong',
    humanAnswer: ans,
    actualWrongstep: ActualWrongStep,
    userInputWrongStep,
    elapsedSeconds: +elapsed.toFixed(3),
    // Copy the counters so messages arriving after this point cannot alter
    // an already recorded sample.
    clickCounts: { ...clickCounts },
  });

  idx++;
  loadNext();
}

/**
 * Replaces the experiment UI with a results summary and wires the Submit
 * button to POST the collected data to `/save-stats`.
 * Accuracies are percentages formatted to two decimals; average times are in
 * seconds.
 */
function renderResults() {
  const correctItems = samples.filter(s => s.label === 'correct');
  const incorrectItems = samples.filter(s => s.label === 'wrong');
  const correctHits = correctItems.filter(s => s.humanAnswer === 'correct').length;
  const incorrectHits = incorrectItems.filter(s => s.humanAnswer === 'incorrect').length;
  const overallCorrect = correctHits + incorrectHits;

  const overallAcc = ((overallCorrect / total) * 100).toFixed(2);
  const correctAcc = correctItems.length
    ? ((correctHits / correctItems.length) * 100).toFixed(2)
    : '0.00';
  const incorrectAcc = incorrectItems.length
    ? ((incorrectHits / incorrectItems.length) * 100).toFixed(2)
    : '0.00';

  // `|| 1` keeps the average at 0.00 instead of NaN for an empty group.
  const sumSeconds = items => items.reduce((a, s) => a + s.elapsedSeconds, 0);
  const avgTC = (sumSeconds(correctItems) / (correctItems.length || 1)).toFixed(2);
  const avgTI = (sumSeconds(incorrectItems) / (incorrectItems.length || 1)).toFixed(2);

  // Hide everything that belongs to the question phase.
  controls.style.display = 'none';
  downloadBtn.style.display = 'none';
  document.getElementById('progress-container').style.display = 'none';
  frame.style.display = 'none';

  // Only values computed above are interpolated, so no escaping is needed.
  accDiv.innerHTML = `
    <h2>Results</h2>
    <p><strong>Overall Accuracy:</strong> ${overallCorrect}/${total} (${overallAcc}%)</p>
    <p><strong>Correct-Item Accuracy:</strong> ${correctAcc}%</p>
    <p><strong>Incorrect-Item Accuracy:</strong> ${incorrectAcc}%</p>
    <p><strong>Avg Time (Correct):</strong> ${avgTC} s</p>
    <p><strong>Avg Time (Incorrect):</strong> ${avgTI} s</p>
    <textarea id="feedback-box" aria-label="Comments or suggestions" placeholder="Any comments or suggestions?"></textarea>
  `;
  restartBtn.style.display = 'block';

  restartBtn.onclick = () => {
    const subjective_feedback = document.getElementById('feedback-box').value.trim();

    // Block repeat clicks while the request is in flight.
    restartBtn.disabled = true;

    fetch('/save-stats', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({
        sessionId,
        userName,
        overallAccuracy: +overallAcc,
        correctItemAccuracy: correctAcc,
        incorrectItemAccuracy: incorrectAcc,
        avgTimeCorrect: avgTC,
        avgTimeIncorrect: avgTI,
        samples,
        subjective_feedback
      })
    }).then(response => {
      if (!response.ok) {
        throw new Error(`save-stats responded with ${response.status}`);
      }
      window.location.href = 'interactive-llm-xai/evaluation/eval_interfaces/thank_you_icoding.html';
    }).catch(() => {
      // Covers both non-2xx responses and network failures, which previously
      // failed without any feedback to the participant.
      restartBtn.disabled = false;
      alert('Failed to save stats. Please try again.');
    });
  };
}

// Exports the samples recorded so far as `results.csv`.
downloadBtn.onclick = () => {
  // Quote a cell only when it contains a comma, quote or line break, so
  // plain values are written exactly as before.
  const csvCell = value => {
    const text = String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  const hdr = ['file', 'label', 'humanAnswer', 'wrongStep', 'time', 'play', 'stop', 'next', 'prev'];
  const rows = [hdr, ...samples.map(s => [
    s.file,
    s.label,
    s.humanAnswer,
    // Samples store the participant's step as `userInputWrongStep`; the
    // previously read `wrongStep` does not exist, leaving this column empty.
    s.userInputWrongStep ?? '',
    s.elapsedSeconds,
    s.clickCounts.play,
    s.clickCounts.stop,
    s.clickCounts.next,
    s.clickCounts.prev
  ])];

  const csv = new Blob(
    [rows.map(r => r.map(csvCell).join(',')).join('\n')],
    {type: 'text/csv'}
  );
  const url = URL.createObjectURL(csv);

  // Trigger the download through a temporary, detached link.
  const a = document.createElement('a');
  a.href = url;
  a.download = 'results.csv';
  a.click();
  URL.revokeObjectURL(url);
};

// Startup: draw the initial progress state, then show the instructions page
// in the frame (the frame's load handler keeps the controls hidden for it).
updateProgress();
frame.src = "interactive-llm-xai/evaluation/eval_interfaces/instructions.html";

</script>
</body>
</html>