#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Evaluation back-end for the “interactive-graph” interface on Hugging Face Spaces
───────────────────────────────────────────────────────────────────────────────
* Serves all evaluation UIs (`/eval_interfaces/`).
* Transparently patches every explanation HTML so you never touch the originals:
▸ layout / badge / telemetry tweaks (unchanged)
▸ **NEW** helper that counts steps and answers the parent’s
`xai-get-step-count`, so the prompt shows “1 – N”.
* **DEBUG MODE** (toggle with `ICOT_DEBUG=1`) prints:
▸ every file request + whether it was patched
▸ console-side step counts inside each iframe.
* Persists results to CSV / JSON and can push them to
`LLM-XAI/interactive-COT-data` if `ICOT_TOKEN` is set.
"""
import os, csv, json, uuid, logging
from datetime import datetime
from pathlib import Path
from flask import (
Flask, abort, Response, send_file, request,
url_for, render_template_string, jsonify
)
from huggingface_hub import HfApi, login
import re # ← add near other imports
from datasets import load_dataset
import random
# ────────────────────────── SET THE COUNTERS ──────────────────────
# Ceiling for the round-robin participant counters: after MAX_USERS the
# counters wrap back to 1 (see increment_submit_counter).
MAX_USERS = 50
# One persistent counter file per evaluation interface; each file holds a
# single integer as plain text.
COT_COUNTER_FILE = "regular_cot_counter.txt"
GRAPH_COUNTER_FILE = "graph_counter.txt"
CODE_COUNTER_FILE = "code_counter.txt"
NATURAL_LANG_COUNTER_FILE = "natural_lang_counter.txt"
# Interface card pre-selected on the landing page; re-chosen per request
# by get_the_min_interface().
SELECTED_CARD = "graph"
def get_submit_counter(file_path: str) -> int:
    """Return the integer stored in *file_path*.

    Returns 0 when the file is missing or does not hold a valid integer.
    (The original raised FileNotFoundError on a missing file, which broke
    the very first request before any counter file existed.)
    """
    try:
        with open(file_path, 'r') as f:
            return int(f.read().strip())
    except (FileNotFoundError, ValueError):
        return 0
def increment_submit_counter(file_path:str) -> int:
with open(file_path, 'r+') as f:
current = get_submit_counter(file_path)
new_value = (current+1) % (MAX_USERS+1)
if new_value == 0:
new_value = 1
f.seek(0)
f.write(str(new_value))
f.truncate()
return new_value
def increment_submit_counter_absolute(file_path:str) -> int:
with open(file_path, 'r+') as f:
current = get_submit_counter(file_path)
new_value = current+1
f.seek(0)
f.write(str(new_value))
f.truncate()
return new_value
def get_the_min_interface() -> str:
    """Pick which evaluation interface the next participant sees.

    Currently a uniform random choice among the four interface tags.
    (An earlier, disabled version queried the HF dataset and selected the
    least-represented interface; that dead commented-out code has been
    removed — recover it from version control if balancing is revived.)
    """
    return random.choice(["code", "graph", "inl", "cot"])
# this function extract the interface format from the sample path
def get_interface_format(sample_list):
    """Return the folder name following ``eval_interfaces/`` in the first
    sample's file path, or None when it cannot be determined.

    An empty *sample_list* now yields None (the original raised IndexError).
    """
    if not sample_list:
        return None
    file_path = sample_list[0].get("file", "")
    parts = file_path.split("eval_interfaces/")
    if len(parts) > 1:
        return parts[1].split("/")[0]  # the folder after eval_interfaces/
    return None
# ────────────────────────── GLOBAL DEBUG FLAG ──────────────────────
# Any non-"0" value of ICOT_DEBUG enables verbose logging.
DEBUG_MODE = os.getenv("ICOT_DEBUG", "0") != "0"
logging.basicConfig(
    level=logging.DEBUG if DEBUG_MODE else logging.INFO,
    format="%(asctime)s | %(levelname)-8s | %(message)s"
)
log = logging.getLogger(__name__)
log.info("Debug mode: %s", DEBUG_MODE)
# ───────────────────────────── CONFIG ──────────────────────────────
HF_TOKEN = os.getenv("ICOT_TOKEN")  # set in Space → Settings → Secrets
if HF_TOKEN:
    # Authenticate against the Hub so result uploads (push_to_hf) succeed.
    login(token=HF_TOKEN)
else:
    log.warning("ICOT_TOKEN not set – results will stay local")
# Dataset repo and folder that receive per-session JSON result files.
HF_REPO = "Miles1999/interactive-COT-data"
HF_FOLDER = "session_logs"
# Root of the served codebase; outer pages are resolved relative to this.
CODEBASE_DIR = "."
# Map of URL option → outer evaluation page served by load_outer().
EVAL_PAGES = {
    "cot": "evaluation/eval_interfaces/reg_cot_eval_interface.html",
    "interactive_nl": "evaluation/eval_interfaces/nl_eval_interface.html",
    "interactive_code": "evaluation/eval_interfaces/coding_eval_interface.html",
    "interactive_graph": "evaluation/eval_interfaces/graph_eval_interface.html",
}
# Top-level directories the /browse route is allowed to expose.
ALLOWED_ROOTS = ["html_explanations", "evaluation"]
# One-line-per-session CSV summary written by save_stats().
CSV_FILENAME = "evaluation_stats.csv"
CSV_PATH = Path(CSV_FILENAME).resolve()
CSV_HEADER = [
    "timestamp", "session_id", "user_name",
    "overallAccuracy(%)", "correctItemAccuracy(%)", "incorrectItemAccuracy(%)",
    "avgTimeCorrect", "avgTimeIncorrect",
]
# Scratch directory for per-session JSON files (removed after HF upload).
SESSION_DIR = Path("/tmp/sessions")
SESSION_DIR.mkdir(parents=True, exist_ok=True)
# ───────────────────────────── HELPERS ─────────────────────────────
def gen_session_id() -> str:
    """Produce a fresh random session identifier (a UUID4 string)."""
    return f"{uuid.uuid4()}"
def save_session_local(sid: str, data: dict) -> Path:
    """Serialize *data* as pretty-printed JSON under SESSION_DIR and
    return the path of the file that was written."""
    out_path = SESSION_DIR / f"{sid}.json"
    payload = json.dumps(data, indent=2)
    out_path.write_text(payload)
    log.info("Stored session JSON → %s", out_path)
    return out_path
def push_to_hf(local_path: Path, sid: str):
    """Best-effort upload of one session JSON to the HF dataset repo.

    On success the local copy is deleted; any failure is logged as a
    warning and never raised to the caller.
    """
    remote_name = f"{HF_FOLDER}/{local_path.name}"
    try:
        HfApi().upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=remote_name,
            repo_id=HF_REPO,
            repo_type="dataset",
        )
        local_path.unlink()
        log.info("Uploaded session %s to HF & removed local copy", sid)
    except Exception as err:
        log.warning("HF upload failed for %s : %s", sid, err)
# ────────────────────────── HTML PATCHING ──────────────────────────
# NOTE(review): every payload below is empty in this copy of the file —
# the CSS/JS snippet bodies appear to have been stripped during
# sanitization. Confirm against the original before deploying: with empty
# strings, preprocess_html() injects nothing.
INJECT_STYLE = """
"""
# ── Original helper (rename heading, add badges, telemetry) ──────────
INJECT_SCRIPT_BASE = """
"""
# ── NEW helper: answers “How many steps?” for the outer UI ───────────
INJECT_STEPCOUNT = """
"""
# ── NEW helper: adds “Step N” badges to .step-item, but skips “Final Answer” (For Graph Iterface)──
# NOTE(review): this constant is defined but not included in the injection
# list inside preprocess_html (only in its commented-out variant).
INJECT_SCRIPT_GRAPH_BADGE = """
"""
DISABLE_SCROLL_SCRIPT = """
"""
def preprocess_html(path: str) -> str:
    """Return patched HTML as string, injecting style + scripts."""
    html = Path(path).read_text(encoding="utf-8")
    # ── NEW: make problemData globally visible for graph pages ──
    # Rewrites `const problemData` declarations to `window.problemData`
    # so injected helper scripts can reach the page's data object.
    html = re.sub(r'\bconst\s+problemData\b', 'window.problemData', html)
    # inj = INJECT_STYLE + INJECT_SCRIPT_BASE + INJECT_STEPCOUNT + DISABLE_SCROLL_SCRIPT + INJECT_SCRIPT_GRAPH_BADGE
    inj = (
        INJECT_STYLE +
        INJECT_STEPCOUNT +
        INJECT_SCRIPT_BASE +
        DISABLE_SCROLL_SCRIPT
    )
    # NOTE(review): `"" in html` is always True and `replace("", x, 1)`
    # merely prepends `x` — the marker string (likely "</head>" or similar)
    # appears to have been stripped from this copy, making the else branch
    # unreachable. Confirm against the original file.
    res = html.replace("", inj + "", 1) if "" in html else inj + html
    if DEBUG_MODE:
        log.debug("Injected helpers into %s (%d → %d bytes)", path, len(html), len(res))
    return res
def needs_transform(path: str) -> bool:
    """True when *path* points to an explanation HTML page that must be
    patched by preprocess_html() before serving (interactive natural
    language / graph / coding explanations)."""
    normalized = path.replace("\\", "/").lower()
    if not normalized.endswith(".html"):
        return False
    interactive_dirs = (
        "/interactive_nat_lang_explanations/",
        "/interactive_graph_explanations/",
        "/interactive_coding_explanations/",
    )
    return any(marker in normalized for marker in interactive_dirs)
# ───────────────────────────── FLASK APP ───────────────────────────
app = Flask(__name__)
# ───────────────────────────── ROUTES ──────────────────────────────
# Landing page with four evaluation modes
# NOTE(review): the template's HTML markup appears to have been stripped
# from this copy — only text nodes remain. The original presumably renders
# the four mode cards and consumes the `selected_card` variable.
SELECT_TEMPLATE = """
Select Evaluation Mode
Choose an Evaluation Interface
"""
@app.route("/")
def landing():
    """Serve the landing page, (re)choosing which interface card to show."""
    # Restored `global`: without it this assignment created a function-local
    # that shadowed the module-level SELECTED_CARD, so the module state was
    # never actually refreshed between visits.
    global SELECTED_CARD
    log.info("landing page update")
    SELECTED_CARD = get_the_min_interface()
    return render_template_string(SELECT_TEMPLATE, selected_card=SELECTED_CARD)
# frontend (outer) pages
# NOTE(review): the route rule appears to have lost its `<option>` converter
# during sanitization (the handler takes an `option` parameter); restored so
# Flask can bind it — confirm against the original file.
@app.route("/eval_interfaces/<option>")
def load_outer(option):
    """Serve one of the four outer evaluation pages.

    Bumps the matching per-interface participant counter and injects it
    into the page before rendering. Unknown options 404.
    """
    global SELECTED_CARD
    rel = EVAL_PAGES.get(option)
    if not rel:
        abort(404)
    full_path = Path(CODEBASE_DIR) / rel
    html = full_path.read_text(encoding="utf-8")
    # Counter file + log label per interface; collapses the four previously
    # copy-pasted if/elif branches into one code path.
    counters = {
        "cot": (COT_COUNTER_FILE, "cot"),
        "interactive_graph": (GRAPH_COUNTER_FILE, "graph"),
        "interactive_code": (CODE_COUNTER_FILE, "code"),
        "interactive_nl": (NATURAL_LANG_COUNTER_FILE, "natural language"),
    }
    if option in counters:
        counter_file, label = counters[option]
        counter = increment_submit_counter(counter_file)
        log.info("%s counter value %d", label, counter)
        # NOTE(review): the injected snippet is just "\n" in this copy — it
        # presumably carried the counter value into the page (e.g. a <script>
        # tag) and was stripped during sanitization; confirm the original.
        injected = f"\n"
        html = html.replace("", injected + "")
    return render_template_string(html)
    # return send_file(Path(CODEBASE_DIR) / rel)
# Explanation HTML (inner iframes)
# NOTE(review): both route rules appear to have lost their `<path:sub>`
# converter during sanitization (the handler takes a `sub` parameter);
# restored so Flask can bind it — confirm against the original file.
@app.route("/interactive-llm-xai/<path:sub>")
@app.route("/eval_interfaces/interactive-llm-xai/<path:sub>")
def serve_explanation(sub):
    """Serve an explanation file, patching interactive pages on the fly.

    Directories fall through to the browse() helper; interactive HTML
    pages are run through preprocess_html(); everything else is served
    verbatim.
    """
    full = Path(sub).resolve()
    # SECURITY(review): `sub` is user-controlled and resolved without being
    # confined to ALLOWED_ROOTS, so "../" traversal can reach any readable
    # file on disk — consider validating the resolved path like browse() does.
    needs = needs_transform(str(full))
    log.info("serve_explanation | %s | needs_transform=%s", full, needs)
    if not full.exists():
        abort(404)
    if full.is_dir():
        return browse(sub)
    if needs:
        return Response(preprocess_html(str(full)), mimetype="text/html")
    return send_file(full)
# Very lightweight directory browser (handy for debugging)
# NOTE(review): the template's HTML markup appears to have been stripped
# from this copy — only the Jinja control tags and text nodes remain;
# confirm against the original before relying on it.
BROWSER_TEMPLATE = """
Browse
{% if parent_link %}[Parent]
{% endif %}
{% if directories %}Folders {% endif %}
{% if files %}HTML Files {% endif %}
{% if html_content %}{{ html_content|safe }}
{% endif %}
"""
@app.route("/browse/", defaults={"req_path": ""})
# NOTE(review): the rule appears to have lost its `<path:req_path>` converter
# during sanitization; restored so Flask can bind it — confirm the original.
@app.route("/browse/<path:req_path>")
def browse(req_path):
    """Minimal directory browser restricted to ALLOWED_ROOTS (debug aid).

    Directories render a folder/file listing; .html files are served
    directly; anything else is shown as escaped text.
    """
    # SECURITY(review): only the first path component is checked, so a value
    # like "evaluation/../secret" passes and then resolves outside the
    # allowed roots — consider validating the fully resolved path instead.
    if req_path and req_path.split(os.sep)[0] not in ALLOWED_ROOTS:
        abort(404)
    full = Path(req_path).resolve()
    if not full.exists():
        abort(404)
    if full.is_dir():
        dirs, files = [], []
        for e in sorted(full.iterdir()):
            if e.name.startswith("."):  # skip hidden
                continue
            # Fixed: `e.relative_to(Path("."))` raises ValueError for the
            # absolute paths produced by .resolve(); use the cwd instead.
            rel = str(e.relative_to(Path.cwd()))
            if e.is_dir():
                dirs.append({"name": e.name, "link": rel})
            elif e.suffix.lower() == ".html":
                files.append({"name": e.name, "link": rel})
        parent = url_for("landing") if not req_path else url_for("browse", req_path=str(full.parent))
        return render_template_string(BROWSER_TEMPLATE,
                                      parent_link=parent,
                                      directories=dirs, files=files,
                                      html_content=None)
    # serve file content (HTML or plain text)
    if full.suffix.lower() == ".html":
        return send_file(full)
    parent = url_for("browse", req_path=str(full.parent.relative_to(Path.cwd())))
    txt = full.read_text(encoding="utf-8", errors="replace")
    return render_template_string(BROWSER_TEMPLATE,
                                  parent_link=parent,
                                  html_content=f"{txt} ")
# ──────────────────────── RESULT ENDPOINTS ─────────────────────────
@app.route("/save-stats", methods=["POST"])
def save_stats():
    """Persist one participant's results and re-pick the next interface.

    Writes a one-line summary to the CSV, stores the full session JSON
    locally, pushes it to the HF dataset when a token is configured, and
    refreshes the module-level SELECTED_CARD.
    """
    # Restored: the `global` declaration had been commented out, so the
    # assignment near the end only created a function-local SELECTED_CARD
    # and the module-level choice was never updated.
    global SELECTED_CARD
    data = request.get_json(force=True, silent=True) or {}
    sid = data.get("sessionId") or gen_session_id()
    stats = {k: data.get(k) for k in (
        "overallAccuracy", "correctItemAccuracy", "incorrectItemAccuracy",
        "avgTimeCorrect", "avgTimeIncorrect", "samples", "subjective_feedback")}
    stats["timestamp"] = datetime.utcnow().isoformat()
    stats["session_id"] = sid
    stats["user_name"] = data.get("userName", "anonymous")
    # quick CSV summary (one line)
    row = [
        stats["timestamp"], sid, stats["user_name"],
        data.get("overallAccuracy"), data.get("correctItemAccuracy"),
        data.get("incorrectItemAccuracy"), data.get("avgTimeCorrect"),
        data.get("avgTimeIncorrect"),
    ]
    need_header = not CSV_PATH.exists()
    with CSV_PATH.open("a", newline="") as f:
        w = csv.writer(f)
        if need_header:
            w.writerow(CSV_HEADER)
        w.writerow(row)
    # full JSON per session
    path = save_session_local(sid, stats)
    if HF_TOKEN:
        push_to_hf(path, sid)
        log.info("new result pushed to database")
    SELECTED_CARD = get_the_min_interface()
    log.info("current selected card")
    log.info(SELECTED_CARD)
    return jsonify({"status": "ok"})
# ─────────────────────────────── MAIN ──────────────────────────────
if __name__ == "__main__":
    # For local debugging; HF Spaces will launch via gunicorn/uvicorn
    port = int(os.getenv("PORT", 7860))
    app.run(host="0.0.0.0", port=port, debug=False)