craffel HF Staff commited on
Commit
b914d06
·
verified ·
1 Parent(s): 85fe6d4

Upload nemotron_fineinstructions_1T_judged_exp_chat/metrics.eval.jsonl with huggingface_hub

Browse files
nemotron_fineinstructions_1T_judged_exp_chat/metrics.eval.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {"created_at": "2025-10-28T22:09:55.716629", "global_step": 30000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.19983619983619982, "acc_stderr,none": 0.011448447996728383}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.42710615415255926, "acc_stderr,none": 0.004936470085238481, "acc_norm,none": 0.560744871539534, "acc_norm_stderr,none": 0.0049528205388319054}, "mmlu": {"acc,none": 0.2366472012533827, "acc_stderr,none": 0.003583123916739435, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.24017003188097769, "acc_stderr,none": 0.006229548743113747, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.2698412698412698, "acc_stderr,none": 0.039701582732351734}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.21212121212121213, "acc_stderr,none": 0.031922715695482995}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.2647058823529412, "acc_stderr,none": 0.03096451792692341}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.2320675105485232, "acc_stderr,none": 0.02747974455080851}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.2396694214876033, "acc_stderr,none": 0.03896878985070417}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.25925925925925924, "acc_stderr,none": 0.04236511258094633}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.22699386503067484, "acc_stderr,none": 0.03291099578615767}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.2543352601156069, "acc_stderr,none": 0.02344582627654555}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.23798882681564246, "acc_stderr,none": 0.014242630070574885}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.1864951768488746, "acc_stderr,none": 0.022122439772480757}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.22530864197530864, "acc_stderr,none": 0.02324620264781975}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.2457627118644068, "acc_stderr,none": 0.01099615663514269}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.28654970760233917, "acc_stderr,none": 0.03467826685703826}, "mmlu_other": {"acc,none": 0.2500804634695848, "acc_stderr,none": 0.007748228240647257, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.31, "acc_stderr,none": 0.046482319871173156}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.2188679245283019, "acc_stderr,none": 0.02544786382510861}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.2023121387283237, "acc_stderr,none": 0.03063114553919882}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.2, "acc_stderr,none": 0.04020151261036845}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.32286995515695066, "acc_stderr,none": 0.03138147637575499}, "mmlu_management": {"alias": " - management", "acc,none": 0.17475728155339806, "acc_stderr,none": 0.03760178006026621}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.2948717948717949, "acc_stderr,none": 0.02987257770889117}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.3, "acc_stderr,none": 0.046056618647183814}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.015543377313719681}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.23529411764705882, "acc_stderr,none": 0.024288619466046095}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.24468085106382978, "acc_stderr,none": 0.025645553622266733}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.19117647058823528, "acc_stderr,none": 0.02388688192244033}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.0362933532994786}, "mmlu_social_sciences": {"acc,none": 0.23236919077023074, "acc_stderr,none": 0.007616856037339355, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.23684210526315788, "acc_stderr,none": 0.03999423879281335}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.20707070707070707, "acc_stderr,none": 0.028869778460267066}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.22797927461139897, "acc_stderr,none": 0.03027690994517825}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.2230769230769231, "acc_stderr,none": 0.02110773012724401}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.23529411764705882, "acc_stderr,none": 0.027553614467863807}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.21651376146788992, "acc_stderr,none": 0.017658710594443138}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.25190839694656486, "acc_stderr,none": 0.03807387116306086}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.2696078431372549, "acc_stderr,none": 0.017952449196987866}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.2, "acc_stderr,none": 0.03831305140884603}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.19591836734693877, "acc_stderr,none": 0.025409301953225678}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.24378109452736318, "acc_stderr,none": 0.03036049015401467}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_stem": {"acc,none": 0.2223279416428798, "acc_stderr,none": 0.007394226614508187, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.2, "acc_stderr,none": 0.04020151261036846}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.18518518518518517, "acc_stderr,none": 0.0335567721631314}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.21052631578947367, "acc_stderr,none": 0.03317672787533158}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.22916666666666666, "acc_stderr,none": 0.035146974678623884}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.23, "acc_stderr,none": 0.04229525846816507}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.27, "acc_stderr,none": 0.044619604333847394}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.20588235294117646, "acc_stderr,none": 0.04023382273617746}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.3, "acc_stderr,none": 0.046056618647183814}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.2553191489361702, "acc_stderr,none": 0.02850485647051418}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.23448275862068965, "acc_stderr,none": 0.035306258743465914}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.21164021164021163, "acc_stderr,none": 0.021037331505262893}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.18387096774193548, "acc_stderr,none": 0.02203721734026784}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.18719211822660098, "acc_stderr,none": 0.027444924966882618}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.28, "acc_stderr,none": 0.04512608598542128}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.26296296296296295, "acc_stderr,none": 0.026842057873833706}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.1986754966887417, "acc_stderr,none": 0.03257847384436777}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.1527777777777778, "acc_stderr,none": 0.024536326026134217}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.30357142857142855, "acc_stderr,none": 0.04364226155841044}, "sciq": {"alias": "sciq", "acc,none": 0.872, "acc_stderr,none": 0.010570133761108652, "acc_norm,none": 0.789, "acc_norm_stderr,none": 0.012909130321042094}}
2
+ {"created_at": "2025-10-30T03:57:05.486348", "global_step": 150000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.1941031941031941, "acc_stderr,none": 0.011323381588920446}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4510057757418841, "acc_stderr,none": 0.004965768348628046, "acc_norm,none": 0.592212706632145, "acc_norm_stderr,none": 0.004904189257891268}, "mmlu": {"acc,none": 0.23059393248824953, "acc_stderr,none": 0.0035488885071196653, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.24293304994686504, "acc_stderr,none": 0.006248534255488808, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.29365079365079366, "acc_stderr,none": 0.04073524322147125}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.21818181818181817, "acc_stderr,none": 0.03225078108306289}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.25980392156862747, "acc_stderr,none": 0.030778554678693264}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.270042194092827, "acc_stderr,none": 0.028900721906293426}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.2396694214876033, "acc_stderr,none": 0.03896878985070417}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.25925925925925924, "acc_stderr,none": 0.04236511258094632}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.22085889570552147, "acc_stderr,none": 0.032591773927421776}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.24566473988439305, "acc_stderr,none": 0.023176298203992012}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.23798882681564246, "acc_stderr,none": 0.014242630070574885}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.1864951768488746, "acc_stderr,none": 0.022122439772480764}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.20987654320987653, "acc_stderr,none": 0.022658344085981365}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.24771838331160365, "acc_stderr,none": 0.011025499291443737}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.32748538011695905, "acc_stderr,none": 0.035993357714560276}, "mmlu_other": {"acc,none": 0.2404248471194078, "acc_stderr,none": 0.007650411735089183, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.29, "acc_stderr,none": 0.045604802157206845}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.20754716981132076, "acc_stderr,none": 0.02495991802891127}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.21965317919075145, "acc_stderr,none": 0.031568093627031744}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.19, "acc_stderr,none": 0.03942772444036624}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.31390134529147984, "acc_stderr,none": 0.031146796482972465}, "mmlu_management": {"alias": " - management", "acc,none": 0.17475728155339806, "acc_stderr,none": 0.03760178006026621}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.2905982905982906, "acc_stderr,none": 0.029745048572674054}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117316}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.2388250319284802, "acc_stderr,none": 0.015246803197398687}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.023805186524888142}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.23404255319148937, "acc_stderr,none": 0.025257861359432414}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.1875, "acc_stderr,none": 0.023709788253811766}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "mmlu_social_sciences": {"acc,none": 0.21774455638609036, "acc_stderr,none": 0.007436943807547174, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.23684210526315788, "acc_stderr,none": 0.03999423879281335}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.18181818181818182, "acc_stderr,none": 0.027479603010538787}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.19689119170984457, "acc_stderr,none": 0.028697873971860677}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.20256410256410257, "acc_stderr,none": 0.020377660970371386}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.21008403361344538, "acc_stderr,none": 0.026461398717471874}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.1908256880733945, "acc_stderr,none": 0.016847676400091105}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.2595419847328244, "acc_stderr,none": 0.03844876139785271}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.25163398692810457, "acc_stderr,none": 0.01755581809132227}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.21818181818181817, "acc_stderr,none": 0.03955932861795833}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.19183673469387755, "acc_stderr,none": 0.02520696315422542}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.24378109452736318, "acc_stderr,none": 0.03036049015401466}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.28, "acc_stderr,none": 0.045126085985421276}, "mmlu_stem": {"acc,none": 0.21503330161750714, "acc_stderr,none": 0.00730363941342265, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.22, "acc_stderr,none": 0.04163331998932268}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.1925925925925926, "acc_stderr,none": 0.03406542058502653}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.17763157894736842, "acc_stderr,none": 0.031103182383123398}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.2569444444444444, "acc_stderr,none": 0.03653946969442099}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.19, "acc_stderr,none": 0.039427724440366234}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.26, "acc_stderr,none": 0.0440844002276808}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.21, "acc_stderr,none": 0.040936018074033256}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.21568627450980393, "acc_stderr,none": 0.04092563958237655}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.28, "acc_stderr,none": 0.04512608598542128}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.2680851063829787, "acc_stderr,none": 0.028957342788342347}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.03565998174135302}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.20634920634920634, "acc_stderr,none": 0.02084229093011467}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.18387096774193548, "acc_stderr,none": 0.022037217340267836}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.16748768472906403, "acc_stderr,none": 0.026273086047535414}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.25, "acc_stderr,none": 0.04351941398892446}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.02534809746809787}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.19205298013245034, "acc_stderr,none": 0.032162984205936156}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.1527777777777778, "acc_stderr,none": 0.02453632602613422}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.32142857142857145, "acc_stderr,none": 0.04432804055291518}, "sciq": {"alias": "sciq", "acc,none": 0.881, "acc_stderr,none": 0.010244215145336666, "acc_norm,none": 0.837, "acc_norm_stderr,none": 0.01168621271274684}}
3
+ {"created_at": "2025-10-31T09:38:33.004060", "global_step": 270000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.23013923013923013, "acc_stderr,none": 0.012050956185794128}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4998008364867556, "acc_stderr,none": 0.004989781015595478, "acc_norm,none": 0.660426209918343, "acc_norm_stderr,none": 0.004725967684806402}, "mmlu": {"acc,none": 0.27937615724255804, "acc_stderr,none": 0.003773762714304552, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.28437832093517534, "acc_stderr,none": 0.006562851601022282, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.23809523809523808, "acc_stderr,none": 0.03809523809523811}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.32727272727272727, "acc_stderr,none": 0.03663974994391243}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.29901960784313725, "acc_stderr,none": 0.03213325717373616}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.3206751054852321, "acc_stderr,none": 0.030381931949990403}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.2644628099173554, "acc_stderr,none": 0.040261875275912046}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.3148148148148148, "acc_stderr,none": 0.04489931073591312}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.25153374233128833, "acc_stderr,none": 0.034089978868575295}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.33815028901734107, "acc_stderr,none": 0.02546977014940018}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.23798882681564246, "acc_stderr,none": 0.014242630070574894}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.29260450160771706, "acc_stderr,none": 0.02583989833487798}, "mmlu_prehistory": {"alias": " - prehistory", "acc,none": 0.2839506172839506, "acc_stderr,none": 0.025089478523765137}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.2790091264667536, "acc_stderr,none": 0.011455208832803534}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.40350877192982454, "acc_stderr,none": 0.03762738699917056}, "mmlu_other": {"acc,none": 0.29417444480205984, "acc_stderr,none": 0.008164869714474363, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.27, "acc_stderr,none": 0.044619604333847394}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.2943396226415094, "acc_stderr,none": 0.02804918631569525}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.2774566473988439, "acc_stderr,none": 0.034140140070440354}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.21, "acc_stderr,none": 0.040936018074033256}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.35874439461883406, "acc_stderr,none": 0.03219079200419995}, "mmlu_management": {"alias": " - management", "acc,none": 0.21359223300970873, "acc_stderr,none": 0.04058042015646034}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.3547008547008547, "acc_stderr,none": 0.03134250486245402}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.27, "acc_stderr,none": 0.04461960433384739}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.30140485312899107, "acc_stderr,none": 0.016409091097268798}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.3006535947712418, "acc_stderr,none": 0.026256053835718964}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.2730496453900709, "acc_stderr,none": 0.02657786094330785}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.25, "acc_stderr,none": 0.026303648393696036}, "mmlu_virology": {"alias": " - virology", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.03664314777288087}, "mmlu_social_sciences": {"acc,none": 0.2772180695482613, "acc_stderr,none": 0.00802358799639923, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.22807017543859648, "acc_stderr,none": 0.03947152782669415}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.23737373737373738, "acc_stderr,none": 0.030313710538198906}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.27979274611398963, "acc_stderr,none": 0.03239637046735703}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.2205128205128205, "acc_stderr,none": 0.02102067268082791}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.23949579831932774, "acc_stderr,none": 0.027722065493361255}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.26788990825688075, "acc_stderr,none": 0.01898746225797865}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.3053435114503817, "acc_stderr,none": 0.04039314978724561}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.2761437908496732, "acc_stderr,none": 0.018087276935663137}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.2, "acc_stderr,none": 0.03831305140884603}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.37142857142857144, "acc_stderr,none": 0.03093285879278984}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.34328358208955223, "acc_stderr,none": 0.03357379665433431}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.46, "acc_stderr,none": 0.05009082659620332}, "mmlu_stem": {"acc,none": 0.2594354582936885, "acc_stderr,none": 0.007778181376704438, "alias": " - stem"}, "mmlu_abstract_algebra": {"alias": " - abstract_algebra", "acc,none": 0.27, "acc_stderr,none": 0.044619604333847394}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.34814814814814815, "acc_stderr,none": 0.041153246103369526}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.2236842105263158, "acc_stderr,none": 0.033911609343436046}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.22916666666666666, "acc_stderr,none": 0.035146974678623884}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.17, "acc_stderr,none": 0.03775251680686371}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.29, "acc_stderr,none": 0.045604802157206845}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117316}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.21568627450980393, "acc_stderr,none": 0.04092563958237655}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.38, "acc_stderr,none": 0.048783173121456316}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.30638297872340425, "acc_stderr,none": 0.030135906478517563}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.2689655172413793, "acc_stderr,none": 0.036951833116502325}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.25132275132275134, "acc_stderr,none": 0.022340482339643898}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.26129032258064516, "acc_stderr,none": 0.024993053397764815}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.2019704433497537, "acc_stderr,none": 0.028247350122180267}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117316}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", "acc,none": 0.21851851851851853, "acc_stderr,none": 0.025195752251823786}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.271523178807947, "acc_stderr,none": 0.036313298039696545}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.19444444444444445, "acc_stderr,none": 0.02699145450203673}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.3482142857142857, "acc_stderr,none": 0.045218299028335844}, "sciq": {"alias": "sciq", "acc,none": 0.892, "acc_stderr,none": 0.009820001651345705, "acc_norm,none": 0.843, "acc_norm_stderr,none": 0.011510146979230189}}