Muennighoff's picture
Add eval
dec248e
raw
history blame
27.6 kB
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.3766654896097268, "bleu_stderr": 0.036770639314469625, "rouge1_fmeasure": 0.11125883286620787, "rouge1_fmeasure_stderr": 0.0021777719604464934, "rouge1_precision": 0.07463676126067638, "rouge1_precision_stderr": 0.0017536994314003643, "rouge1_recall": 0.3055243783167187, "rouge1_recall_stderr": 0.004923196038356293, "rouge2_fmeasure": 0.05241127421123892, "rouge2_fmeasure_stderr": 0.0013812586251192279, "rouge2_precision": 0.03495708380511325, "rouge2_precision_stderr": 0.0010755617786711133, "rouge2_recall": 0.1463988856897342, "rouge2_recall_stderr": 0.0033157649461190517, "rougeL_fmeasure": 0.10669398307951232, "rougeL_fmeasure_stderr": 0.0019971004635536125, "rougeL_precision": 0.07113651982917402, "rougeL_precision_stderr": 0.0015756847167816563, "rougeL_recall": 0.2969832786671565, "rougeL_recall_stderr": 0.004789895495471576, "rougeLsum_fmeasure": 0.10581227487100259, "rougeLsum_fmeasure_stderr": 0.002033223013812388, "rougeLsum_precision": 0.07095175301748666, "rougeLsum_precision_stderr": 0.0016401231748774104, "rougeLsum_recall": 0.290533682340263, "rougeLsum_recall_stderr": 0.004586951773397012}}, "1": {"PALM_prompt": {"bleu": 0.4529423055531777, "bleu_stderr": 0.029376727995766887, "rouge1_fmeasure": 0.110577089357591, "rouge1_fmeasure_stderr": 0.0019168631386664975, "rouge1_precision": 0.07103583110555811, "rouge1_precision_stderr": 0.0014180500988097634, "rouge1_recall": 0.35725561658848226, "rouge1_recall_stderr": 0.004979132659759252, "rouge2_fmeasure": 0.04996312318228885, "rouge2_fmeasure_stderr": 0.0012100542150851913, "rouge2_precision": 0.032140378761648755, "rouge2_precision_stderr": 0.0008696892164612032, "rouge2_recall": 0.16461190005306175, "rouge2_recall_stderr": 0.003453481891476274, "rougeL_fmeasure": 0.10375145892002761, "rougeL_fmeasure_stderr": 0.0017454474982763254, "rougeL_precision": 0.06661875269740569, "rougeL_precision_stderr": 0.0012810573815674243, "rougeL_recall": 0.3321900024913409, "rougeL_recall_stderr": 0.0044607717832792345, "rougeLsum_fmeasure": 0.10532629794602687, "rougeLsum_fmeasure_stderr": 0.0018123996946970554, "rougeLsum_precision": 0.06765495702505793, "rougeLsum_precision_stderr": 0.0013350310956810436, "rougeLsum_recall": 0.33890366046920695, "rougeLsum_recall_stderr": 0.004604484045462983}}, "2": {"PALM_prompt": {"bleu": 0.4684392378599957, "bleu_stderr": 0.02423260943577622, "rouge1_fmeasure": 0.11623937759114213, "rouge1_fmeasure_stderr": 0.001784845293944614, "rouge1_precision": 0.0742061054840223, "rouge1_precision_stderr": 0.0013333308857283244, "rouge1_recall": 0.3813776946410705, "rouge1_recall_stderr": 0.004784764653468254, "rouge2_fmeasure": 0.05284593819460868, "rouge2_fmeasure_stderr": 0.0011501305582368334, "rouge2_precision": 0.033657310660714944, "rouge2_precision_stderr": 0.0008265607728247616, "rouge2_recall": 0.18105822999099125, "rouge2_recall_stderr": 0.003521621574427344, "rougeL_fmeasure": 0.10881870482066662, "rougeL_fmeasure_stderr": 0.0016378189817036614, "rougeL_precision": 0.0694538029740241, "rougeL_precision_stderr": 0.001215774537003342, "rougeL_recall": 0.3551483422480297, "rougeL_recall_stderr": 0.004333932331791627, "rougeLsum_fmeasure": 0.1106286277546505, "rougeLsum_fmeasure_stderr": 0.001684740921307988, "rougeLsum_precision": 0.07060876630120069, "rougeLsum_precision_stderr": 0.0012537200980686366, "rougeLsum_recall": 0.362494034153821, "rougeLsum_recall_stderr": 0.004465894952684968}}, "3": {"PALM_prompt": {"bleu": 0.480745327945465, "bleu_stderr": 0.029266632664770444, "rouge1_fmeasure": 0.11471321060257549, "rouge1_fmeasure_stderr": 0.0017613576979984618, "rouge1_precision": 0.07296083803792867, "rouge1_precision_stderr": 0.001321074748458067, "rouge1_recall": 0.38481251417726425, "rouge1_recall_stderr": 0.0048211580190553474, "rouge2_fmeasure": 0.0518300700029641, "rouge2_fmeasure_stderr": 0.0011366275614018418, "rouge2_precision": 0.03295844925125063, "rouge2_precision_stderr": 0.0008219896164865758, "rouge2_recall": 0.18243074286093477, "rouge2_recall_stderr": 0.003520666385492241, "rougeL_fmeasure": 0.1063254653549725, "rougeL_fmeasure_stderr": 0.00160342062042125, "rougeL_precision": 0.06763104345742398, "rougeL_precision_stderr": 0.001193272012990937, "rougeL_recall": 0.3536800939368693, "rougeL_recall_stderr": 0.004281174744743548, "rougeLsum_fmeasure": 0.10874729624701576, "rougeLsum_fmeasure_stderr": 0.0016682920362204748, "rougeLsum_precision": 0.06919492439995835, "rougeLsum_precision_stderr": 0.0012476306005574289, "rougeLsum_recall": 0.36279528435732755, "rougeLsum_recall_stderr": 0.004441749268970921}}, "4": {"PALM_prompt": {"bleu": 0.5299253807020501, "bleu_stderr": 0.02941922571571106, "rouge1_fmeasure": 0.11813540445620313, "rouge1_fmeasure_stderr": 0.0017245577078934757, "rouge1_precision": 0.07480213178710299, "rouge1_precision_stderr": 0.0012754894492764339, "rouge1_recall": 0.3970764266910007, "rouge1_recall_stderr": 0.00479400389044778, "rouge2_fmeasure": 0.053480555426061054, "rouge2_fmeasure_stderr": 0.0011054733632853673, "rouge2_precision": 0.03379173374032418, "rouge2_precision_stderr": 0.00078403441220949, "rouge2_recall": 0.19044254036042865, "rouge2_recall_stderr": 0.003555747561111927, "rougeL_fmeasure": 0.10901926993196162, "rougeL_fmeasure_stderr": 0.0015659810128221983, "rougeL_precision": 0.0690688784250406, "rougeL_precision_stderr": 0.00115234955782402, "rougeL_recall": 0.3634475191206997, "rougeL_recall_stderr": 0.004237516494386168, "rougeLsum_fmeasure": 0.1120643474343094, "rougeLsum_fmeasure_stderr": 0.0016338803550659683, "rougeLsum_precision": 0.07098987300903557, "rougeLsum_precision_stderr": 0.0012076897268674832, "rougeLsum_recall": 0.3757199063882224, "rougeLsum_recall_stderr": 0.004437171081755198}}, "5": {"PALM_prompt": {"bleu": 0.5805390348408225, "bleu_stderr": 0.02819353208455936, "rouge1_fmeasure": 0.1186651527893959, "rouge1_fmeasure_stderr": 0.0017186269807706199, "rouge1_precision": 0.07501750256896958, "rouge1_precision_stderr": 0.001270218531497788, "rouge1_recall": 0.4015406332115864, "rouge1_recall_stderr": 0.004860633547147494, "rouge2_fmeasure": 0.054268686691182504, "rouge2_fmeasure_stderr": 0.0011142195248691033, "rouge2_precision": 0.03417436118906603, "rouge2_precision_stderr": 0.0007869739954224421, "rouge2_recall": 0.19527084059093697, "rouge2_recall_stderr": 0.0036390161400406632, "rougeL_fmeasure": 0.1091439877345682, "rougeL_fmeasure_stderr": 0.0015579828603400948, "rougeL_precision": 0.06904057033601822, "rougeL_precision_stderr": 0.0011483417413371603, "rougeL_recall": 0.3667546549943055, "rougeL_recall_stderr": 0.00429474219305135, "rougeLsum_fmeasure": 0.11215320024069554, "rougeLsum_fmeasure_stderr": 0.0016216594047244847, "rougeLsum_precision": 0.07095801808181956, "rougeLsum_precision_stderr": 0.0011991813477980299, "rougeLsum_recall": 0.3779985085599908, "rougeLsum_recall_stderr": 0.004487482755875942}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.510662462736792, "bleu_stderr": 0.061302798788800454, "rouge1_fmeasure": 0.17551095757877197, "rouge1_fmeasure_stderr": 0.0018448528757015485, "rouge1_precision": 0.1494281578111144, "rouge1_precision_stderr": 0.0018810068029499402, "rouge1_recall": 0.25540498447239807, "rouge1_recall_stderr": 0.0026106091827375255, "rouge2_fmeasure": 0.03596420933477661, "rouge2_fmeasure_stderr": 0.0008238102581483236, "rouge2_precision": 0.030405275555078732, "rouge2_precision_stderr": 0.0007245437335848381, "rouge2_recall": 0.05387294114750562, "rouge2_recall_stderr": 0.0013626210381348647, "rougeL_fmeasure": 0.13921782806493765, "rougeL_fmeasure_stderr": 0.0013333530590282482, "rougeL_precision": 0.1170432755610966, "rougeL_precision_stderr": 0.0013249085015013622, "rougeL_recall": 0.20780639606220117, "rougeL_recall_stderr": 0.002147406502628224, "rougeLsum_fmeasure": 0.1598569258636739, "rougeLsum_fmeasure_stderr": 0.0016588392998791086, "rougeLsum_precision": 0.13586278841655744, "rougeLsum_precision_stderr": 0.0016874314298753724, "rougeLsum_recall": 0.23371465565529836, "rougeLsum_recall_stderr": 0.002406078787801731}}, "1": {"tldr_en": {"bleu": 2.26919558776056, "bleu_stderr": 0.050822319622703574, "rouge1_fmeasure": 0.20514465511685417, "rouge1_fmeasure_stderr": 0.0018940338911781795, "rouge1_precision": 0.1769834522546966, "rouge1_precision_stderr": 0.0020936477263758862, "rouge1_recall": 0.2980600225358244, "rouge1_recall_stderr": 0.002698657973996375, "rouge2_fmeasure": 0.04697444873814672, "rouge2_fmeasure_stderr": 0.0009269481758786191, "rouge2_precision": 0.04083956382615695, "rouge2_precision_stderr": 0.0009432529119503935, "rouge2_recall": 0.07036915405856631, "rouge2_recall_stderr": 0.0015474755083422337, "rougeL_fmeasure": 0.14738229979838374, "rougeL_fmeasure_stderr": 0.00124575871906576, "rougeL_precision": 0.12587226969495116, "rougeL_precision_stderr": 0.0014145043525682814, "rougeL_recall": 0.22015261575662967, "rougeL_recall_stderr": 0.002100640459320018, "rougeLsum_fmeasure": 0.19153111088041924, "rougeLsum_fmeasure_stderr": 0.0017576567613173148, "rougeLsum_precision": 0.16507932558916488, "rougeLsum_precision_stderr": 0.0019519082692588704, "rougeLsum_recall": 0.2792890994786057, "rougeLsum_recall_stderr": 0.0025506318413613303}}, "2": {"tldr_en": {"bleu": 2.480588761761161, "bleu_stderr": 0.07877643384058922, "rouge1_fmeasure": 0.20606270663707582, "rouge1_fmeasure_stderr": 0.0018939261948929384, "rouge1_precision": 0.18008755617055971, "rouge1_precision_stderr": 0.00218506437634507, "rouge1_recall": 0.29842788296415007, "rouge1_recall_stderr": 0.002680398750285478, "rouge2_fmeasure": 0.0484515000599595, "rouge2_fmeasure_stderr": 0.0009528223813298393, "rouge2_precision": 0.04256264695681051, "rouge2_precision_stderr": 0.0009701649626331492, "rouge2_recall": 0.07243172972164508, "rouge2_recall_stderr": 0.0015644874896146475, "rougeL_fmeasure": 0.14864493218376815, "rougeL_fmeasure_stderr": 0.0012920985917904396, "rougeL_precision": 0.12870484607503332, "rougeL_precision_stderr": 0.001521735043379941, "rougeL_recall": 0.22077722370144454, "rougeL_recall_stderr": 0.002127023185915328, "rougeLsum_fmeasure": 0.19303438615149926, "rougeLsum_fmeasure_stderr": 0.0017674722342307051, "rougeLsum_precision": 0.16849593985953729, "rougeLsum_precision_stderr": 0.0020412010462905372, "rougeLsum_recall": 0.28030364781677897, "rougeLsum_recall_stderr": 0.002548392666837096}}, "3": {"tldr_en": {"bleu": 2.5501108974178366, "bleu_stderr": 0.10668208790688151, "rouge1_fmeasure": 0.17500582117172667, "rouge1_fmeasure_stderr": 0.0021811806119651773, "rouge1_precision": 0.16006747013266337, "rouge1_precision_stderr": 0.0024643430200379604, "rouge1_recall": 0.2522713002313001, "rouge1_recall_stderr": 0.0032150955842843985, "rouge2_fmeasure": 0.04137055724594726, "rouge2_fmeasure_stderr": 0.0009409183911576798, "rouge2_precision": 0.038116210600494964, "rouge2_precision_stderr": 0.0010733415003686914, "rouge2_recall": 0.062408576243544206, "rouge2_recall_stderr": 0.00162483698664969, "rougeL_fmeasure": 0.12680102501698154, "rougeL_fmeasure_stderr": 0.0015167002958347217, "rougeL_precision": 0.11639916816307759, "rougeL_precision_stderr": 0.0018599454921598617, "rougeL_recall": 0.18707384797466609, "rougeL_recall_stderr": 0.0024911945438878284, "rougeLsum_fmeasure": 0.1644467141506287, "rougeLsum_fmeasure_stderr": 0.0020390830214045063, "rougeLsum_precision": 0.1504634350637213, "rougeLsum_precision_stderr": 0.0023261582527568827, "rougeLsum_recall": 0.23784452695236408, "rougeLsum_recall_stderr": 0.0030524680525109106}}, "4": {"tldr_en": {"bleu": 0.5965000841679078, "bleu_stderr": 0.054286931126417394, "rouge1_fmeasure": 0.05668434801249912, "rouge1_fmeasure_stderr": 0.0019088449868065777, "rouge1_precision": 0.0526851643834691, "rouge1_precision_stderr": 0.001979736768263418, "rouge1_recall": 0.0845334524316102, "rouge1_recall_stderr": 0.0028934636600933803, "rouge2_fmeasure": 0.013611007566035085, "rouge2_fmeasure_stderr": 0.0006727084706707976, "rouge2_precision": 0.012114414758214996, "rouge2_precision_stderr": 0.0006374570177879572, "rouge2_recall": 0.02079506987284451, "rouge2_recall_stderr": 0.0011006122462439156, "rougeL_fmeasure": 0.042398217176957576, "rougeL_fmeasure_stderr": 0.0014108897692251782, "rougeL_precision": 0.039610077248726026, "rougeL_precision_stderr": 0.0015276886283061442, "rougeL_recall": 0.06442177105259915, "rougeL_recall_stderr": 0.002232958077385975, "rougeLsum_fmeasure": 0.052950111171242285, "rougeLsum_fmeasure_stderr": 0.0017836375211631576, "rougeLsum_precision": 0.04923469605877065, "rougeLsum_precision_stderr": 0.0018604843082340565, "rougeLsum_recall": 0.07906641897394578, "rougeLsum_recall_stderr": 0.0027148888816926928}}, "5": {"tldr_en": {"bleu": 1.1525132757548177e-06, "bleu_stderr": 2.259306115700327e-06, "rouge1_fmeasure": 0.008793201585915983, "rouge1_fmeasure_stderr": 0.0008310483858427221, "rouge1_precision": 0.008579395909238414, "rouge1_precision_stderr": 0.0009174195926944666, "rouge1_recall": 0.013020796957396425, "rouge1_recall_stderr": 0.0012305903729721045, "rouge2_fmeasure": 0.002008432892497026, "rouge2_fmeasure_stderr": 0.00025655167632805144, "rouge2_precision": 0.0020898536368983006, "rouge2_precision_stderr": 0.0003210270494989934, "rouge2_recall": 0.0028958917128531696, "rouge2_recall_stderr": 0.0003825524440022995, "rougeL_fmeasure": 0.006429413670718064, "rougeL_fmeasure_stderr": 0.0006058529249711972, "rougeL_precision": 0.0062033540793613754, "rougeL_precision_stderr": 0.0006488925801517133, "rougeL_recall": 0.00972935261803465, "rougeL_recall_stderr": 0.000935477756593659, "rougeLsum_fmeasure": 0.008150353865605486, "rougeLsum_fmeasure_stderr": 0.0007717799118499117, "rougeLsum_precision": 0.007968590768904082, "rougeLsum_precision_stderr": 0.0008588987786403505, "rougeLsum_recall": 0.012068678186927301, "rougeLsum_recall_stderr": 0.001141870875035626}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.0, "bleu_stderr": 0.0, "rouge1_fmeasure": 0.0, "rouge1_fmeasure_stderr": 0.0, "rouge1_precision": 0.0, "rouge1_precision_stderr": 0.0, "rouge1_recall": 0.0, "rouge1_recall_stderr": 0.0, "rouge2_fmeasure": 0.0, "rouge2_fmeasure_stderr": 0.0, "rouge2_precision": 0.0, "rouge2_precision_stderr": 0.0, "rouge2_recall": 0.0, "rouge2_recall_stderr": 0.0, "rougeL_fmeasure": 0.0, "rougeL_fmeasure_stderr": 0.0, "rougeL_precision": 0.0, "rougeL_precision_stderr": 0.0, "rougeL_recall": 0.0, "rougeL_recall_stderr": 0.0, "rougeLsum_fmeasure": 0.0, "rougeLsum_fmeasure_stderr": 0.0, "rougeLsum_precision": 0.0, "rougeLsum_precision_stderr": 0.0, "rougeLsum_recall": 0.0, "rougeLsum_recall_stderr": 0.0}}, "1": {"generate_text_restaurant": {"bleu": 10.509465358675882, "bleu_stderr": 0.14106239020485822, "rouge1_fmeasure": 0.418957457639259, "rouge1_fmeasure_stderr": 0.002211283624327926, "rouge1_precision": 0.49814096568223615, "rouge1_precision_stderr": 0.0031228169567879905, "rouge1_recall": 0.39990509441961064, "rouge1_recall_stderr": 0.0028017906261479744, "rouge2_fmeasure": 0.18711710727221215, "rouge2_fmeasure_stderr": 0.0017674670596259004, "rouge2_precision": 0.22578088805723098, "rouge2_precision_stderr": 0.0023550033503433516, "rouge2_recall": 0.17872544358367168, "rouge2_recall_stderr": 0.0019253259791803179, "rougeL_fmeasure": 0.30199481315949683, "rougeL_fmeasure_stderr": 0.001868507360902732, "rougeL_precision": 0.36143437488178726, "rougeL_precision_stderr": 0.0026893318226408037, "rougeL_recall": 0.2876616185668531, "rougeL_recall_stderr": 0.002240524632379903, "rougeLsum_fmeasure": 0.3403575566339607, "rougeLsum_fmeasure_stderr": 0.002104300102537745, "rougeLsum_precision": 0.4058839903966193, "rougeLsum_precision_stderr": 0.002929188637840434, "rougeLsum_recall": 0.32440784856528737, "rougeLsum_recall_stderr": 0.002513768971145275}}, "2": {"generate_text_restaurant": {"bleu": 11.961830917667758, "bleu_stderr": 0.19855453733230602, "rouge1_fmeasure": 0.4419809406794215, "rouge1_fmeasure_stderr": 0.0021746613541367608, "rouge1_precision": 0.51711462099421, "rouge1_precision_stderr": 0.0031893272264497357, "rouge1_recall": 0.42538520622426595, "rouge1_recall_stderr": 0.0027561541891646117, "rouge2_fmeasure": 0.20827123531143107, "rouge2_fmeasure_stderr": 0.0018331233011223366, "rouge2_precision": 0.2480861389204431, "rouge2_precision_stderr": 0.002530279348496146, "rouge2_recall": 0.2005277050867945, "rouge2_recall_stderr": 0.002005105269397663, "rougeL_fmeasure": 0.319606028471045, "rougeL_fmeasure_stderr": 0.0019100807104483971, "rougeL_precision": 0.37558496245296147, "rougeL_precision_stderr": 0.0027712951131830606, "rougeL_recall": 0.30771584728032136, "rougeL_recall_stderr": 0.002301745290698664, "rougeLsum_fmeasure": 0.36152978430493454, "rougeLsum_fmeasure_stderr": 0.002148135539279992, "rougeLsum_precision": 0.4235131872523603, "rougeLsum_precision_stderr": 0.0030145070146928936, "rougeLsum_recall": 0.34804301914590063, "rougeLsum_recall_stderr": 0.0025654845117966773}}, "3": {"generate_text_restaurant": {"bleu": 12.783813782272771, "bleu_stderr": 0.13373444208657323, "rouge1_fmeasure": 0.44617276274863055, "rouge1_fmeasure_stderr": 0.0021042615281562853, "rouge1_precision": 0.520355225619114, "rouge1_precision_stderr": 0.0031753747193647226, "rouge1_recall": 0.431040101001428, "rouge1_recall_stderr": 0.0027024973924205282, "rouge2_fmeasure": 0.21553899833577975, "rouge2_fmeasure_stderr": 0.0018441696189753596, "rouge2_precision": 0.2552178159365366, "rouge2_precision_stderr": 0.0025340406230315894, "rouge2_recall": 0.20857214956156206, "rouge2_recall_stderr": 0.0020499868980659807, "rougeL_fmeasure": 0.32589894572675115, "rougeL_fmeasure_stderr": 0.0019644581484736163, "rougeL_precision": 0.38160761429010903, "rougeL_precision_stderr": 0.0028611731395038137, "rougeL_recall": 0.31510091249307176, "rougeL_recall_stderr": 0.0023623327936241126, "rougeLsum_fmeasure": 0.36803351945986523, "rougeLsum_fmeasure_stderr": 0.0021157617938264584, "rougeLsum_precision": 0.4300053681752842, "rougeLsum_precision_stderr": 0.0030369140375128096, "rougeLsum_recall": 0.35550346504564223, "rougeLsum_recall_stderr": 0.002548411139172127}}, "4": {"generate_text_restaurant": {"bleu": 12.885751299941811, "bleu_stderr": 0.15969242921903556, "rouge1_fmeasure": 0.45026981993374054, "rouge1_fmeasure_stderr": 0.002080403587919867, "rouge1_precision": 0.523116023711691, "rouge1_precision_stderr": 0.003159510801552928, "rouge1_recall": 0.4347754349321631, "rouge1_recall_stderr": 0.0026623796893380926, "rouge2_fmeasure": 0.21817586594850874, "rouge2_fmeasure_stderr": 0.0018375017039996347, "rouge2_precision": 0.25726994872946823, "rouge2_precision_stderr": 0.002495382722223244, "rouge2_recall": 0.21096520047503017, "rouge2_recall_stderr": 0.0020291712606514177, "rougeL_fmeasure": 0.3276852275026859, "rougeL_fmeasure_stderr": 0.0019226922672484702, "rougeL_precision": 0.3817813434833727, "rougeL_precision_stderr": 0.00278674762622991, "rougeL_recall": 0.3167090675706668, "rougeL_recall_stderr": 0.0023133219861784697, "rougeLsum_fmeasure": 0.37171755091703396, "rougeLsum_fmeasure_stderr": 0.0021235373560235717, "rougeLsum_precision": 0.4318010197109263, "rougeLsum_precision_stderr": 0.00300932049819547, "rougeLsum_recall": 0.3593783018854229, "rougeLsum_recall_stderr": 0.002556809077049329}}, "5": {"generate_text_restaurant": {"bleu": 13.015089930317862, "bleu_stderr": 0.1862317304279187, "rouge1_fmeasure": 0.45175348014312466, "rouge1_fmeasure_stderr": 0.0020539637998556975, "rouge1_precision": 0.5237646441857315, "rouge1_precision_stderr": 0.003119955869914991, "rouge1_recall": 0.4350283312814063, "rouge1_recall_stderr": 0.0026050030826883933, "rouge2_fmeasure": 0.21855982505821114, "rouge2_fmeasure_stderr": 0.0018280376611572316, "rouge2_precision": 0.2571133134231222, "rouge2_precision_stderr": 0.002479315132076248, "rouge2_recall": 0.21080749167967208, "rouge2_recall_stderr": 0.0020117298632678082, "rougeL_fmeasure": 0.3310975491078584, "rougeL_fmeasure_stderr": 0.0019315737242252154, "rougeL_precision": 0.3840121201780919, "rougeL_precision_stderr": 0.002738414708414722, "rougeL_recall": 0.31968271522053765, "rougeL_recall_stderr": 0.002323656464824889, "rougeLsum_fmeasure": 0.37560579687759804, "rougeLsum_fmeasure_stderr": 0.002105408683653576, "rougeLsum_precision": 0.43536474805418546, "rougeLsum_precision_stderr": 0.002981334958507689, "rougeLsum_recall": 0.3619870549422223, "rougeLsum_recall_stderr": 0.00250502271106493}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.6423686135792128, "bleu_stderr": 0.06781279200780907, "rouge1_fmeasure": 0.19917872135908093, "rouge1_fmeasure_stderr": 0.002363364921333995, "rouge1_precision": 0.14487464665671154, "rouge1_precision_stderr": 0.0018722238981939412, "rouge1_recall": 0.33895657354344594, "rouge1_recall_stderr": 0.004069941952483832, "rouge2_fmeasure": 0.041753798281454126, "rouge2_fmeasure_stderr": 0.0014001006643940823, "rouge2_precision": 0.02994505955671013, "rouge2_precision_stderr": 0.0010173785332722297, "rouge2_recall": 0.07345440728895383, "rouge2_recall_stderr": 0.0025142398102111683, "rougeL_fmeasure": 0.15183482706804002, "rougeL_fmeasure_stderr": 0.0017984450209911193, "rougeL_precision": 0.11021900031596973, "rougeL_precision_stderr": 0.0014023873379590325, "rougeL_recall": 0.2598013222997278, "rougeL_recall_stderr": 0.003215833369736362, "rougeLsum_fmeasure": 0.15585053225281267, "rougeLsum_fmeasure_stderr": 0.001999587330221717, "rougeLsum_precision": 0.11296681537198215, "rougeLsum_precision_stderr": 0.0015281816901430067, "rougeLsum_recall": 0.26741998630860214, "rougeLsum_recall_stderr": 0.003606290421968234}}, "1": {"article_DOC_summary": {"bleu": 1.235149038282432, "bleu_stderr": 0.05459414115856849, "rouge1_fmeasure": 0.1721971161395112, "rouge1_fmeasure_stderr": 0.0023562006234985776, "rouge1_precision": 0.122280026305853, "rouge1_precision_stderr": 0.0017590389823740346, "rouge1_recall": 0.30370705741783904, "rouge1_recall_stderr": 0.004055840887234022, "rouge2_fmeasure": 0.033679844792948326, "rouge2_fmeasure_stderr": 0.0012746015477371447, "rouge2_precision": 0.023643830062951152, "rouge2_precision_stderr": 0.0008931623940532598, "rouge2_recall": 0.06132179989926658, "rouge2_recall_stderr": 0.002438002508590267, "rougeL_fmeasure": 0.1348907378250264, "rougeL_fmeasure_stderr": 0.0017557866405550706, "rougeL_precision": 0.09557021937107155, "rougeL_precision_stderr": 0.0012939913446578198, "rougeL_recall": 0.23969285660629983, "rougeL_recall_stderr": 0.0032118748335309482, "rougeLsum_fmeasure": 0.1371410576575297, "rougeLsum_fmeasure_stderr": 0.0019156446628714203, "rougeLsum_precision": 0.09710950720308745, "rougeLsum_precision_stderr": 0.0014054406412306651, "rougeLsum_recall": 0.243798787196214, "rougeLsum_recall_stderr": 0.003461096806942398}}, "2": {"article_DOC_summary": {"bleu": 1.3273790960954637, "bleu_stderr": 0.09176142584084704, "rouge1_fmeasure": 0.1716977478400356, "rouge1_fmeasure_stderr": 0.0023383541607926747, "rouge1_precision": 0.12179901660753824, "rouge1_precision_stderr": 0.0017398315123206822, "rouge1_recall": 0.3032557674354084, "rouge1_recall_stderr": 0.004072457910608588, "rouge2_fmeasure": 0.03433849632199217, "rouge2_fmeasure_stderr": 0.001323022814987283, "rouge2_precision": 0.024087973865443035, "rouge2_precision_stderr": 0.0009276178115034531, "rouge2_recall": 0.06273705485114922, "rouge2_recall_stderr": 0.0025278707457798855, "rougeL_fmeasure": 0.13580451989059478, "rougeL_fmeasure_stderr": 0.0017938300496962917, "rougeL_precision": 0.09614862403870629, "rougeL_precision_stderr": 0.0013184503159874956, "rougeL_recall": 0.24121814724656784, "rougeL_recall_stderr": 0.0032651610116900525, "rougeLsum_fmeasure": 0.13656723774480103, "rougeLsum_fmeasure_stderr": 0.0019323707811520156, "rougeLsum_precision": 0.09657841838844407, "rougeLsum_precision_stderr": 0.0014102801978203182, "rougeLsum_recall": 0.24328652481188623, "rougeLsum_recall_stderr": 0.0035427786513778435}}, "3": {"article_DOC_summary": {"bleu": 1.274999009117922, "bleu_stderr": 0.06533339080542852, "rouge1_fmeasure": 0.16198496719006378, "rouge1_fmeasure_stderr": 0.002461680141885952, "rouge1_precision": 0.11771566145248202, "rouge1_precision_stderr": 0.001974675749475598, "rouge1_recall": 0.2808863267357088, "rouge1_recall_stderr": 0.004232152533592064, "rouge2_fmeasure": 0.03154777210690178, "rouge2_fmeasure_stderr": 0.0012883428762946515, "rouge2_precision": 0.022613297989636686, "rouge2_precision_stderr": 0.0009375138370182145, "rouge2_recall": 0.056449109414913885, "rouge2_recall_stderr": 0.002390241149567977, "rougeL_fmeasure": 0.13058248129799271, "rougeL_fmeasure_stderr": 0.001923392287805453, "rougeL_precision": 0.09482485185210902, "rougeL_precision_stderr": 0.0015307380065247016, "rougeL_recall": 0.22714266166470798, "rougeL_recall_stderr": 0.003379128076656118, "rougeLsum_fmeasure": 0.1305566152116247, "rougeLsum_fmeasure_stderr": 0.0020355825046111564, "rougeLsum_precision": 0.09468447332068473, "rougeLsum_precision_stderr": 0.0016024160577426002, "rougeLsum_recall": 0.22761575606814072, "rougeLsum_recall_stderr": 0.0036049884138439963}}, "4": {"article_DOC_summary": {"bleu": 0.6239086109930185, "bleu_stderr": 0.10973904164090122, "rouge1_fmeasure": 0.04505749328342608, "rouge1_fmeasure_stderr": 0.0025017653409032357, "rouge1_precision": 0.03887834658463857, "rouge1_precision_stderr": 0.0024589794252161197, "rouge1_recall": 0.07093457265274629, "rouge1_recall_stderr": 0.004064248129890208, "rouge2_fmeasure": 0.00898372887228758, "rouge2_fmeasure_stderr": 0.0008296764200563856, "rouge2_precision": 0.007872466165238559, "rouge2_precision_stderr": 0.0011070493553575833, "rouge2_recall": 0.01480404830925788, "rouge2_recall_stderr": 0.0014151698503792067, "rougeL_fmeasure": 0.036426261925246756, "rougeL_fmeasure_stderr": 0.002012862216820716, "rougeL_precision": 0.03187702993768649, "rougeL_precision_stderr": 0.002092811287428047, "rougeL_recall": 0.057397566285207535, "rougeL_recall_stderr": 0.0033039126622202036, "rougeLsum_fmeasure": 0.03710050752016136, "rougeLsum_fmeasure_stderr": 0.0020684468798720184, "rougeLsum_precision": 0.03250357266245564, "rougeLsum_precision_stderr": 0.0021547282635484715, "rougeLsum_recall": 0.05849154360762178, "rougeLsum_recall_stderr": 0.003390553406986616}}, "5": {"article_DOC_summary": {"bleu": 2.0691386052109995e-37, "bleu_stderr": 3.200126715732197e-32, "rouge1_fmeasure": 0.0024093152652346025, "rouge1_fmeasure_stderr": 0.0006928475147292153, "rouge1_precision": 0.0026984260960398582, "rouge1_precision_stderr": 0.0007673471410518447, "rouge1_recall": 0.002245308275951066, "rouge1_recall_stderr": 0.0006501503386827272, "rouge2_fmeasure": 0.00029933155101239577, "rouge2_fmeasure_stderr": 0.0002280273075634093, "rouge2_precision": 0.0003220327582147782, "rouge2_precision_stderr": 0.0002380089009728209, "rouge2_recall": 0.0002816994326428289, "rouge2_recall_stderr": 0.00021956571300715553, "rougeL_fmeasure": 0.0019195425067966257, "rougeL_fmeasure_stderr": 0.0005609555771721628, "rougeL_precision": 0.0021286327127059647, "rougeL_precision_stderr": 0.0006129090759822569, "rougeL_recall": 0.0018080211443602594, "rougeL_recall_stderr": 0.0005332658698664648, "rougeLsum_fmeasure": 0.00195603752522661, "rougeLsum_fmeasure_stderr": 0.0005667582381245831, "rougeLsum_precision": 0.002182234771025004, "rougeLsum_precision_stderr": 0.0006243696224394884, "rougeLsum_recall": 0.0018356867228475056, "rougeLsum_recall_stderr": 0.0005367645348735476}}}}