Commit c631903
Parent: b3b5971
Commit message: Add

(This view is limited to 50 files because the commit contains too many changes.)
- .gitattributes +10 -0
- 2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json +1 -0
- 2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json +1 -0
- 2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_0.json +1 -0
- 2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_1.json +1 -0
- 2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_2.json +1 -0
- 2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.json +1 -0
- 2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.json +1 -0
- 2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_gem_xsum_article_DOC_summary_0.json +1 -0
- 2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_gem_xsum_article_DOC_summary_1.json +1 -0
- 2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_gem_xsum_article_DOC_summary_2.json +1 -0
- 2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl +3 -0
- 2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl +3 -0
- 2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl +3 -0
- 2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl +3 -0
- 2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl +3 -0
- 2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl +3 -0
- 2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl +3 -0
- 2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_gem_xsum_article_DOC_summary_0.jsonl +3 -0
- 2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_gem_xsum_article_DOC_summary_1.jsonl +3 -0
- 2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_gem_xsum_article_DOC_summary_2.jsonl +3 -0
- 2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json +133 -0
- 2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json +133 -0
- 2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_0.json +133 -0
- 2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_1.json +133 -0
- 2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_2.json +133 -0
- 2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.json +133 -0
- 2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.json +133 -0
- 2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_gem_xsum_article_DOC_summary_0.json +133 -0
- 2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_gem_xsum_article_DOC_summary_1.json +133 -0
- 2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_gem_xsum_article_DOC_summary_2.json +133 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_0.json +87 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_0_lm-eval_global_step52452_2023-02-24-23-57-47_0shots_backup.json +87 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_1.json +73 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_1_lm-eval_global_step52452_2023-02-24-23-57-47_1shots_backup.json +73 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_2.json +54 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_2_lm-eval_global_step52452_2023-02-24-23-57-47_2shots_backup.json +54 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_3.json +39 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_3_lm-eval_global_step52452_2023-02-24-23-57-47_3shots_backup.json +39 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_4.json +32 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_4_lm-eval_global_step52452_2023-02-24-23-57-47_4shots_backup.json +32 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_5.json +32 -0
- 2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_5_lm-eval_global_step52452_2023-02-24-23-57-47_5shots_backup.json +32 -0
- 2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- 2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt +3 -0
- 2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt +3 -0
- 2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt +3 -0
- 2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt +3 -0
- 2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt +3 -0
- 2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt +3 -0
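Every entry above is addressable at this exact commit. A minimal sketch of fetching one of the aggregate result files with the huggingface_hub client, assuming this commit lives in a Hub repo; the repo_id below is a placeholder, not the actual repository name:

    from huggingface_hub import hf_hub_download

    # repo_id is an assumption -- substitute the real namespace/name of this repo.
    path = hf_hub_download(
        repo_id="org/lm1-2b8-55b-c4seeds",
        filename="2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json",
        revision="c631903",  # pin to the commit shown above
    )
    print(path)  # local cache path of the downloaded file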
.gitattributes
CHANGED
@@ -723,3 +723,13 @@ evaluation/seed2/generation/examples.limited=3000.model=seed2.task=GEM-wiki_ling
 2b855b28bc4seed3/evaluation/generation/examples.2b855b28bc4seed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
 2b855b28bc4seed4/evaluation/generation/examples.2b855b28bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
 2b855b28bc4seed4/evaluation/generation/examples.2b855b28bc4seed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
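The `filter=lfs diff=lfs merge=lfs -text` attributes route the new examples.*.jsonl files through Git LFS, so a plain clone contains small pointer stubs rather than the data itself. A quick sketch for checking whether a checked-out file is still an LFS pointer (the header string is fixed by the LFS pointer spec):

    def is_lfs_pointer(path: str) -> bool:
        """True if the file is a Git LFS pointer stub rather than real content."""
        with open(path, "rb") as f:
            head = f.read(64)
        return head.startswith(b"version https://git-lfs.github.com/spec/v1")

    # Pointer stubs are ~130 bytes; `git lfs pull` replaces them with the data.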
2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.4066206711982134, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.033723013965742}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07192732542952907, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016382475971692088}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.30547187741815907, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004666013432977962}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10895883012469396, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002008528704603682}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.034124224521766514, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011258346949061335}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.14893550564787877, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0032632079712954696}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05142765596627262, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001246824543408159}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06951188299150508, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015536321007724944}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.29797998285094185, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004578539464668976}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10554163962469583, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018999401584976951}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06880186502940373, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015660770018247742}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2918595743249444, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004365973340593178}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10415553154282955, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018954370177080594}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
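Each agg.*.json file holds a single line of JSON: a "results" list in which every entry repeats the task/prompt metadata and carries exactly one metric plus its standard error, followed by a "config" block describing the run. A sketch for pulling the metric values out of one of these files, with the metadata field names taken from the JSON above:

    import json

    META = {"task_name", "prompt_name", "fixed_answer_choice_list", "dataset_path",
            "dataset_name", "subset", "prompt_id", "prompt_jinja",
            "prompt_original_task", "comment"}

    with open("agg.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json") as f:
        data = json.load(f)

    for entry in data["results"]:
        for key, value in entry.items():
            if key in META or key.endswith("_stderr"):
                continue
            stderr = entry.get(key + "_stderr", float("nan"))
            print(f"{entry['task_name']}  {key:<22} {value:.4f} (+/- {stderr:.4f})")

On the file above this prints, e.g., `GEM/web_nlg_en  bleu  0.4066 (+/- 0.0337)` followed by the ROUGE precision/recall/F-measure rows.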
2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.441015227234624, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02811928663369506}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.0726548726317011, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015123203660414558}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3480211924440434, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005007162611700053}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11213595098636865, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001929589035018206}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.0335432322481532, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009695182541133993}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.16639593533541566, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0034320944741141886}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.051755190143320286, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012231844221965455}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06967949917271966, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014170582927231956}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3335173366403848, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004760042112091419}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10765664842911221, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001806223035008682}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06949682130009485, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00144169867509152}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.33110876740142015, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004635864354540981}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10716257387185534, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018203621728808315}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
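The trailing digit in each filename is the few-shot setting, which the config also records as num_fewshot (0 for the first file above, 1 for this one), so runs of the same prompt at different shot counts can be lined up directly from the file names. A sketch, assuming the agg files sit in the working directory:

    import glob
    import json
    import re

    # agg.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json -> ("GEM-web_nlg_en_PALM_prompt", 0)
    for path in sorted(glob.glob("agg.2b855b9bc4seed1_*.json")):
        m = re.match(r"agg\.2b855b9bc4seed1_(.+)_(\d+)\.json", path)
        task, shots = m.group(1), int(m.group(2))
        with open(path) as f:
            cfg = json.load(f)["config"]
        assert cfg["num_fewshot"] == shots  # filename suffix and config should agree
        print(f"{task}: {shots}-shot")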
2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_0.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1493431846788089, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018651057951166279}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2548583336571623, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002528299389518057}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.17482546253918968, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017962093241495317}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.02958059388980271, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007357126571931123}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0521633146578729, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013585554854051705}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.03487145839395421, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008257165713416542}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11482322910631781, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012610070217646853}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.2047896774759962, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020475160072816}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13645058466848356, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", 
"dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012505081899530457}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.13765195894138185, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017075576824449126}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.23641320862844858, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023610992069355388}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.16137882388270616, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016412506052086466}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.5382561696068602, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06399544328700199}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_1.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1550521213351438, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019541763971361132}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2591201747750158, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026478655932746374}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1796806784514, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018714401391125607}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.030990812889466937, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007609017578133211}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05362608829041054, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001393078096579126}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.03614437414445121, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008460043362571757}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11485232716958933, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012878987216988057}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.19958644094767267, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002041519133152489}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13484320380645512, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", 
"dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001257828140076275}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.14415005003230233, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018020269635249566}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.24228585475636266, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024919558768600326}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.16735135112097876, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017317067736812232}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.7101242197452051, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04333761308856783}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_2.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1705870674804116, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002048796485044356}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.28464437226221473, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026857267394890393}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1970048673630101, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001873251264352212}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0375931104436251, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008379763934403864}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0642856259647273, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014690192240657321}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04349341038860359, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000895910412734044}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.12469459823199261, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001380245091026989}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.21573877115274695, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002116828674796055}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.1456514344157347, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", 
"dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012777745388170482}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.15874261945387053, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018989313183848545}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.26633944675442933, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025440410206595122}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.18373940262798896, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001747803293779092}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.1448539044725874, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06277677171239805}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 0.23848794691656622, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03586833580767261}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.019286698361945742, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0006077660476531964}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.03771027274112491, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0012643509148260043}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.02476955743708725, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0007710556690163732}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.0018866790365766187, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0002143326315912907}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.004268812953176167, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00046738906149789073}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.0025241396159283234, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00028150795832003375}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.018711596296599558, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.000549656942001388}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.036704663603431754, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0011874696198706125}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.02408262386824008, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0007136995922629838}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.017739797167011156, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0005177890835136993}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.03456546163273569, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0011034522576739757}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.022774229967586195, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0006671086701287083}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
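The config block records the run exactly as the harness received it: model_args is a comma-separated key=value string. A small sketch that turns it into a dict, which is safe here because none of the values contain commas:

    # model_args string copied from the config above.
    model_args = ("pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/"
                  "nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,"
                  "use_accelerate=True,"
                  "tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,"
                  "dtype=bfloat16")

    parsed = dict(kv.split("=", 1) for kv in model_args.split(","))
    print(parsed["dtype"])       # bfloat16
    print(parsed["pretrained"])  # checkpoint converted to transformers format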
2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 7.611947046679122, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1143274370287527}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.39546550497235883, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00338263349515896}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.41781588842973927, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028231707574303336}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.37951927025471477, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002437652542254183}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.17176180426713916, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002231883030057573}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.17748075850051998, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0020235877252369762}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.162134939165695, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018063315065409062}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.29255451415473116, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0025475028016928546}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.31556464887995433, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023200773160819157}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2827204639054135, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001840892993788695}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3218545869658081, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0030063312376711593}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.33940243915149054, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002593279963684452}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3085637675341545, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002263042771686147}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
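The prompt_jinja field stored in these result records is a promptsource-style template: everything before the ||| separator is rendered as the model input, and everything after it as the reference target. Below is a minimal sketch of rendering one, assuming jinja2 is installed; the example meaning_representation and reference are illustrative stand-ins, not values taken from these files.

    import json
    from jinja2 import Template

    # Load one aggregated result file and pull out its stored template.
    record = json.load(open("agg.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.json"))
    jinja_src = record["results"][0]["prompt_jinja"]
    prompt_src, target_src = jinja_src.split("|||")

    example = {
        # Illustrative E2E-style input; not drawn from the dataset itself.
        "meaning_representation": "name[The Eagle], eatType[coffee shop], food[French]",
        "human_reference": "The Eagle is a French coffee shop.",
    }
    prompt = Template(prompt_src).render(**example)   # model input
    target = Template(target_src).render(**example)   # reference text
    print(prompt)
    print("->", target.strip())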
2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_gem_xsum_article_DOC_summary_0.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1489888413499883, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018521966676135623}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.35598499223601365, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004207211166343017}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.20643072459177322, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023952712703594957}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0324623282960817, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010674592282863239}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0820809502686687, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002813424567852905}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.04580671342088997, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015030605732593261}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.11153813287033311, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001374306634500298}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.26864922067574026, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0033157904418926283}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.15483993335255772, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018017433912559451}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.1179757836593057, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015154466049348198}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2847650880272033, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003712939966528145}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.16399947665135114, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002023909905331339}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.8439516808341079, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09422907248075703}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_gem_xsum_article_DOC_summary_1.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1177467243515202, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017455506749855451}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.29171927966859307, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003919612515883716}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1655189451131417, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023018941372401455}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.02141171153130294, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008955978971257212}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.05531529147706192, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002372274102893343}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.03052402695525932, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012776222243206263}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0927268161322543, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012892522930530307}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.232421966160188, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0031136409947685123}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.13085315336542225, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017506812383595754}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.09395615061271292, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013824641506762928}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.23524314570007646, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0033502788795521077}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.13257722034008568, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018892826906714033}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.247854057178413, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.053793972634237734}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
2b855b9bc4seed1/evaluation/generation/agg.2b855b9bc4seed1_gem_xsum_article_DOC_summary_2.json
ADDED
@@ -0,0 +1 @@
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1177626659310641, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001707946986197866}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2928721346826048, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0039006436966328977}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.165998653508353, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002288522145343238}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.020528446471707117, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000867911954165022}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.052169823470780564, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022808799038575088}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.029102694813621793, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012269354923261043}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.09145048851843915, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012506279990524979}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.22942628081026484, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002973056999961359}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12917404609126437, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016814421892826735}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0940902852266538, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013601724232084945}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.23630864651546127, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003290166668177434}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.13296050139540747, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001845998235299578}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.1975588661664014, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06263558549987819}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
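Each config block above records how the harness was run: num_fewshot is the number of in-context examples (0, 1, and 2 across the three xsum files), limit caps evaluation at 3000 documents, and bootstrap_iters is 10, so every *_stderr value is a bootstrap estimate over just ten resamples and is itself fairly noisy. A minimal sketch of that resampling scheme follows, with illustrative names rather than the harness's actual API.

    import random
    import statistics

    def bootstrap_stderr(metric_fn, samples, iters=10, seed=1234):
        """Std. dev. of a metric over `iters` resamples drawn with replacement."""
        rng = random.Random(seed)
        stats = []
        for _ in range(iters):
            resample = [rng.choice(samples) for _ in samples]
            stats.append(metric_fn(resample))
        return statistics.stdev(stats)

    # e.g. stderr of a mean score over per-document ROUGE values:
    # bootstrap_stderr(lambda xs: sum(xs) / len(xs), per_doc_scores)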
2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ac2c6c0a3661d037bb433ac8fe14a7c0e5dbf00fba436c368b2fef20e8576a2d
size 4136352
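The examples.*.jsonl files in this commit are stored as Git LFS pointers rather than raw JSONL: each pointer records the spec version, a sha256 object id, and the payload size in bytes. A minimal sketch of reading one back, a helper of my own rather than anything git-lfs ships:

    def parse_lfs_pointer(path):
        """Parse a Git LFS pointer file into its version/oid/size fields."""
        fields = {}
        with open(path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        fields["size"] = int(fields["size"])            # payload size in bytes
        fields["oid"] = fields["oid"].split(":", 1)[1]  # drop the "sha256:" prefix
        return fields

    # parse_lfs_pointer("examples.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl")
    # -> {"version": "https://git-lfs.github.com/spec/v1", "oid": "ac2c6c0a...", "size": 4136352}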
2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d8527bf3911536b4985ba5bdb8ec4ef57ed68fb54907689053fc407ad8fdf01c
size 5107933
2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b49acae1d44640afe3f9498fc72e36ed408feb6ec1d73e40bf2fc4aa0790505c
size 7702432
2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9f71c40bacf672771436d42c5ab04506f4ab7ec8a7eb1c2b6d62188707c3f8ab
size 13309234
2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eda9a4971adebe7232e4db9762bd0620932723724b0be02402390f9242efe82c
size 18914077
2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:332176d6b5f914e935c6b47cde67bb0257f0a6877db4be7d9ab8de9910cbc15c
size 4512453
2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:31477856abe5cd52629687c390f07f33a2de85d72340f4c85dbf9a416b732378
size 5211934
2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_gem_xsum_article_DOC_summary_0.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6e97f416dce10f7a4afd940f65be2bd40cec276aec7341a4d06ce146d7a7139e
size 2830001
2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_gem_xsum_article_DOC_summary_1.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a2c7a97b865775d0d13e54c7f6fecbabe0f53f98a4cf124870f4d3869c460880
size 5101260
2b855b9bc4seed1/evaluation/generation/examples.2b855b9bc4seed1_gem_xsum_article_DOC_summary_2.jsonl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d4d91dc5ca12749ce48668589949b9f6e0b71282ec8061fd06bb41eb965bc839
size 7375730
2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.4066206711982134,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.033723013965742
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.07192732542952907,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0016382475971692088
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.30547187741815907,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.004666013432977962
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.10895883012469396,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.002008528704603682
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.034124224521766514,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0011258346949061335
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.14893550564787877,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0032632079712954696
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.05142765596627262,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.001246824543408159
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.06951188299150508,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0015536321007724944
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.29797998285094185,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.004578539464668976
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.10554163962469583,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0018999401584976951
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.06880186502940373,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0015660770018247742
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.2918595743249444,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.004365973340593178
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.10415553154282955,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0018954370177080594
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 0,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
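The slim.*.json files carry the same numbers as the agg.*.json files, but pretty-printed and without the prompt metadata, which makes them the convenient ones to inspect programmatically. A minimal sketch, again illustrative rather than part of this repo, that flattens one into (metric, value, stderr) rows:

    import json

    METADATA = {"task_name", "prompt_name", "dataset_path", "dataset_name", "subset"}

    def summarize(path):
        """Yield (metric, value, stderr) for every metric entry in a slim file."""
        doc = json.load(open(path))
        for entry in doc["results"]:
            for key, value in entry.items():
                if key in METADATA or key.endswith("_stderr"):
                    continue
                yield key, value, entry.get(key + "_stderr")

    for metric, value, stderr in summarize("slim.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_0.json"):
        print(f"{metric:22s} {value:.4f} +/- {stderr:.4f}")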
2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "bleu": 0.441015227234624,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.02811928663369506
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_precision": 0.0726548726317011,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0015123203660414558
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_recall": 0.3480211924440434,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.005007162611700053
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge1_fmeasure": 0.11213595098636865,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.001929589035018206
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_precision": 0.0335432322481532,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0009695182541133993
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_recall": 0.16639593533541566,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0034320944741141886
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rouge2_fmeasure": 0.051755190143320286,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0012231844221965455
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_precision": 0.06967949917271966,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0014170582927231956
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_recall": 0.3335173366403848,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.004760042112091419
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeL_fmeasure": 0.10765664842911221,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.001806223035008682
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_precision": 0.06949682130009485,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.00144169867509152
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_recall": 0.33110876740142015,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.004635864354540981
    },
    {
      "task_name": "GEM/web_nlg_en",
      "prompt_name": "PALM_prompt",
      "rougeLsum_fmeasure": 0.10716257387185534,
      "dataset_path": "GEM/web_nlg",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0018203621728808315
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 1,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_0.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_precision": 0.1493431846788089,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0018651057951166279
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_recall": 0.2548583336571623,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.002528299389518057
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_fmeasure": 0.17482546253918968,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0017962093241495317
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_precision": 0.02958059388980271,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0007357126571931123
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_recall": 0.0521633146578729,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0013585554854051705
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_fmeasure": 0.03487145839395421,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0008257165713416542
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_precision": 0.11482322910631781,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0012610070217646853
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_recall": 0.2047896774759962,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.0020475160072816
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_fmeasure": 0.13645058466848356,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0012505081899530457
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_precision": 0.13765195894138185,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0017075576824449126
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_recall": 0.23641320862844858,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.0023610992069355388
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_fmeasure": 0.16137882388270616,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0016412506052086466
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "bleu": 1.5382561696068602,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.06399544328700199
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 0,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_1.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_precision": 0.1550521213351438,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.0019541763971361132
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_recall": 0.2591201747750158,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.0026478655932746374
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_fmeasure": 0.1796806784514,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.0018714401391125607
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_precision": 0.030990812889466937,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0007609017578133211
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_recall": 0.05362608829041054,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.001393078096579126
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_fmeasure": 0.03614437414445121,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.0008460043362571757
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_precision": 0.11485232716958933,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.0012878987216988057
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_recall": 0.19958644094767267,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.002041519133152489
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_fmeasure": 0.13484320380645512,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.001257828140076275
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_precision": 0.14415005003230233,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0018020269635249566
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_recall": 0.24228585475636266,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.0024919558768600326
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_fmeasure": 0.16735135112097876,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.0017317067736812232
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "bleu": 1.7101242197452051,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.04333761308856783
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 1,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_GEM-wiki_lingua_en_tldr_en_2.json
ADDED
@@ -0,0 +1,133 @@
{
  "results": [
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_precision": 0.1705870674804116,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_precision_stderr": 0.002048796485044356
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_recall": 0.28464437226221473,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_recall_stderr": 0.0026857267394890393
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge1_fmeasure": 0.1970048673630101,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge1_fmeasure_stderr": 0.001873251264352212
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_precision": 0.0375931104436251,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_precision_stderr": 0.0008379763934403864
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_recall": 0.0642856259647273,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_recall_stderr": 0.0014690192240657321
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rouge2_fmeasure": 0.04349341038860359,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rouge2_fmeasure_stderr": 0.000895910412734044
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_precision": 0.12469459823199261,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_precision_stderr": 0.001380245091026989
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_recall": 0.21573877115274695,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_recall_stderr": 0.002116828674796055
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeL_fmeasure": 0.1456514344157347,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeL_fmeasure_stderr": 0.0012777745388170482
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_precision": 0.15874261945387053,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_precision_stderr": 0.0018989313183848545
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_recall": 0.26633944675442933,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_recall_stderr": 0.0025440410206595122
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "rougeLsum_fmeasure": 0.18373940262798896,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "rougeLsum_fmeasure_stderr": 0.001747803293779092
    },
    {
      "task_name": "GEM/wiki_lingua_en",
      "prompt_name": "tldr_en",
      "bleu": 2.1448539044725874,
      "dataset_path": "GEM/wiki_lingua",
      "dataset_name": "en",
      "subset": null,
      "bleu_stderr": 0.06277677171239805
    }
  ],
  "config": {
    "model": "hf-causal",
    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
    "task_args": "",
    "num_fewshot": 2,
    "batch_size": 16,
    "device": "cuda",
    "use_cache": false,
    "limit": 3000,
    "bootstrap_iters": 10,
    "seed": 1234
  }
}
2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.json
ADDED
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 0.23848794691656622,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.03586833580767261
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.019286698361945742,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0006077660476531964
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.03771027274112491,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0012643509148260043
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.02476955743708725,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0007710556690163732
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.0018866790365766187,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.0002143326315912907
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.004268812953176167,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.00046738906149789073
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.0025241396159283234,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.00028150795832003375
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.018711596296599558,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.000549656942001388
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.036704663603431754,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0011874696198706125
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.02408262386824008,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0007136995922629838
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.017739797167011156,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0005177890835136993
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.03456546163273569,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.0011034522576739757
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.022774229967586195,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0006671086701287083
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 0,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.json
ADDED
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 7.611947046679122,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.1143274370287527
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.39546550497235883,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.00338263349515896
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.41781588842973927,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0028231707574303336
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.37951927025471477,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.002437652542254183
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.17176180426713916,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.002231883030057573
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.17748075850051998,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0020235877252369762
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.162134939165695,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0018063315065409062
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.29255451415473116,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.0025475028016928546
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.31556464887995433,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0023200773160819157
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.2827204639054135,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.001840892993788695
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.3218545869658081,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0030063312376711593
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.33940243915149054,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.002593279963684452
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.3085637675341545,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.002263042771686147
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_gem_xsum_article_DOC_summary_0.json
ADDED
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_precision": 0.1489888413499883,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_precision_stderr": 0.0018521966676135623
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_recall": 0.35598499223601365,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_recall_stderr": 0.004207211166343017
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_fmeasure": 0.20643072459177322,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_fmeasure_stderr": 0.0023952712703594957
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_precision": 0.0324623282960817,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_precision_stderr": 0.0010674592282863239
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_recall": 0.0820809502686687,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_recall_stderr": 0.002813424567852905
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_fmeasure": 0.04580671342088997,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_fmeasure_stderr": 0.0015030605732593261
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_precision": 0.11153813287033311,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_precision_stderr": 0.001374306634500298
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_recall": 0.26864922067574026,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_recall_stderr": 0.0033157904418926283
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_fmeasure": 0.15483993335255772,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_fmeasure_stderr": 0.0018017433912559451
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_precision": 0.1179757836593057,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_precision_stderr": 0.0015154466049348198
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_recall": 0.2847650880272033,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_recall_stderr": 0.003712939966528145
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_fmeasure": 0.16399947665135114,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_fmeasure_stderr": 0.002023909905331339
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "bleu": 1.8439516808341079,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "bleu_stderr": 0.09422907248075703
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 0,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_gem_xsum_article_DOC_summary_1.json
ADDED
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_precision": 0.1177467243515202,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_precision_stderr": 0.0017455506749855451
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_recall": 0.29171927966859307,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_recall_stderr": 0.003919612515883716
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_fmeasure": 0.1655189451131417,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_fmeasure_stderr": 0.0023018941372401455
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_precision": 0.02141171153130294,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_precision_stderr": 0.0008955978971257212
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_recall": 0.05531529147706192,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_recall_stderr": 0.002372274102893343
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_fmeasure": 0.03052402695525932,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_fmeasure_stderr": 0.0012776222243206263
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_precision": 0.0927268161322543,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_precision_stderr": 0.0012892522930530307
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_recall": 0.232421966160188,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_recall_stderr": 0.0031136409947685123
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_fmeasure": 0.13085315336542225,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_fmeasure_stderr": 0.0017506812383595754
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_precision": 0.09395615061271292,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_precision_stderr": 0.0013824641506762928
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_recall": 0.23524314570007646,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_recall_stderr": 0.0033502788795521077
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_fmeasure": 0.13257722034008568,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_fmeasure_stderr": 0.0018892826906714033
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "bleu": 1.247854057178413,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "bleu_stderr": 0.053793972634237734
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
2b855b9bc4seed1/evaluation/generation/slim.2b855b9bc4seed1_gem_xsum_article_DOC_summary_2.json
ADDED
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_precision": 0.1177626659310641,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_precision_stderr": 0.001707946986197866
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_recall": 0.2928721346826048,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_recall_stderr": 0.0039006436966328977
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_fmeasure": 0.165998653508353,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_fmeasure_stderr": 0.002288522145343238
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_precision": 0.020528446471707117,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_precision_stderr": 0.000867911954165022
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_recall": 0.052169823470780564,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_recall_stderr": 0.0022808799038575088
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_fmeasure": 0.029102694813621793,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_fmeasure_stderr": 0.0012269354923261043
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_precision": 0.09145048851843915,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_precision_stderr": 0.0012506279990524979
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_recall": 0.22942628081026484,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_recall_stderr": 0.002973056999961359
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_fmeasure": 0.12917404609126437,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_fmeasure_stderr": 0.0016814421892826735
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_precision": 0.0940902852266538,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_precision_stderr": 0.0013601724232084945
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_recall": 0.23630864651546127,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_recall_stderr": 0.003290166668177434
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_fmeasure": 0.13296050139540747,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_fmeasure_stderr": 0.001845998235299578
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "bleu": 1.1975588661664014,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "bleu_stderr": 0.06263558549987819
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4seeds/2b855b9bc4seed1/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 2,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_0.json
ADDED
@@ -0,0 +1,87 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.34,
+      "acc_stderr": 0.014987482264363937
+    },
+    "anli_r2": {
+      "acc": 0.333,
+      "acc_stderr": 0.014910846164229864
+    },
+    "anli_r3": {
+      "acc": 0.3433333333333333,
+      "acc_stderr": 0.01371263383046586
+    },
+    "cb": {
+      "acc": 0.44642857142857145,
+      "acc_stderr": 0.06703189227942398,
+      "f1": 0.2956393200295639
+    },
+    "copa": {
+      "acc": 0.71,
+      "acc_stderr": 0.04560480215720684
+    },
+    "hellaswag": {
+      "acc": 0.43328022306313485,
+      "acc_stderr": 0.004945157565218188,
+      "acc_norm": 0.5569607647878908,
+      "acc_norm_stderr": 0.004957296691391566
+    },
+    "rte": {
+      "acc": 0.5415162454873647,
+      "acc_stderr": 0.029992535385373317
+    },
+    "winogrande": {
+      "acc": 0.5477505919494869,
+      "acc_stderr": 0.013988256216606007
+    },
+    "storycloze_2016": {
+      "acc": 0.6916087653661144,
+      "acc_stderr": 0.0106797344454878
+    },
+    "boolq": {
+      "acc": 0.5351681957186545,
+      "acc_stderr": 0.008723396352960192
+    },
+    "arc_easy": {
+      "acc": 0.5631313131313131,
+      "acc_stderr": 0.010177672928157685,
+      "acc_norm": 0.49747474747474746,
+      "acc_norm_stderr": 0.01025965266878347
+    },
+    "arc_challenge": {
+      "acc": 0.2525597269624573,
+      "acc_stderr": 0.012696728980207708,
+      "acc_norm": 0.28071672354948807,
+      "acc_norm_stderr": 0.013131238126975583
+    },
+    "sciq": {
+      "acc": 0.805,
+      "acc_stderr": 0.012535235623319329,
+      "acc_norm": 0.71,
+      "acc_norm_stderr": 0.014356395999905684
+    },
+    "piqa": {
+      "acc": 0.7442872687704026,
+      "acc_stderr": 0.010178690109459855,
+      "acc_norm": 0.750272034820457,
+      "acc_norm_stderr": 0.010099232969867469
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0,
+    "hellaswag": 0,
+    "rte": 0,
+    "winogrande": 0,
+    "storycloze_2016": 0,
+    "boolq": 1,
+    "arc_easy": 0,
+    "arc_challenge": 0,
+    "sciq": 0,
+    "piqa": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_0_lm-eval_global_step52452_2023-02-24-23-57-47_0shots_backup.json
ADDED
@@ -0,0 +1,87 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.34,
+      "acc_stderr": 0.014987482264363937
+    },
+    "anli_r2": {
+      "acc": 0.333,
+      "acc_stderr": 0.014910846164229864
+    },
+    "anli_r3": {
+      "acc": 0.3433333333333333,
+      "acc_stderr": 0.01371263383046586
+    },
+    "cb": {
+      "acc": 0.44642857142857145,
+      "acc_stderr": 0.06703189227942398,
+      "f1": 0.2956393200295639
+    },
+    "copa": {
+      "acc": 0.71,
+      "acc_stderr": 0.04560480215720684
+    },
+    "hellaswag": {
+      "acc": 0.43328022306313485,
+      "acc_stderr": 0.004945157565218188,
+      "acc_norm": 0.5569607647878908,
+      "acc_norm_stderr": 0.004957296691391566
+    },
+    "rte": {
+      "acc": 0.5415162454873647,
+      "acc_stderr": 0.029992535385373317
+    },
+    "winogrande": {
+      "acc": 0.5477505919494869,
+      "acc_stderr": 0.013988256216606007
+    },
+    "storycloze_2016": {
+      "acc": 0.6916087653661144,
+      "acc_stderr": 0.0106797344454878
+    },
+    "boolq": {
+      "acc": 0.5351681957186545,
+      "acc_stderr": 0.008723396352960192
+    },
+    "arc_easy": {
+      "acc": 0.5631313131313131,
+      "acc_stderr": 0.010177672928157685,
+      "acc_norm": 0.49747474747474746,
+      "acc_norm_stderr": 0.01025965266878347
+    },
+    "arc_challenge": {
+      "acc": 0.2525597269624573,
+      "acc_stderr": 0.012696728980207708,
+      "acc_norm": 0.28071672354948807,
+      "acc_norm_stderr": 0.013131238126975583
+    },
+    "sciq": {
+      "acc": 0.805,
+      "acc_stderr": 0.012535235623319329,
+      "acc_norm": 0.71,
+      "acc_norm_stderr": 0.014356395999905684
+    },
+    "piqa": {
+      "acc": 0.7442872687704026,
+      "acc_stderr": 0.010178690109459855,
+      "acc_norm": 0.750272034820457,
+      "acc_norm_stderr": 0.010099232969867469
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0,
+    "hellaswag": 0,
+    "rte": 0,
+    "winogrande": 0,
+    "storycloze_2016": 0,
+    "boolq": 1,
+    "arc_easy": 0,
+    "arc_challenge": 0,
+    "sciq": 0,
+    "piqa": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_1.json
ADDED
@@ -0,0 +1,73 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.323,
+      "acc_stderr": 0.014794927843348635
+    },
+    "anli_r2": {
+      "acc": 0.348,
+      "acc_stderr": 0.01507060460376841
+    },
+    "anli_r3": {
+      "acc": 0.3416666666666667,
+      "acc_stderr": 0.013696658778002519
+    },
+    "cb": {
+      "acc": 0.5178571428571429,
+      "acc_stderr": 0.06737697508644648,
+      "f1": 0.3656150648080215
+    },
+    "copa": {
+      "acc": 0.74,
+      "acc_stderr": 0.04408440022768078
+    },
+    "hellaswag": {
+      "acc": 0.4282015534754033,
+      "acc_stderr": 0.004938068627349495,
+      "acc_norm": 0.555964947221669,
+      "acc_norm_stderr": 0.0049584261524818945
+    },
+    "rte": {
+      "acc": 0.5415162454873647,
+      "acc_stderr": 0.029992535385373314
+    },
+    "winogrande": {
+      "acc": 0.5564325177584846,
+      "acc_stderr": 0.0139626949076204
+    },
+    "storycloze_2016": {
+      "acc": 0.6787814003206841,
+      "acc_stderr": 0.010798029402794916
+    },
+    "boolq": {
+      "acc": 0.555045871559633,
+      "acc_stderr": 0.008691897543539221
+    },
+    "arc_easy": {
+      "acc": 0.5740740740740741,
+      "acc_stderr": 0.010146568651002255,
+      "acc_norm": 0.5332491582491582,
+      "acc_norm_stderr": 0.010237073872130745
+    },
+    "arc_challenge": {
+      "acc": 0.26023890784982934,
+      "acc_stderr": 0.012821930225112566,
+      "acc_norm": 0.27986348122866894,
+      "acc_norm_stderr": 0.013119040897725923
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0,
+    "hellaswag": 0,
+    "rte": 0,
+    "winogrande": 0,
+    "storycloze_2016": 0,
+    "boolq": 1,
+    "arc_easy": 0,
+    "arc_challenge": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_1_lm-eval_global_step52452_2023-02-24-23-57-47_1shots_backup.json
ADDED
@@ -0,0 +1,73 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.323,
+      "acc_stderr": 0.014794927843348635
+    },
+    "anli_r2": {
+      "acc": 0.348,
+      "acc_stderr": 0.01507060460376841
+    },
+    "anli_r3": {
+      "acc": 0.3416666666666667,
+      "acc_stderr": 0.013696658778002519
+    },
+    "cb": {
+      "acc": 0.5178571428571429,
+      "acc_stderr": 0.06737697508644648,
+      "f1": 0.3656150648080215
+    },
+    "copa": {
+      "acc": 0.74,
+      "acc_stderr": 0.04408440022768078
+    },
+    "hellaswag": {
+      "acc": 0.4282015534754033,
+      "acc_stderr": 0.004938068627349495,
+      "acc_norm": 0.555964947221669,
+      "acc_norm_stderr": 0.0049584261524818945
+    },
+    "rte": {
+      "acc": 0.5415162454873647,
+      "acc_stderr": 0.029992535385373314
+    },
+    "winogrande": {
+      "acc": 0.5564325177584846,
+      "acc_stderr": 0.0139626949076204
+    },
+    "storycloze_2016": {
+      "acc": 0.6787814003206841,
+      "acc_stderr": 0.010798029402794916
+    },
+    "boolq": {
+      "acc": 0.555045871559633,
+      "acc_stderr": 0.008691897543539221
+    },
+    "arc_easy": {
+      "acc": 0.5740740740740741,
+      "acc_stderr": 0.010146568651002255,
+      "acc_norm": 0.5332491582491582,
+      "acc_norm_stderr": 0.010237073872130745
+    },
+    "arc_challenge": {
+      "acc": 0.26023890784982934,
+      "acc_stderr": 0.012821930225112566,
+      "acc_norm": 0.27986348122866894,
+      "acc_norm_stderr": 0.013119040897725923
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0,
+    "hellaswag": 0,
+    "rte": 0,
+    "winogrande": 0,
+    "storycloze_2016": 0,
+    "boolq": 1,
+    "arc_easy": 0,
+    "arc_challenge": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_2.json
ADDED
@@ -0,0 +1,54 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.32,
+      "acc_stderr": 0.014758652303574874
+    },
+    "anli_r2": {
+      "acc": 0.334,
+      "acc_stderr": 0.014922019523732968
+    },
+    "anli_r3": {
+      "acc": 0.345,
+      "acc_stderr": 0.013728421539454878
+    },
+    "cb": {
+      "acc": 0.42857142857142855,
+      "acc_stderr": 0.06672848092813058,
+      "f1": 0.291005291005291
+    },
+    "copa": {
+      "acc": 0.71,
+      "acc_stderr": 0.045604802157206845
+    },
+    "hellaswag": {
+      "acc": 0.42929695279824737,
+      "acc_stderr": 0.004939642460172587,
+      "acc_norm": 0.5593507269468233,
+      "acc_norm_stderr": 0.004954503606471611
+    },
+    "rte": {
+      "acc": 0.49097472924187724,
+      "acc_stderr": 0.030091559826331334
+    },
+    "winogrande": {
+      "acc": 0.5611681136543015,
+      "acc_stderr": 0.013946933444507032
+    },
+    "storycloze_2016": {
+      "acc": 0.686798503474078,
+      "acc_stderr": 0.010725209422929404
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0,
+    "hellaswag": 0,
+    "rte": 0,
+    "winogrande": 0,
+    "storycloze_2016": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_2_lm-eval_global_step52452_2023-02-24-23-57-47_2shots_backup.json
ADDED
@@ -0,0 +1,54 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.32,
+      "acc_stderr": 0.014758652303574874
+    },
+    "anli_r2": {
+      "acc": 0.334,
+      "acc_stderr": 0.014922019523732968
+    },
+    "anli_r3": {
+      "acc": 0.345,
+      "acc_stderr": 0.013728421539454878
+    },
+    "cb": {
+      "acc": 0.42857142857142855,
+      "acc_stderr": 0.06672848092813058,
+      "f1": 0.291005291005291
+    },
+    "copa": {
+      "acc": 0.71,
+      "acc_stderr": 0.045604802157206845
+    },
+    "hellaswag": {
+      "acc": 0.42929695279824737,
+      "acc_stderr": 0.004939642460172587,
+      "acc_norm": 0.5593507269468233,
+      "acc_norm_stderr": 0.004954503606471611
+    },
+    "rte": {
+      "acc": 0.49097472924187724,
+      "acc_stderr": 0.030091559826331334
+    },
+    "winogrande": {
+      "acc": 0.5611681136543015,
+      "acc_stderr": 0.013946933444507032
+    },
+    "storycloze_2016": {
+      "acc": 0.686798503474078,
+      "acc_stderr": 0.010725209422929404
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0,
+    "hellaswag": 0,
+    "rte": 0,
+    "winogrande": 0,
+    "storycloze_2016": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_3.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.325,
+      "acc_stderr": 0.014818724459095524
+    },
+    "anli_r2": {
+      "acc": 0.35,
+      "acc_stderr": 0.015090650341444231
+    },
+    "anli_r3": {
+      "acc": 0.33166666666666667,
+      "acc_stderr": 0.013596836729485157
+    },
+    "cb": {
+      "acc": 0.5357142857142857,
+      "acc_stderr": 0.06724777654937658,
+      "f1": 0.4369505854187708
+    },
+    "copa": {
+      "acc": 0.75,
+      "acc_stderr": 0.04351941398892446
+    },
+    "hellaswag": {
+      "acc": 0.427504481179048,
+      "acc_stderr": 0.0049370542337115715,
+      "acc_norm": 0.5574586735710018,
+      "acc_norm_stderr": 0.004956724392646532
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0,
+    "hellaswag": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_3_lm-eval_global_step52452_2023-02-24-23-57-47_3shots_backup.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.325,
+      "acc_stderr": 0.014818724459095524
+    },
+    "anli_r2": {
+      "acc": 0.35,
+      "acc_stderr": 0.015090650341444231
+    },
+    "anli_r3": {
+      "acc": 0.33166666666666667,
+      "acc_stderr": 0.013596836729485157
+    },
+    "cb": {
+      "acc": 0.5357142857142857,
+      "acc_stderr": 0.06724777654937658,
+      "f1": 0.4369505854187708
+    },
+    "copa": {
+      "acc": 0.75,
+      "acc_stderr": 0.04351941398892446
+    },
+    "hellaswag": {
+      "acc": 0.427504481179048,
+      "acc_stderr": 0.0049370542337115715,
+      "acc_norm": 0.5574586735710018,
+      "acc_norm_stderr": 0.004956724392646532
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0,
+    "hellaswag": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_4.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.342,
+      "acc_stderr": 0.01500870618212173
+    },
+    "anli_r2": {
+      "acc": 0.367,
+      "acc_stderr": 0.015249378464171754
+    },
+    "anli_r3": {
+      "acc": 0.35333333333333333,
+      "acc_stderr": 0.01380457216231493
+    },
+    "cb": {
+      "acc": 0.44642857142857145,
+      "acc_stderr": 0.067031892279424,
+      "f1": 0.26007168458781366
+    },
+    "copa": {
+      "acc": 0.72,
+      "acc_stderr": 0.04512608598542127
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_4_lm-eval_global_step52452_2023-02-24-23-57-47_4shots_backup.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.342,
+      "acc_stderr": 0.01500870618212173
+    },
+    "anli_r2": {
+      "acc": 0.367,
+      "acc_stderr": 0.015249378464171754
+    },
+    "anli_r3": {
+      "acc": 0.35333333333333333,
+      "acc_stderr": 0.01380457216231493
+    },
+    "cb": {
+      "acc": 0.44642857142857145,
+      "acc_stderr": 0.067031892279424,
+      "f1": 0.26007168458781366
+    },
+    "copa": {
+      "acc": 0.72,
+      "acc_stderr": 0.04512608598542127
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_5.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.346,
+      "acc_stderr": 0.015050266127564448
+    },
+    "anli_r2": {
+      "acc": 0.344,
+      "acc_stderr": 0.015029633724408947
+    },
+    "anli_r3": {
+      "acc": 0.3516666666666667,
+      "acc_stderr": 0.013789711695404789
+    },
+    "cb": {
+      "acc": 0.5357142857142857,
+      "acc_stderr": 0.06724777654937658,
+      "f1": 0.31399711399711405
+    },
+    "copa": {
+      "acc": 0.72,
+      "acc_stderr": 0.04512608598542127
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0
+  }
+}
2b855b9bc4seed1/evaluation/rankeval/2b855b9bc4seed1_5_lm-eval_global_step52452_2023-02-24-23-57-47_5shots_backup.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "results": {
+    "anli_r1": {
+      "acc": 0.346,
+      "acc_stderr": 0.015050266127564448
+    },
+    "anli_r2": {
+      "acc": 0.344,
+      "acc_stderr": 0.015029633724408947
+    },
+    "anli_r3": {
+      "acc": 0.3516666666666667,
+      "acc_stderr": 0.013789711695404789
+    },
+    "cb": {
+      "acc": 0.5357142857142857,
+      "acc_stderr": 0.06724777654937658,
+      "f1": 0.31399711399711405
+    },
+    "copa": {
+      "acc": 0.72,
+      "acc_stderr": 0.04512608598542127
+    }
+  },
+  "versions": {
+    "anli_r1": 0,
+    "anli_r2": 0,
+    "anli_r3": 0,
+    "cb": 1,
+    "copa": 0
+  }
+}
2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a674c3bcc06cd1542b790f270811a3efd918990625d60ac9241fcd546fe1400f
+size 131677719
2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa989fcf19341cf3249f5acad98454dc08afd8faf08219fb233109b0fd9da8dc
+size 131677805
2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daa10f7aff031e1ec106702d10f69dbeb91192c2b45413a5de8aece69cb858da
+size 131677741
2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68990ea35af060082bfeb0fe02ac5c8e9fc26c41ebb2a17610469173ab7eea9b
+size 131677741
2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e10c9c69d47b8a4743a6e4f982a2dabe308387ee6359dc72cb6a0019cd72eae8
+size 131677741
2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:417416b6961f27e0e24a32a886a7646cc57421a3c220b5c0bd36e8ce9f04dbb8
+size 131677741
2b855b9bc4seed1/global_step52452/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37047b305e3b58f53d0d17697838167858d5ba00132edf5e2d9dd04a49697f88
+size 131677677