Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator
#8
by
autoevaluator
HF Staff
- opened
README.md
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
---
|
| 2 |
-
tags:
|
| 3 |
-
- summarization
|
| 4 |
language:
|
| 5 |
- en
|
| 6 |
license: mit
|
|
|
|
|
|
|
| 7 |
model-index:
|
| 8 |
- name: facebook/bart-large-xsum
|
| 9 |
results:
|
|
@@ -16,30 +16,36 @@ model-index:
|
|
| 16 |
config: 3.0.0
|
| 17 |
split: test
|
| 18 |
metrics:
|
| 19 |
-
-
|
| 20 |
-
type: rouge
|
| 21 |
value: 25.2697
|
|
|
|
| 22 |
verified: true
|
| 23 |
-
|
| 24 |
-
|
| 25 |
value: 7.6638
|
|
|
|
| 26 |
verified: true
|
| 27 |
-
|
| 28 |
-
|
| 29 |
value: 17.1808
|
|
|
|
| 30 |
verified: true
|
| 31 |
-
|
| 32 |
-
|
| 33 |
value: 21.7933
|
|
|
|
| 34 |
verified: true
|
| 35 |
-
|
| 36 |
-
|
| 37 |
value: 3.5042972564697266
|
|
|
|
| 38 |
verified: true
|
| 39 |
-
|
| 40 |
-
|
| 41 |
value: 27.4462
|
|
|
|
| 42 |
verified: true
|
|
|
|
| 43 |
- task:
|
| 44 |
type: summarization
|
| 45 |
name: Summarization
|
|
@@ -49,30 +55,36 @@ model-index:
|
|
| 49 |
config: default
|
| 50 |
split: test
|
| 51 |
metrics:
|
| 52 |
-
-
|
| 53 |
-
type: rouge
|
| 54 |
value: 45.4525
|
|
|
|
| 55 |
verified: true
|
| 56 |
-
|
| 57 |
-
|
| 58 |
value: 22.3455
|
|
|
|
| 59 |
verified: true
|
| 60 |
-
|
| 61 |
-
|
| 62 |
value: 37.2302
|
|
|
|
| 63 |
verified: true
|
| 64 |
-
|
| 65 |
-
|
| 66 |
value: 37.2323
|
|
|
|
| 67 |
verified: true
|
| 68 |
-
|
| 69 |
-
|
| 70 |
value: 2.3128726482391357
|
|
|
|
| 71 |
verified: true
|
| 72 |
-
|
| 73 |
-
|
| 74 |
value: 25.5435
|
|
|
|
| 75 |
verified: true
|
|
|
|
| 76 |
- task:
|
| 77 |
type: summarization
|
| 78 |
name: Summarization
|
|
@@ -82,30 +94,36 @@ model-index:
|
|
| 82 |
config: samsum
|
| 83 |
split: train
|
| 84 |
metrics:
|
| 85 |
-
-
|
| 86 |
-
type: rouge
|
| 87 |
value: 24.7852
|
|
|
|
| 88 |
verified: true
|
| 89 |
-
|
| 90 |
-
|
| 91 |
value: 5.2533
|
|
|
|
| 92 |
verified: true
|
| 93 |
-
|
| 94 |
-
|
| 95 |
value: 18.6792
|
|
|
|
| 96 |
verified: true
|
| 97 |
-
|
| 98 |
-
|
| 99 |
value: 20.629
|
|
|
|
| 100 |
verified: true
|
| 101 |
-
|
| 102 |
-
|
| 103 |
value: 3.746837854385376
|
|
|
|
| 104 |
verified: true
|
| 105 |
-
|
| 106 |
-
|
| 107 |
value: 23.1206
|
|
|
|
| 108 |
verified: true
|
|
|
|
| 109 |
- task:
|
| 110 |
type: summarization
|
| 111 |
name: Summarization
|
|
@@ -115,30 +133,36 @@ model-index:
|
|
| 115 |
config: samsum
|
| 116 |
split: test
|
| 117 |
metrics:
|
| 118 |
-
-
|
| 119 |
-
type: rouge
|
| 120 |
value: 24.9158
|
|
|
|
| 121 |
verified: true
|
| 122 |
-
|
| 123 |
-
|
| 124 |
value: 5.5837
|
|
|
|
| 125 |
verified: true
|
| 126 |
-
|
| 127 |
-
|
| 128 |
value: 18.8935
|
|
|
|
| 129 |
verified: true
|
| 130 |
-
|
| 131 |
-
|
| 132 |
value: 20.76
|
|
|
|
| 133 |
verified: true
|
| 134 |
-
|
| 135 |
-
|
| 136 |
value: 3.775235891342163
|
|
|
|
| 137 |
verified: true
|
| 138 |
-
|
| 139 |
-
|
| 140 |
value: 23.0928
|
|
|
|
| 141 |
verified: true
|
|
|
|
| 142 |
---
|
| 143 |
### Bart model finetuned on xsum
|
| 144 |
|
|
|
|
| 1 |
---
|
|
|
|
|
|
|
| 2 |
language:
|
| 3 |
- en
|
| 4 |
license: mit
|
| 5 |
+
tags:
|
| 6 |
+
- summarization
|
| 7 |
model-index:
|
| 8 |
- name: facebook/bart-large-xsum
|
| 9 |
results:
|
|
|
|
| 16 |
config: 3.0.0
|
| 17 |
split: test
|
| 18 |
metrics:
|
| 19 |
+
- type: rouge
|
|
|
|
| 20 |
value: 25.2697
|
| 21 |
+
name: ROUGE-1
|
| 22 |
verified: true
|
| 23 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMTM3ZWYzNDIyZWRlYzI2MDRkNmQwNzg4NTBhMzMzMmEwNGI5Mjg4ZGNkYzc0ODJjMWNjM2VkMDczNzk4M2ZhYiIsInZlcnNpb24iOjF9.FhfTibmxB-KfZdA0QA-dlaW2s837Y34litHb4SomxCTctYAuwwuFXhRjaYd1a3Q0RurJAOS5v31-LyQVnBiOBw
|
| 24 |
+
- type: rouge
|
| 25 |
value: 7.6638
|
| 26 |
+
name: ROUGE-2
|
| 27 |
verified: true
|
| 28 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNGI5NWVhYzAxYjRlMjRjNDlmZTRiNmY0Y2ZhYjJhNDA0MWRlZDUwZGIyZDg3MmViOTQ1MmQ3YjQxZjg4MWZhMyIsInZlcnNpb24iOjF9.CR6lwIak_ku4EiobhSpyAhtJmHdqJaBldAgJLGgrI1FZ4fQGWVcz1ugfD5O0amFeA5vYGO4_mppjuRhGR2ZMAA
|
| 29 |
+
- type: rouge
|
| 30 |
value: 17.1808
|
| 31 |
+
name: ROUGE-L
|
| 32 |
verified: true
|
| 33 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZGFhMmQwMWQyYzZhM2ZmOGMzYzI1YTg5MGQ3YWNmYTZjOWNhYjg1YzQ3MDA4MzA4Y2QxZWVlMjgyOWNkZjE3ZCIsInZlcnNpb24iOjF9.bbTmYKsUG57-9gCbk4f789A1GzvUpzjrAGI_GBGgg9TK-Lu56x38scURnsAENrKmEqOjBSTqROkEMZSJQ0bRAg
|
| 34 |
+
- type: rouge
|
| 35 |
value: 21.7933
|
| 36 |
+
name: ROUGE-LSUM
|
| 37 |
verified: true
|
| 38 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzQ0ODY2M2Y4YjMxZTMxNTczYWVmOTQ0MDY1Y2Y5YWM3ZTU1ZTE2YWQyNWM4ZDIyNGZlZDYyM2VkNjQ2MjI2MyIsInZlcnNpb24iOjF9.28iOKCynvIt6kK5mhM6ZzKJsnwjVv_CDMG8veAB0JYeZ4yyrM-tyQUzaorFAbEyb9JBJnpn7YJR9ntGTP3YcCw
|
| 39 |
+
- type: loss
|
| 40 |
value: 3.5042972564697266
|
| 41 |
+
name: loss
|
| 42 |
verified: true
|
| 43 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNmQxMTc2OWU5MWFjODNkNGRjYzU5NWRkOWVlYzY3OGRkMGY2N2NlYmM4MTJkZDRkZTBkMDI0ZWJmYTUxYzZjMSIsInZlcnNpb24iOjF9.PbOoV_245iT1FuAOf03tKAIkBVFRop4XsB26v5qF4dPPeZRP2M2pXJOZli0hILH6hUX0-D3IGDV-a8segNCICw
|
| 44 |
+
- type: gen_len
|
| 45 |
value: 27.4462
|
| 46 |
+
name: gen_len
|
| 47 |
verified: true
|
| 48 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiN2Q1YmRkNTE5OTVlYjdlYzc4YTIyYjhjNjIyOWUxMTc5MDBiNTVjYzQ3NWEyODdhZGFjNzUzNjIzMTcxNzhlYSIsInZlcnNpb24iOjF9.szl4fWDoBqVXKbBQxXV9DFgk9UbFLedmiZmGBI1sKoN69jw8IZopOs4VYtyY5TbpzsVGzpHoZnRCCrVQG8V3AQ
|
| 49 |
- task:
|
| 50 |
type: summarization
|
| 51 |
name: Summarization
|
|
|
|
| 55 |
config: default
|
| 56 |
split: test
|
| 57 |
metrics:
|
| 58 |
+
- type: rouge
|
|
|
|
| 59 |
value: 45.4525
|
| 60 |
+
name: ROUGE-1
|
| 61 |
verified: true
|
| 62 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTIxOWFlMWQxYzBiMzg1ODkxMmI3NTQwYmFiYzlkZDc4YTc3OWNhNDc3YTNmNjdhOGQyNGQ3NWExYTJhMGVjZiIsInZlcnNpb24iOjF9.CEDDzEKRIdxVndsQ5R9P6ROu70YMTUFtKQcDNI2BaHpoux3uqR20xBnd9xVJbaihnTm8Rn3Gz3FSEU6HloteBg
|
| 63 |
+
- type: rouge
|
| 64 |
value: 22.3455
|
| 65 |
+
name: ROUGE-2
|
| 66 |
verified: true
|
| 67 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTBjNTgzNmY4ZGU0MzcyYzNmOGVmYTAwODgyNjNkNjMwNTA2YWRkNDM2NzM2NDQxN2IzOWMxMGFjZDkwY2I0YiIsInZlcnNpb24iOjF9.30NFIQgNYUdqCgIni38Nd0mPjkFAqEQqCnB0p58Csiukp8oZ9NSRUJJHKsxdQ_3mcmkwz4l8C87AdarL-X2wBw
|
| 68 |
+
- type: rouge
|
| 69 |
value: 37.2302
|
| 70 |
+
name: ROUGE-L
|
| 71 |
verified: true
|
| 72 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNTg5Yzg3OGM0MTM0ODI0NTJiMmQwM2Q5M2ExZWMwZDU2YjZjNzJmZGVmNWVmMWExYzYxMzRhOTg1MzUyMTY0ZCIsInZlcnNpb24iOjF9.7Os0OXC-gX5s0kcEhiKdSv9j40g_EOIrOXGSMPAZJz3NDP1EkKddSPpha8mwFFkphxYbryg69Z6b1NE3TBa5AQ
|
| 73 |
+
- type: rouge
|
| 74 |
value: 37.2323
|
| 75 |
+
name: ROUGE-LSUM
|
| 76 |
verified: true
|
| 77 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzAyMDUzN2RlZDE5MjdiZGM1NjU2ZjQ1NGJlZTZmNjNlMjc2ZWUxZWU3YmY2OGY0YmJiZWViOTUyMDg2MDdhMiIsInZlcnNpb24iOjF9.M18WnqZ79MnAwz17NRHQ2iqv2_JneQ2SIV2sx10Pi3ACLYLordzcYAeQAGcfh38qQow-TyPXS-MC2Alwjvj3Bw
|
| 78 |
+
- type: loss
|
| 79 |
value: 2.3128726482391357
|
| 80 |
+
name: loss
|
| 81 |
verified: true
|
| 82 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzAwZGZkYTVkZGM1MTNhNjA3NzBmNDMxNGJiODA0MzVhYmZkYjIzYzc1OGYxMjNiMDFhZjkxOGZmYTk1YjUyYyIsInZlcnNpb24iOjF9.i3pCisuPdpYFNLjniPejbseEeh3j6elXWze19As4pUJb3Gxp8uStckPR5rhmV_r-FDP7wKFY2GrqJZrWGGsVBg
|
| 83 |
+
- type: gen_len
|
| 84 |
value: 25.5435
|
| 85 |
+
name: gen_len
|
| 86 |
verified: true
|
| 87 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZTE3ODlkZDhhMTEwNTlhNzVjMWMxMGQyZDc0OTc0NWY0MDBlMzUzNGI3MGQwNmJmNzQ3NTQ5MjhhNDhiYTM5YSIsInZlcnNpb24iOjF9.e7nHzg3OH3zkWiCj3iZVAAQG6Zy0E16_MJzBEEyGTlSVuPGMziNfcjRvLD6WeY_6lXUonEwc9lur0X-qUvB7Aw
|
| 88 |
- task:
|
| 89 |
type: summarization
|
| 90 |
name: Summarization
|
|
|
|
| 94 |
config: samsum
|
| 95 |
split: train
|
| 96 |
metrics:
|
| 97 |
+
- type: rouge
|
|
|
|
| 98 |
value: 24.7852
|
| 99 |
+
name: ROUGE-1
|
| 100 |
verified: true
|
| 101 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiY2Y0NDZlNGM4M2IzMDNhZmI4MTI2ZTI0MmQ4ODYxYTk1Zjc5ZGYyYWQzNmMyN2Q2MDViNDVjNDBlYjVkNDM0MiIsInZlcnNpb24iOjF9.oSFDaNHNgTQ5WnrJqn01jXG2u-5HPhIrmPtyc_LT2kGwGs3dBe89HVsRFs3c6oAumvstKadMJ4TcQy6pDdg1Dw
|
| 102 |
+
- type: rouge
|
| 103 |
value: 5.2533
|
| 104 |
+
name: ROUGE-2
|
| 105 |
verified: true
|
| 106 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTc1ZTJhY2U1MDNlM2EyN2Q0NWJkMDZlYTdkYzM3M2ZmY2JlMzU4ZDQwYWMzZjhhMTU5Y2VlMTIwNWYzNWM1MiIsInZlcnNpb24iOjF9.meJyYKPZRtmT2YLBiBWBxPf1XiZVFDjbFrc5_SVJv3EWvNuXlTZy0qUVbZgE9rnKLA1ND_0Yj1o-qlY8G6iVDg
|
| 107 |
+
- type: rouge
|
| 108 |
value: 18.6792
|
| 109 |
+
name: ROUGE-L
|
| 110 |
verified: true
|
| 111 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzYxMjIzNWIzOTRlYWYwMmQ5YjFiNmM1NDRkMzVhZGI0OTJkMjE0OThmNGEwOWRkZjJiMGYyMTc0Zjc3NjUyNSIsInZlcnNpb24iOjF9.VlXVgQSBVdDjduKV_kg2TRoinJn7kkfsTcLJa_iwDTn2Lw0ZyyOBTcGdfWFcXeteee9m0-iA7uZBGkiaKtQkBw
|
| 112 |
+
- type: rouge
|
| 113 |
value: 20.629
|
| 114 |
+
name: ROUGE-LSUM
|
| 115 |
verified: true
|
| 116 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzQ3NmM1ZTcyMGM3ZDRkZmJmN2IyZmNmOTFkYzYzZmFjNTJmYWVhYmViZGIwY2U0YzIzNWFjYzZjYjc2NGZhMyIsInZlcnNpb24iOjF9.POIXnLpVaPYKk07apBROnvbevoI4LNfs9LAelqJmL5aZsQrvb9w_mUj_y8cr_JtWMcYioKvMQfCNqweMR0QlCw
|
| 117 |
+
- type: loss
|
| 118 |
value: 3.746837854385376
|
| 119 |
+
name: loss
|
| 120 |
verified: true
|
| 121 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMDUxODlhM2NmNTNkY2E5OWE0MmVhMzEzOWFlZDUzMGUzNjQ0OWJmYmUxYWY2NzU3ZjQwZjYyMGQ0MDlkMDA2MyIsInZlcnNpb24iOjF9.dtkcai-opGLauvudNLIxw0GtkNF5DlcUG7A7h2xi42ymyUNigrAg0PcjcjuUt8uW4SEf4oTON5nmlLu924m_Dg
|
| 122 |
+
- type: gen_len
|
| 123 |
value: 23.1206
|
| 124 |
+
name: gen_len
|
| 125 |
verified: true
|
| 126 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNWMzOTVkOWYyYmJkZjY1MDJkMDdkMDhiMmEwNWJjNDU2ZDliMDg4ZTY0ODllM2VlMDUzNWY1NGViZjMyODZlNSIsInZlcnNpb24iOjF9.sLobtAdG5opp6UgM0sMzOQdXKQRbPNFFYng0Yv62loV8Ihwz5aDr8v0rlmmmvjUI45zXKpFdll0MP2mEjD91Aw
|
| 127 |
- task:
|
| 128 |
type: summarization
|
| 129 |
name: Summarization
|
|
|
|
| 133 |
config: samsum
|
| 134 |
split: test
|
| 135 |
metrics:
|
| 136 |
+
- type: rouge
|
|
|
|
| 137 |
value: 24.9158
|
| 138 |
+
name: ROUGE-1
|
| 139 |
verified: true
|
| 140 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYmZmYzYxNDU1YWVmYTNhZTAzYWNhNjE4YmQzNzUwMmJmZjM0OTdlZGY4NDJiNmU1OGM1NGUyZGU3MjEyNmZjZCIsInZlcnNpb24iOjF9.4_VqXLFvNv4EMJSg-vMYoj1BGp5ayLay8soylnHwEqicYeLyYNjeN1aYO4HFn9juBejXWLb1Yhe5n3nET8tRBg
|
| 141 |
+
- type: rouge
|
| 142 |
value: 5.5837
|
| 143 |
+
name: ROUGE-2
|
| 144 |
verified: true
|
| 145 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzU2YWVjOWY3YzAzNjc2NWYxZTkzZDgyMzA4NzY0YWJmNjdlZTVlYWY0MmRkNjhiMjc1ZWZmNGRiYTJiNjNhNiIsInZlcnNpb24iOjF9.10Zo_slj3TtOkQ1ve2w2As8NrLUU1tjqyku7UMREqtYigd56p4SRuFZtr-cfZW1nrVfXrOw0BQlWMF1LevDECA
|
| 146 |
+
- type: rouge
|
| 147 |
value: 18.8935
|
| 148 |
+
name: ROUGE-L
|
| 149 |
verified: true
|
| 150 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYWYwMWY3NmEwZDYxYTMwYmM2NjY1YmFmOWI3Mjg3NmI2MGM5MDQwNzAyMGY0ZWVjNzc0MjJiY2IwNDhjNmEyZSIsInZlcnNpb24iOjF9.sbb6WadT2lIB1JgWxGgMg2hzkSp5sTn5qBbUfUZFupv1ugvpGEE6bCo7fNuYCQRu0qOYvWGNyibhYIAoJqymDQ
|
| 151 |
+
- type: rouge
|
| 152 |
value: 20.76
|
| 153 |
+
name: ROUGE-LSUM
|
| 154 |
verified: true
|
| 155 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTcyNzJhMTU4YjM1ZDgyZWJiMTAyYzAyYTU4Y2E0M2M1YjVmODQzM2JlMmM2YmM3MWZlZTVhNWJkMzdjODhmNCIsInZlcnNpb24iOjF9.YQJDobcKtaOIro8g7Y7opjpfKZ081aJvYKCpzkBhDA5di1GIKtIjGkHqdulqtcGog_L5IcEfr9QBmwIGRFNqCw
|
| 156 |
+
- type: loss
|
| 157 |
value: 3.775235891342163
|
| 158 |
+
name: loss
|
| 159 |
verified: true
|
| 160 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2ViMmVhNGFkZmFiYmU5NmQxZmIzMTBkYTYwOGQ5NzA3MWQxMmZmZjljMmNkNjNkZGFlMTI0MGY2ZDQ3MjAxNSIsInZlcnNpb24iOjF9.YH3xzE3aQCPUXm1591TdRyJo2UM62QcP1705EKxHmg7BzS5VJmZI0-fpEMxegB1aMzNiEr7WSJ7pOWFG_1MwBQ
|
| 161 |
+
- type: gen_len
|
| 162 |
value: 23.0928
|
| 163 |
+
name: gen_len
|
| 164 |
verified: true
|
| 165 |
+
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZWZmZTQ0NjNkZWVlNWUwODNhZWMzMzQ2MGM5NGY2ZDY1OGUzM2JmYzRlN2JjNTczMmRlNWI0MjRhNGM5NjJmYyIsInZlcnNpb24iOjF9.9jC1XMqvVzK6bjwltfHJPswBWIwqbiguGX3onycpTSgbONtx1nsvB163sOfwzRppfGcLPC8E_lmEYqvgej7eCg
|
| 166 |
---
|
| 167 |
### Bart model finetuned on xsum
|
| 168 |
|