add octocoder and octogeex
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- community_results/bigcode_octocoder_loubnabnl/bigcode_octocoder_loubnabnl.json +1 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_humaneval_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_humanevalsynthesize-python_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-cpp_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-d_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-java_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-jl_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-js_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-lua_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-php_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-r_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-rkt_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-rs_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-swift_octocoder.json +0 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_humaneval_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_humanevalsynthesize-python_octocoder.json +43 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-cpp_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-d_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-java_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-jl_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-js_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-lua_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-php_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-r_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-rkt_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-rs_octocoder.json +11 -0
- community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-swift_octocoder.json +11 -0
- community_results/bigcode_octogeex_loubnabnl/bigcode_octogeex_loubnabnl.json +1 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_humaneval_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_humanevalsynthesize-python_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-cpp_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-d_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-java_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-jl_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-js_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-lua_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-php_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-r_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-rkt_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-rs_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-swift_octogeex.json +0 -0
- community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_humaneval_octogeex.json +11 -0
- community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_humanevalsynthesize-python_octogeex.json +43 -0
- community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-cpp_octogeex.json +11 -0
- community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-d_octogeex.json +11 -0
- community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-java_octogeex.json +11 -0
- community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-jl_octogeex.json +11 -0
- community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-js_octogeex.json +11 -0
- community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-lua_octogeex.json +11 -0
- community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-php_octogeex.json +11 -0
community_results/bigcode_octocoder_loubnabnl/bigcode_octocoder_loubnabnl.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task": "multiple-swift", "pass@1": 0.18240506329113923}, {"task": "multiple-lua", "pass@1": 0.2255900621118012}, {"task": "multiple-rkt", "pass@1": 0.10608695652173915}, {"task": "multiple-js", "pass@1": 0.32795031055900625}, {"task": "multiple-d", "pass@1": 0.1334615384615385}, {"task": "multiple-r", "pass@1": 0.14385093167701862}, {"task": "multiple-cpp", "pass@1": 0.29316770186335417}, {"task": "multiple-rs", "pass@1": 0.24256410256410257}, {"task": "multiple-jl", "pass@1": 0.24503144654088063}, {"task": "multiple-php", "pass@1": 0.2675776397515528}, {"task": "humaneval", "pass@1": 0.355609756097561}, {"task": "multiple-java", "pass@1": 0.26025316455696207}], "meta": {"model": "bigcode/octocoder"}}
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_humaneval_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_humanevalsynthesize-python_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-cpp_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-d_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-java_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-jl_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-js_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-lua_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-php_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-r_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-rkt_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-rs_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/generations_octocoder/generations_multiple-swift_octocoder.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_humaneval_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"humaneval": {
|
| 3 |
+
"pass@1": 0.355609756097561,
|
| 4 |
+
"pass@10": 0.5180529150889583
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_humanevalsynthesize-python_octocoder.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"humanevalsynthesize-python": {
|
| 3 |
+
"pass@1": 0.45304878048780495,
|
| 4 |
+
"pass@10": 0.6338166655659803
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"prefix": "",
|
| 8 |
+
"do_sample": true,
|
| 9 |
+
"temperature": 0.2,
|
| 10 |
+
"top_k": 0,
|
| 11 |
+
"top_p": 0.95,
|
| 12 |
+
"n_samples": 50,
|
| 13 |
+
"eos": "<|endoftext|>",
|
| 14 |
+
"seed": 0,
|
| 15 |
+
"model": "bigcode/octocoder",
|
| 16 |
+
"modeltype": "causal",
|
| 17 |
+
"peft_model": null,
|
| 18 |
+
"revision": null,
|
| 19 |
+
"use_auth_token": true,
|
| 20 |
+
"trust_remote_code": true,
|
| 21 |
+
"tasks": "humanevalsynthesize-python",
|
| 22 |
+
"instruction_tokens": null,
|
| 23 |
+
"batch_size": 50,
|
| 24 |
+
"max_length_generation": 2048,
|
| 25 |
+
"precision": "bf16",
|
| 26 |
+
"load_in_8bit": false,
|
| 27 |
+
"load_in_4bit": false,
|
| 28 |
+
"limit": null,
|
| 29 |
+
"limit_start": 0,
|
| 30 |
+
"postprocess": true,
|
| 31 |
+
"allow_code_execution": true,
|
| 32 |
+
"generation_only": false,
|
| 33 |
+
"load_generations_path": null,
|
| 34 |
+
"load_data_path": null,
|
| 35 |
+
"metric_output_path": "/fsx/loubna/code/dev/leader/bigcode-evaluation-harness/generations_octocoder/metrics24_humanevalsynthesize-python_octocoder.json",
|
| 36 |
+
"save_generations": true,
|
| 37 |
+
"save_generations_path": "/fsx/loubna/code/dev/leader/bigcode-evaluation-harness/generations_octocoder/generations24_humanevalsynthesize-python_octocoder.json",
|
| 38 |
+
"save_references": false,
|
| 39 |
+
"prompt": "octocoder",
|
| 40 |
+
"max_memory_per_gpu": null,
|
| 41 |
+
"check_references": false
|
| 42 |
+
}
|
| 43 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-cpp_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-cpp": {
|
| 3 |
+
"pass@1": 0.29316770186335417,
|
| 4 |
+
"pass@10": 0.418989804893579
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-d_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-d": {
|
| 3 |
+
"pass@1": 0.1334615384615385,
|
| 4 |
+
"pass@10": 0.22191295496585037
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-java_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-java": {
|
| 3 |
+
"pass@1": 0.26025316455696207,
|
| 4 |
+
"pass@10": 0.3844432224837278
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-jl_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-jl": {
|
| 3 |
+
"pass@1": 0.24503144654088063,
|
| 4 |
+
"pass@10": 0.37015780634005285
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-js_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-js": {
|
| 3 |
+
"pass@1": 0.32795031055900625,
|
| 4 |
+
"pass@10": 0.46965261958375676
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-lua_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-lua": {
|
| 3 |
+
"pass@1": 0.2255900621118012,
|
| 4 |
+
"pass@10": 0.3642531020086279
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-php_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-php": {
|
| 3 |
+
"pass@1": 0.2675776397515528,
|
| 4 |
+
"pass@10": 0.4406294303479089
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-r_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-r": {
|
| 3 |
+
"pass@1": 0.14385093167701862,
|
| 4 |
+
"pass@10": 0.23625313277669435
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-rkt_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-rkt": {
|
| 3 |
+
"pass@1": 0.10608695652173915,
|
| 4 |
+
"pass@10": 0.18583082734989625
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-rs_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-rs": {
|
| 3 |
+
"pass@1": 0.24256410256410257,
|
| 4 |
+
"pass@10": 0.37981269956491903
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octocoder_loubnabnl/metrics_octocoder/metrics_multiple-swift_octocoder.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-swift": {
|
| 3 |
+
"pass@1": 0.18240506329113923,
|
| 4 |
+
"pass@10": 0.27015964860608316
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octocoder",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octogeex_loubnabnl/bigcode_octogeex_loubnabnl.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task": "humaneval", "pass@1": 0.3579268292682927}, {"task": "multiple-swift", "pass@1": 0.17025316455696202}, {"task": "multiple-cpp", "pass@1": 0.2392546583850932}, {"task": "multiple-js", "pass@1": 0.28496894409937884}, {"task": "multiple-lua", "pass@1": 0.1618633540372671}, {"task": "multiple-d", "pass@1": 0.09769230769230768}, {"task": "multiple-rs", "pass@1": 0.17935897435897438}, {"task": "multiple-r", "pass@1": 0.1366459627329193}, {"task": "multiple-php", "pass@1": 0.25850931677018635}, {"task": "multiple-rkt", "pass@1": 0.12024844720496894}, {"task": "multiple-jl", "pass@1": 0.22943396226415103}, {"task": "multiple-java", "pass@1": 0.1932911392405063}], "meta": {"model": "bigcode/octogeex"}}
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_humaneval_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_humanevalsynthesize-python_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-cpp_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-d_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-java_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-jl_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-js_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-lua_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-php_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-r_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-rkt_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-rs_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/generations_octogeex/generations_multiple-swift_octogeex.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_humaneval_octogeex.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"humaneval": {
|
| 3 |
+
"pass@1": 0.3579268292682927,
|
| 4 |
+
"pass@10": 0.5255121713418224
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octogeex",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_humanevalsynthesize-python_octogeex.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"humanevalsynthesize-python": {
|
| 3 |
+
"pass@1": 0.42280487804878053,
|
| 4 |
+
"pass@10": 0.615600414422801
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"prefix": "",
|
| 8 |
+
"do_sample": true,
|
| 9 |
+
"temperature": 0.2,
|
| 10 |
+
"top_k": 0,
|
| 11 |
+
"top_p": 0.95,
|
| 12 |
+
"n_samples": 50,
|
| 13 |
+
"eos": "<|endoftext|>",
|
| 14 |
+
"seed": 0,
|
| 15 |
+
"model": "bigcode/octogeex",
|
| 16 |
+
"modeltype": "causal",
|
| 17 |
+
"peft_model": null,
|
| 18 |
+
"revision": null,
|
| 19 |
+
"use_auth_token": true,
|
| 20 |
+
"trust_remote_code": true,
|
| 21 |
+
"tasks": "humanevalsynthesize-python",
|
| 22 |
+
"instruction_tokens": null,
|
| 23 |
+
"batch_size": 50,
|
| 24 |
+
"max_length_generation": 2048,
|
| 25 |
+
"precision": "bf16",
|
| 26 |
+
"load_in_8bit": false,
|
| 27 |
+
"load_in_4bit": false,
|
| 28 |
+
"limit": null,
|
| 29 |
+
"limit_start": 0,
|
| 30 |
+
"postprocess": true,
|
| 31 |
+
"allow_code_execution": true,
|
| 32 |
+
"generation_only": false,
|
| 33 |
+
"load_generations_path": null,
|
| 34 |
+
"load_data_path": null,
|
| 35 |
+
"metric_output_path": "/fsx/loubna/code/dev/leader/bigcode-evaluation-harness/generations_octogeex/metrics24_humanevalsynthesize-python_octogeex.json",
|
| 36 |
+
"save_generations": true,
|
| 37 |
+
"save_generations_path": "/fsx/loubna/code/dev/leader/bigcode-evaluation-harness/generations_octogeex/generations24_humanevalsynthesize-python_octogeex.json",
|
| 38 |
+
"save_references": false,
|
| 39 |
+
"prompt": "octogeex",
|
| 40 |
+
"max_memory_per_gpu": null,
|
| 41 |
+
"check_references": false
|
| 42 |
+
}
|
| 43 |
+
}
|
community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-cpp_octogeex.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-cpp": {
|
| 3 |
+
"pass@1": 0.2392546583850932,
|
| 4 |
+
"pass@10": 0.3770275481552431
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octogeex",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-d_octogeex.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-d": {
|
| 3 |
+
"pass@1": 0.09769230769230768,
|
| 4 |
+
"pass@10": 0.17328015912854275
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octogeex",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-java_octogeex.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-java": {
|
| 3 |
+
"pass@1": 0.1932911392405063,
|
| 4 |
+
"pass@10": 0.2956204163601109
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octogeex",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-jl_octogeex.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-jl": {
|
| 3 |
+
"pass@1": 0.22943396226415103,
|
| 4 |
+
"pass@10": 0.31614256642955946
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octogeex",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-js_octogeex.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-js": {
|
| 3 |
+
"pass@1": 0.28496894409937884,
|
| 4 |
+
"pass@10": 0.4542697803948776
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octogeex",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-lua_octogeex.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-lua": {
|
| 3 |
+
"pass@1": 0.1618633540372671,
|
| 4 |
+
"pass@10": 0.25142068318547794
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octogeex",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|
community_results/bigcode_octogeex_loubnabnl/metrics_octogeex/metrics_multiple-php_octogeex.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"multiple-php": {
|
| 3 |
+
"pass@1": 0.25850931677018635,
|
| 4 |
+
"pass@10": 0.4130885963164481
|
| 5 |
+
},
|
| 6 |
+
"config": {
|
| 7 |
+
"model": "bigcode/octogeex",
|
| 8 |
+
"temperature": 0.2,
|
| 9 |
+
"n_samples": 50
|
| 10 |
+
}
|
| 11 |
+
}
|