Update README.md
README.md CHANGED
@@ -99,6 +99,35 @@ In LM-Studio, simply select the ChatML Prefix on the settings side pane:
 
 Average: 0.4399
 
+## BigBench Hard
+
+```
+hf-causal-experimental (pretrained=openaccess-ai-collective/dpopenhermes-alpha-v1,dtype=bfloat16,trust_remote_code=True,use_accelerate=True), limit: None, provide_description: False, num_fewshot: 0, batch_size: 16
+|                      Task                      |Version|       Metric        |Value |   |Stderr|
+|------------------------------------------------|------:|---------------------|-----:|---|-----:|
+|bigbench_causal_judgement                       |      0|multiple_choice_grade|0.5632|±  |0.0361|
+|bigbench_date_understanding                     |      0|multiple_choice_grade|0.6612|±  |0.0247|
+|bigbench_disambiguation_qa                      |      0|multiple_choice_grade|0.3566|±  |0.0299|
+|bigbench_geometric_shapes                       |      0|multiple_choice_grade|0.2006|±  |0.0212|
+|                                                |       |exact_str_match      |0.0334|±  |0.0095|
+|bigbench_logical_deduction_five_objects         |      0|multiple_choice_grade|0.3020|±  |0.0206|
+|bigbench_logical_deduction_seven_objects        |      0|multiple_choice_grade|0.2086|±  |0.0154|
+|bigbench_logical_deduction_three_objects        |      0|multiple_choice_grade|0.5033|±  |0.0289|
+|bigbench_movie_recommendation                   |      0|multiple_choice_grade|0.4220|±  |0.0221|
+|bigbench_navigate                               |      0|multiple_choice_grade|0.5000|±  |0.0158|
+|bigbench_reasoning_about_colored_objects        |      0|multiple_choice_grade|0.7035|±  |0.0102|
+|bigbench_ruin_names                             |      0|multiple_choice_grade|0.4107|±  |0.0233|
+|bigbench_salient_translation_error_detection    |      0|multiple_choice_grade|0.2154|±  |0.0130|
+|bigbench_snarks                                 |      0|multiple_choice_grade|0.7127|±  |0.0337|
+|bigbench_sports_understanding                   |      0|multiple_choice_grade|0.6988|±  |0.0146|
+|bigbench_temporal_sequences                     |      0|multiple_choice_grade|0.4670|±  |0.0158|
+|bigbench_tracking_shuffled_objects_five_objects |      0|multiple_choice_grade|0.2072|±  |0.0115|
+|bigbench_tracking_shuffled_objects_seven_objects|      0|multiple_choice_grade|0.1731|±  |0.0090|
+|bigbench_tracking_shuffled_objects_three_objects|      0|multiple_choice_grade|0.5033|±  |0.0289|
+```
+
+Average: 0.4338
+
 ## GPT4All
 
 ```
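
The first line inside the added fence is the run configuration echoed by EleutherAI's lm-evaluation-harness. As a reproduction aid, here is a minimal sketch of an equivalent run through the harness's Python API; it assumes the v0.3-era `simple_evaluate` entry point, so argument names may differ between harness versions, and it is not a verbatim copy of the author's command.

```python
# Hypothetical reproduction sketch for the BigBench Hard run above,
# assuming the v0.3-era lm-evaluation-harness API.
from lm_eval import evaluator

BBH_TASKS = [
    "bigbench_causal_judgement",
    "bigbench_date_understanding",
    "bigbench_disambiguation_qa",
    "bigbench_geometric_shapes",
    "bigbench_logical_deduction_five_objects",
    "bigbench_logical_deduction_seven_objects",
    "bigbench_logical_deduction_three_objects",
    "bigbench_movie_recommendation",
    "bigbench_navigate",
    "bigbench_reasoning_about_colored_objects",
    "bigbench_ruin_names",
    "bigbench_salient_translation_error_detection",
    "bigbench_snarks",
    "bigbench_sports_understanding",
    "bigbench_temporal_sequences",
    "bigbench_tracking_shuffled_objects_five_objects",
    "bigbench_tracking_shuffled_objects_seven_objects",
    "bigbench_tracking_shuffled_objects_three_objects",
]

results = evaluator.simple_evaluate(
    model="hf-causal-experimental",
    model_args=(
        "pretrained=openaccess-ai-collective/dpopenhermes-alpha-v1,"
        "dtype=bfloat16,trust_remote_code=True,use_accelerate=True"
    ),
    tasks=BBH_TASKS,
    num_fewshot=0,   # matches num_fewshot: 0 in the log
    batch_size=16,   # matches batch_size: 16 in the log
)
for task, metrics in results["results"].items():
    print(task, metrics)
```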
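The reported average matches the mean of the 18 `multiple_choice_grade` scores in the table; the secondary `exact_str_match` metric for `bigbench_geometric_shapes` is not counted. A quick check:

```python
# Sanity-check the reported BigBench Hard average: the mean of the
# 18 multiple_choice_grade values from the table above.
scores = [
    0.5632, 0.6612, 0.3566, 0.2006, 0.3020, 0.2086, 0.5033, 0.4220,
    0.5000, 0.7035, 0.4107, 0.2154, 0.7127, 0.6988, 0.4670, 0.2072,
    0.1731, 0.5033,
]
print(f"Average: {sum(scores) / len(scores):.4f}")  # Average: 0.4338
```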