Update assets/results/aragen_v2_results.json
Browse files
assets/results/aragen_v2_results.json
CHANGED
|
@@ -2970,6 +2970,66 @@
|
|
| 2970 |
}
|
| 2971 |
},
|
| 2972 |
{
|
| 2973 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2974 |
}
|
| 2975 |
]
|
|
|
|
| 2970 |
}
|
| 2971 |
},
|
| 2972 |
{
|
| 2973 |
+
"claude-3.5-sonnet Scores": {
|
| 2974 |
+
"3C3H Scores": {
|
| 2975 |
+
"Correctness": 0.3284,
|
| 2976 |
+
"Completeness": 0.3245,
|
| 2977 |
+
"Conciseness": 0.2213,
|
| 2978 |
+
"Helpfulness": 0.3081,
|
| 2979 |
+
"Honesty": 0.323,
|
| 2980 |
+
"Harmlessness": 0.3284,
|
| 2981 |
+
"3C3H Score": 0.3056
|
| 2982 |
+
},
|
| 2983 |
+
"Tasks Scores": {
|
| 2984 |
+
"Question Answering (QA)": 0.0796,
|
| 2985 |
+
"Orthographic and Grammatical Analysis": 0.0,
|
| 2986 |
+
"Safety": 0.875,
|
| 2987 |
+
"Reasoning": 0.7571
|
| 2988 |
+
}
|
| 2989 |
+
},
|
| 2990 |
+
"Meta": {
|
| 2991 |
+
"Model Name": "openai/gpt-oss-20b",
|
| 2992 |
+
"License": "Open",
|
| 2993 |
+
"Revision": "main",
|
| 2994 |
+
"Precision": "bfloat16",
|
| 2995 |
+
"Params": 6.421,
|
| 2996 |
+
"Total Entries": 340,
|
| 2997 |
+
"Successful Entries": 340,
|
| 2998 |
+
"Failed Entries": 0,
|
| 2999 |
+
"Success Ratio": 1.0
|
| 3000 |
+
}
|
| 3001 |
+
},
|
| 3002 |
+
{
|
| 3003 |
+
"claude-3.5-sonnet Scores": {
|
| 3004 |
+
"3C3H Scores": {
|
| 3005 |
+
"Correctness": 0.431,
|
| 3006 |
+
"Completeness": 0.428,
|
| 3007 |
+
"Conciseness": 0.2184,
|
| 3008 |
+
"Helpfulness": 0.4135,
|
| 3009 |
+
"Honesty": 0.4283,
|
| 3010 |
+
"Harmlessness": 0.4295,
|
| 3011 |
+
"3C3H Score": 0.3914
|
| 3012 |
+
},
|
| 3013 |
+
"Tasks Scores": {
|
| 3014 |
+
"Question Answering (QA)": 0.2324,
|
| 3015 |
+
"Orthographic and Grammatical Analysis": 0.0,
|
| 3016 |
+
"Safety": 0.7219,
|
| 3017 |
+
"Reasoning": 0.8202
|
| 3018 |
+
}
|
| 3019 |
+
},
|
| 3020 |
+
"Meta": {
|
| 3021 |
+
"Model Name": "openai/gpt-oss-120b",
|
| 3022 |
+
"License": "Open",
|
| 3023 |
+
"Revision": "main",
|
| 3024 |
+
"Precision": "bfloat16",
|
| 3025 |
+
"Params": 39.665,
|
| 3026 |
+
"Total Entries": 340,
|
| 3027 |
+
"Successful Entries": 338,
|
| 3028 |
+
"Failed Entries": 2,
|
| 3029 |
+
"Success Ratio": 0.9941
|
| 3030 |
+
}
|
| 3031 |
+
},
|
| 3032 |
+
{
|
| 3033 |
+
"_last_sync_timestamp": "2025-08-07T11:24:33.422103"
|
| 3034 |
}
|
| 3035 |
]
|