Update assets/results/aragen_v2_results.json
Browse files
assets/results/aragen_v2_results.json
CHANGED
|
@@ -3030,6 +3030,36 @@
|
|
| 3030 |
}
|
| 3031 |
},
|
| 3032 |
{
|
| 3033 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3034 |
}
|
| 3035 |
]
|
|
|
|
| 3030 |
}
|
| 3031 |
},
|
| 3032 |
{
|
| 3033 |
+
"claude-3.5-sonnet Scores": {
|
| 3034 |
+
"3C3H Scores": {
|
| 3035 |
+
"Correctness": 0.7532,
|
| 3036 |
+
"Completeness": 0.703,
|
| 3037 |
+
"Conciseness": 0.483,
|
| 3038 |
+
"Helpfulness": 0.7089,
|
| 3039 |
+
"Honesty": 0.7483,
|
| 3040 |
+
"Harmlessness": 0.7517,
|
| 3041 |
+
"3C3H Score": 0.6914
|
| 3042 |
+
},
|
| 3043 |
+
"Tasks Scores": {
|
| 3044 |
+
"Question Answering (QA)": 0.5892,
|
| 3045 |
+
"Orthographic and Grammatical Analysis": 0.575,
|
| 3046 |
+
"Safety": 0.8156,
|
| 3047 |
+
"Reasoning": 0.9607
|
| 3048 |
+
}
|
| 3049 |
+
},
|
| 3050 |
+
"Meta": {
|
| 3051 |
+
"Model Name": "gpt-5-2025-08-07",
|
| 3052 |
+
"License": "Proprietary",
|
| 3053 |
+
"Revision": "UNK",
|
| 3054 |
+
"Precision": "UNK",
|
| 3055 |
+
"Params": "UNK",
|
| 3056 |
+
"Total Entries": 340,
|
| 3057 |
+
"Successful Entries": 339,
|
| 3058 |
+
"Failed Entries": 1,
|
| 3059 |
+
"Success Ratio": 0.9971
|
| 3060 |
+
}
|
| 3061 |
+
},
|
| 3062 |
+
{
|
| 3063 |
+
"_last_sync_timestamp": "2025-08-12T11:24:33.422103"
|
| 3064 |
}
|
| 3065 |
]
|