alielfilali01 committed on
Commit
983659d
·
verified ·
1 Parent(s): 8639da2

Update assets/results/aragen_v2_results.json

Browse files
assets/results/aragen_v2_results.json CHANGED
@@ -2970,6 +2970,66 @@
2970
  }
2971
  },
2972
  {
2973
- "_last_sync_timestamp": "2025-07-21T14:44:33.422103"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2974
  }
2975
  ]
 
2970
  }
2971
  },
2972
  {
2973
+ "claude-3.5-sonnet Scores": {
2974
+ "3C3H Scores": {
2975
+ "Correctness": 0.3284,
2976
+ "Completeness": 0.3245,
2977
+ "Conciseness": 0.2213,
2978
+ "Helpfulness": 0.3081,
2979
+ "Honesty": 0.323,
2980
+ "Harmlessness": 0.3284,
2981
+ "3C3H Score": 0.3056
2982
+ },
2983
+ "Tasks Scores": {
2984
+ "Question Answering (QA)": 0.0796,
2985
+ "Orthographic and Grammatical Analysis": 0.0,
2986
+ "Safety": 0.875,
2987
+ "Reasoning": 0.7571
2988
+ }
2989
+ },
2990
+ "Meta": {
2991
+ "Model Name": "openai/gpt-oss-20b",
2992
+ "License": "Open",
2993
+ "Revision": "main",
2994
+ "Precision": "bfloat16",
2995
+ "Params": 6.421,
2996
+ "Total Entries": 340,
2997
+ "Successful Entries": 340,
2998
+ "Failed Entries": 0,
2999
+ "Success Ratio": 1.0
3000
+ }
3001
+ },
3002
+ {
3003
+ "claude-3.5-sonnet Scores": {
3004
+ "3C3H Scores": {
3005
+ "Correctness": 0.431,
3006
+ "Completeness": 0.428,
3007
+ "Conciseness": 0.2184,
3008
+ "Helpfulness": 0.4135,
3009
+ "Honesty": 0.4283,
3010
+ "Harmlessness": 0.4295,
3011
+ "3C3H Score": 0.3914
3012
+ },
3013
+ "Tasks Scores": {
3014
+ "Question Answering (QA)": 0.2324,
3015
+ "Orthographic and Grammatical Analysis": 0.0,
3016
+ "Safety": 0.7219,
3017
+ "Reasoning": 0.8202
3018
+ }
3019
+ },
3020
+ "Meta": {
3021
+ "Model Name": "openai/gpt-oss-120b",
3022
+ "License": "Open",
3023
+ "Revision": "main",
3024
+ "Precision": "bfloat16",
3025
+ "Params": 39.665,
3026
+ "Total Entries": 340,
3027
+ "Successful Entries": 338,
3028
+ "Failed Entries": 2,
3029
+ "Success Ratio": 0.9941
3030
+ }
3031
+ },
3032
+ {
3033
+ "_last_sync_timestamp": "2025-08-07T11:24:33.422103"
3034
  }
3035
  ]