SaylorTwift's picture
SaylorTwift HF Staff
Upload folder using huggingface_hub
2a427ee verified
{
"2025-11-20T15-36-59+01-00_long-horizon-execution-8192_Gm6T2GREQg6indZWYr7qUY.eval": {
"eval_id": "NA7cVCboUagosQMLCMyfpQ",
"run_id": "bpaANsnXqDLDYGsQaQY7As",
"task": "long_horizon_execution:8192",
"task_id": "Gm6T2GREQg6indZWYr7qUY",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:38:46+01:00",
"completed_at": "2025-11-20T15:44:01+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.01,
"params": {}
}
},
"2025-11-20T15-36-58+01-00_long-horizon-execution-4096_W3QMv8KVUf2BLSRZArAxzd.eval": {
"eval_id": "jJPdv4eRojLxynfw2MAedo",
"run_id": "bpaANsnXqDLDYGsQaQY7As",
"task": "long_horizon_execution:4096",
"task_id": "W3QMv8KVUf2BLSRZArAxzd",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:37:40+01:00",
"completed_at": "2025-11-20T15:42:05+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.84,
"params": {}
}
},
"2025-11-20T15-36-58+01-00_long-horizon-execution-65536_GmtEQzBe5eEKggR8HdoNoV.eval": {
"eval_id": "VhLNjTUfqn7yKq944iKk22",
"run_id": "bpaANsnXqDLDYGsQaQY7As",
"task": "long_horizon_execution:65536",
"task_id": "GmtEQzBe5eEKggR8HdoNoV",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:37:50+01:00",
"completed_at": "2025-11-20T15:40:03+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.0,
"params": {}
}
},
"2025-11-20T15-36-58+01-00_long-horizon-execution-2048_Uvr76zF8hkt4PgnsUPDWDS.eval": {
"eval_id": "UjKsVmVMSAm66eNny39LSK",
"run_id": "bpaANsnXqDLDYGsQaQY7As",
"task": "long_horizon_execution:2048",
"task_id": "Uvr76zF8hkt4PgnsUPDWDS",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:36:59+01:00",
"completed_at": "2025-11-20T15:39:10+01:00",
"primary_metric": {
"name": "accuracy",
"value": 1.0,
"params": {}
}
},
"2025-11-20T15-36-58+01-00_long-horizon-execution-32768_FZ9Z2AZfBoMdRpzJ3Rt4Sx.eval": {
"eval_id": "ad6DnHZEH9zkVdEAgJhwCP",
"run_id": "bpaANsnXqDLDYGsQaQY7As",
"task": "long_horizon_execution:32768",
"task_id": "FZ9Z2AZfBoMdRpzJ3Rt4Sx",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:36:59+01:00",
"completed_at": "2025-11-20T15:38:46+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.0,
"params": {}
}
},
"2025-11-20T15-36-58+01-00_long-horizon-execution-16384_mwMLFEBj8eYRiiYPvyD7Xd.eval": {
"eval_id": "QAVERTysNLU62kRvZQoAk5",
"run_id": "bpaANsnXqDLDYGsQaQY7As",
"task": "long_horizon_execution:16384",
"task_id": "mwMLFEBj8eYRiiYPvyD7Xd",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:36:59+01:00",
"completed_at": "2025-11-20T15:37:50+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.0,
"params": {}
}
},
"2025-11-20T15-36-58+01-00_long-horizon-execution-1024_Q8FPP67tZkwjmyx7MrkPdK.eval": {
"eval_id": "VTJ4tkhiXzyqpfyvDkqQbu",
"run_id": "bpaANsnXqDLDYGsQaQY7As",
"task": "long_horizon_execution:1024",
"task_id": "Q8FPP67tZkwjmyx7MrkPdK",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:36:59+01:00",
"completed_at": "2025-11-20T15:37:40+01:00",
"primary_metric": {
"name": "accuracy",
"value": 1.0,
"params": {}
}
},
"2025-11-20T15-33-02+01-00_long-horizon-execution-8192_QZ55FqjvLpVzDPfU4CgkdJ.eval": {
"eval_id": "XHSj88vvSCBdgLtCbsSZgQ",
"run_id": "GFRyNerPFXSvUcW5wjmccn",
"task": "long_horizon_execution:8192",
"task_id": "QZ55FqjvLpVzDPfU4CgkdJ",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:33:05+01:00",
"completed_at": "2025-11-20T15:33:19+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.0,
"params": {}
}
},
"2025-11-20T15-33-02+01-00_long-horizon-execution-4096_6KngaNoJcesYZ3n4wHJtgr.eval": {
"eval_id": "eghpYmgJXjVQzyUrJeqNrD",
"run_id": "GFRyNerPFXSvUcW5wjmccn",
"task": "long_horizon_execution:4096",
"task_id": "6KngaNoJcesYZ3n4wHJtgr",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:33:03+01:00",
"completed_at": "2025-11-20T15:33:19+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.0,
"params": {}
}
},
"2025-11-20T15-33-02+01-00_long-horizon-execution-16384_MSnnENqGY9aXtE549VWfYY.eval": {
"eval_id": "Cbm4Ms6j4DDm82JW6JhjoC",
"run_id": "GFRyNerPFXSvUcW5wjmccn",
"task": "long_horizon_execution:16384",
"task_id": "MSnnENqGY9aXtE549VWfYY",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:33:02+01:00",
"completed_at": "2025-11-20T15:33:10+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.0,
"params": {}
}
},
"2025-11-20T15-33-02+01-00_long-horizon-execution-65536_MKNbbyxFi2SboYmzcZJg7i.eval": {
"eval_id": "2HyufGWpesgASQDYAQWibj",
"run_id": "GFRyNerPFXSvUcW5wjmccn",
"task": "long_horizon_execution:65536",
"task_id": "MKNbbyxFi2SboYmzcZJg7i",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:33:03+01:00",
"completed_at": "2025-11-20T15:33:10+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.0,
"params": {}
}
},
"2025-11-20T15-33-02+01-00_long-horizon-execution-1024_hFHo95vmjgTchZGoSC4pPM.eval": {
"eval_id": "RGPNvdwwBVqBcb4gwPKdpN",
"run_id": "GFRyNerPFXSvUcW5wjmccn",
"task": "long_horizon_execution:1024",
"task_id": "hFHo95vmjgTchZGoSC4pPM",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:33:02+01:00",
"completed_at": "2025-11-20T15:33:05+01:00",
"primary_metric": {
"name": "accuracy",
"value": 1.0,
"params": {}
}
},
"2025-11-20T15-33-02+01-00_long-horizon-execution-32768_Vh2nnbhMtb7hpmk8zMRe7R.eval": {
"eval_id": "7MNDPdZ7F6phteuzwWGHbq",
"run_id": "GFRyNerPFXSvUcW5wjmccn",
"task": "long_horizon_execution:32768",
"task_id": "Vh2nnbhMtb7hpmk8zMRe7R",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:33:02+01:00",
"completed_at": "2025-11-20T15:33:03+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.0,
"params": {}
}
},
"2025-11-20T15-33-02+01-00_long-horizon-execution-2048_g4ytLJepzwxhnKAFsCRkcZ.eval": {
"eval_id": "d5GJrLaBTUyvhTWfuCn6Pb",
"run_id": "GFRyNerPFXSvUcW5wjmccn",
"task": "long_horizon_execution:2048",
"task_id": "g4ytLJepzwxhnKAFsCRkcZ",
"task_version": 0,
"version": 2,
"status": "success",
"model": "hf-inference-providers/openai/gpt-oss-20b",
"started_at": "2025-11-20T15:33:02+01:00",
"completed_at": "2025-11-20T15:33:03+01:00",
"primary_metric": {
"name": "accuracy",
"value": 0.5,
"params": {}
}
}
}