Spaces:
Runtime error
Runtime error
Corey Morris
commited on
Commit
·
e03b231
1
Parent(s):
abac22e
MC1 column had 8 rows with a value of 1. It didn't make sense given the next highest value was 0.47 . Assuming they were data errors, they were removed
Browse files- result_data_processor.py +16 -0
- test_data_processing.py +7 -0
result_data_processor.py
CHANGED
|
@@ -48,6 +48,19 @@ class ResultDataProcessor:
|
|
| 48 |
df.index = (df.index.str.replace('mc\|0', 'mc2', regex=True))
|
| 49 |
df = df.loc[['harness|truthfulqa:mc2']]
|
| 50 |
return df[[model_name]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
@staticmethod
|
|
@@ -119,6 +132,9 @@ class ResultDataProcessor:
|
|
| 119 |
cols = cols[-1:] + cols[:-1]
|
| 120 |
data = data[cols]
|
| 121 |
|
|
|
|
|
|
|
|
|
|
| 122 |
return data
|
| 123 |
|
| 124 |
def rank_data(self):
|
|
|
|
| 48 |
df.index = (df.index.str.replace('mc\|0', 'mc2', regex=True))
|
| 49 |
df = df.loc[['harness|truthfulqa:mc2']]
|
| 50 |
return df[[model_name]]
|
| 51 |
+
|
| 52 |
+
# remove extreme outliers from column harness|truthfulqa:mc1
|
| 53 |
+
def _remove_mc1_outliers(self, df):
|
| 54 |
+
mc1 = df['harness|truthfulqa:mc1']
|
| 55 |
+
# Identify the outliers
|
| 56 |
+
# outliers_condition = mc1 > mc1.quantile(.95)
|
| 57 |
+
outliers_condition = mc1 == 1.0
|
| 58 |
+
# Print out the number of outliers
|
| 59 |
+
print('Number of outliers: ', outliers_condition.sum())
|
| 60 |
+
# Replace the outliers with NaN
|
| 61 |
+
df.loc[outliers_condition, 'harness|truthfulqa:mc1'] = np.nan
|
| 62 |
+
return df
|
| 63 |
+
|
| 64 |
|
| 65 |
|
| 66 |
@staticmethod
|
|
|
|
| 132 |
cols = cols[-1:] + cols[:-1]
|
| 133 |
data = data[cols]
|
| 134 |
|
| 135 |
+
# remove extreme outliers from column harness|truthfulqa:mc1
|
| 136 |
+
data = self._remove_mc1_outliers(data)
|
| 137 |
+
|
| 138 |
return data
|
| 139 |
|
| 140 |
def rank_data(self):
|
test_data_processing.py
CHANGED
|
@@ -34,6 +34,13 @@ class TestResultDataProcessor(unittest.TestCase):
|
|
| 34 |
def test_truthfulqa_mc(self):
|
| 35 |
data = self.processor.data
|
| 36 |
self.assertNotIn('truthfulqa:mc', data.columns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
if __name__ == '__main__':
|
| 39 |
unittest.main()
|
|
|
|
| 34 |
def test_truthfulqa_mc(self):
|
| 35 |
data = self.processor.data
|
| 36 |
self.assertNotIn('truthfulqa:mc', data.columns)
|
| 37 |
+
|
| 38 |
+
# check for extreme outliers in mc1 column
|
| 39 |
+
def test_mc1_outliers(self):
|
| 40 |
+
data = self.processor.data
|
| 41 |
+
mc1 = data['harness|truthfulqa:mc1']
|
| 42 |
+
self.assertLess(mc1.max(), 1.0)
|
| 43 |
+
self.assertGreater(mc1.min(), 0.0)
|
| 44 |
|
| 45 |
if __name__ == '__main__':
|
| 46 |
unittest.main()
|