Spaces:
Running
Running
Use different headers for closed datasets
#14
by
fzoll
- opened
- app/backend/data_page.py +27 -9
app/backend/data_page.py
CHANGED
|
@@ -44,6 +44,23 @@ def get_closed_dataset():
|
|
| 44 |
return closed_list
|
| 45 |
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
def convert_df_to_csv(df):
|
| 48 |
output = io.StringIO()
|
| 49 |
df.to_csv(output, index=False)
|
|
@@ -71,8 +88,8 @@ def _get_dataset_columns(group_name, column_list, avg_column):
|
|
| 71 |
# For individual dataset pages (not sections), group datasets by open/closed
|
| 72 |
if not is_section(group_name) and dataset_columns:
|
| 73 |
# Separate open and closed datasets
|
| 74 |
-
open_datasets = [d for d in dataset_columns if not d.startswith('_')]
|
| 75 |
-
closed_datasets = [d for d in dataset_columns if d.startswith('_')]
|
| 76 |
|
| 77 |
grouped_columns = []
|
| 78 |
|
|
@@ -145,12 +162,12 @@ def _get_dataset_columns(group_name, column_list, avg_column):
|
|
| 145 |
const fieldName = params.column.colId;
|
| 146 |
|
| 147 |
const link = document.createElement('a');
|
| 148 |
-
link.href = 'https://huggingface.co/datasets/embedding-benchmark/' +
|
| 149 |
link.target = '_blank';
|
| 150 |
link.style.color = 'white';
|
| 151 |
link.style.textDecoration = 'underline';
|
| 152 |
link.style.cursor = 'pointer';
|
| 153 |
-
link.textContent =
|
| 154 |
|
| 155 |
link.addEventListener('click', function(e) {{
|
| 156 |
e.stopPropagation();
|
|
@@ -171,12 +188,12 @@ def _get_dataset_columns(group_name, column_list, avg_column):
|
|
| 171 |
return grouped_columns
|
| 172 |
else:
|
| 173 |
# For section pages, return columns without grouping (original behavior)
|
| 174 |
-
return [{'headerName': column if "Average" not in column else column.replace("Average", "").strip().capitalize(),
|
| 175 |
'field': column,
|
| 176 |
'headerStyle': HEADER_STYLE,
|
| 177 |
'cellStyle': CELL_STYLE,
|
| 178 |
-
"headerTooltip": column if "Average" not in column else column.replace("Average",
|
| 179 |
-
"").strip().capitalize(),
|
| 180 |
'headerComponent': JsCode(f"""
|
| 181 |
class DatasetHeaderRenderer {{
|
| 182 |
init(params) {{
|
|
@@ -188,12 +205,13 @@ def _get_dataset_columns(group_name, column_list, avg_column):
|
|
| 188 |
this.eGui.textContent = columnName;
|
| 189 |
}} else {{
|
| 190 |
const link = document.createElement('a');
|
| 191 |
-
link.href = 'https://huggingface.co/datasets/embedding-benchmark/' +
|
| 192 |
link.target = '_blank';
|
| 193 |
link.style.color = 'white';
|
| 194 |
link.style.textDecoration = 'underline';
|
| 195 |
link.style.cursor = 'pointer';
|
| 196 |
-
|
|
|
|
| 197 |
|
| 198 |
link.addEventListener('click', function(e) {{
|
| 199 |
e.stopPropagation();
|
|
|
|
| 44 |
return closed_list
|
| 45 |
|
| 46 |
|
| 47 |
+
def get_dataset_url_name(field_name: str) -> str:
    """Return the URL name segment for a dataset column name.

    Names beginning with ``"ClosedDataset "`` are rewritten to an
    underscore form; every other name is returned unchanged.

    Args:
        field_name: Column name, e.g.
            ``"ClosedDataset 2 (German Legal Sentences)"``.

    Returns:
        ``"ClosedDataset_<id>"`` when the name carries a parenthesised
        description, the name with spaces replaced by underscores when it
        starts with the closed-dataset prefix but has no usable id before
        the parenthesis (or no parenthesis at all), and ``field_name``
        itself otherwise.
    """
    prefix = "ClosedDataset "
    if not field_name.startswith(prefix):
        # Names without the closed-dataset prefix are used as-is.
        return field_name

    if "(" in field_name:
        # Cut the description off at the parenthesis itself rather than at
        # the first space, so "ClosedDataset 2(desc)" and names with extra
        # whitespace still yield the bare id "2".
        label = field_name[len(prefix):].partition("(")[0]
        tokens = label.split()
        if tokens:
            return f"ClosedDataset_{tokens[0]}"

    # No parenthesised description (or no id in front of it): just swap
    # spaces for underscores.
    return field_name.replace(" ", "_")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
def convert_df_to_csv(df):
|
| 65 |
output = io.StringIO()
|
| 66 |
df.to_csv(output, index=False)
|
|
|
|
| 88 |
# For individual dataset pages (not sections), group datasets by open/closed
|
| 89 |
if not is_section(group_name) and dataset_columns:
|
| 90 |
# Separate open and closed datasets
|
| 91 |
+
open_datasets = [d for d in dataset_columns if not d.startswith('_') and not d.startswith('ClosedDataset ')]
|
| 92 |
+
closed_datasets = [d for d in dataset_columns if d.startswith('_') or d.startswith('ClosedDataset ')]
|
| 93 |
|
| 94 |
grouped_columns = []
|
| 95 |
|
|
|
|
| 162 |
const fieldName = params.column.colId;
|
| 163 |
|
| 164 |
const link = document.createElement('a');
|
| 165 |
+
link.href = 'https://huggingface.co/datasets/embedding-benchmark/' + '{get_dataset_url_name(column)}';
|
| 166 |
link.target = '_blank';
|
| 167 |
link.style.color = 'white';
|
| 168 |
link.style.textDecoration = 'underline';
|
| 169 |
link.style.cursor = 'pointer';
|
| 170 |
+
link.textContent = '{column}';
|
| 171 |
|
| 172 |
link.addEventListener('click', function(e) {{
|
| 173 |
e.stopPropagation();
|
|
|
|
| 188 |
return grouped_columns
|
| 189 |
else:
|
| 190 |
# For section pages, return columns without grouping (original behavior)
|
| 191 |
+
return [{'headerName': column if column.startswith('_') and "Average" not in column else (column if "Average" not in column else column.replace("Average", "").strip().capitalize()),
|
| 192 |
'field': column,
|
| 193 |
'headerStyle': HEADER_STYLE,
|
| 194 |
'cellStyle': CELL_STYLE,
|
| 195 |
+
"headerTooltip": column if column.startswith('_') and "Average" not in column else (column if "Average" not in column else column.replace("Average",
|
| 196 |
+
"").strip().capitalize()),
|
| 197 |
'headerComponent': JsCode(f"""
|
| 198 |
class DatasetHeaderRenderer {{
|
| 199 |
init(params) {{
|
|
|
|
| 205 |
this.eGui.textContent = columnName;
|
| 206 |
}} else {{
|
| 207 |
const link = document.createElement('a');
|
| 208 |
+
link.href = 'https://huggingface.co/datasets/embedding-benchmark/' + '{get_dataset_url_name(column)}';
|
| 209 |
link.target = '_blank';
|
| 210 |
link.style.color = 'white';
|
| 211 |
link.style.textDecoration = 'underline';
|
| 212 |
link.style.cursor = 'pointer';
|
| 213 |
+
|
| 214 |
+
link.textContent = '{column}';
|
| 215 |
|
| 216 |
link.addEventListener('click', function(e) {{
|
| 217 |
e.stopPropagation();
|