Spaces:
Running
Running
| """ | |
| handle multi_header options | |
| """ | |
| from st_aggrid import JsCode | |
| from streamlit_theme import st_theme | |
# Font-size style dicts for grid headers and cells. Nothing in this part of
# the file reads them; presumably they are imported elsewhere -- verify before
# removing.
HEADER_STYLE = {'fontSize': '18px'}
CELL_STYLE = {'fontSize': '18px'}

# Base URL that a dataset's URL name is appended to when building header links.
# It must not start with whitespace: the value is concatenated directly into
# the "url" header parameter, so a stray leading space ends up inside the URL.
LINK = 'https://huggingface.co/datasets/embedding-benchmark/'
def get_dataset_url_name(field_name):
    """Map a column field name to the name used in its dataset URL.

    Names that do not start with ``"ClosedDataset "`` are returned unchanged.
    ``"ClosedDataset 2 (German Legal Sentences)"`` becomes ``"ClosedDataset_2"``;
    a closed-dataset name without parentheses just has its spaces replaced by
    underscores.
    """
    closed_prefix = "ClosedDataset "

    # Open datasets: the field name is already the URL name.
    if not field_name.startswith(closed_prefix):
        return field_name

    # No "(description)" suffix: only the spaces need replacing.
    if "(" not in field_name:
        return field_name.replace(" ", "_")

    # "ClosedDataset 2 (description)" -> "2": the first space-delimited token
    # that follows the prefix.
    after_prefix = field_name.split(closed_prefix)[1]
    dataset_number = after_prefix.partition(" ")[0]
    return "ClosedDataset_" + dataset_number
def format_closed_dataset_header(column_name):
    """
    Produce the header text for a dataset column, putting the description of
    a closed dataset on its own line.
    :param column_name: Column name such as "ClosedDataset 2 (German Legal Sentences)"
    :return: "ClosedDataset N\\n(description)" for names that start with
        "ClosedDataset " and contain "("; otherwise the name with every
        underscore removed
    """
    # Split at the first "(" only; ``paren`` is empty when there is none.
    name_part, paren, description = column_name.partition("(")

    if column_name.startswith("ClosedDataset ") and paren:
        return name_part.strip() + "\n" + paren + description

    # Anything else: keep the text on one line, just drop underscores.
    return column_name.replace('_', '')
def _dataset_group_def(group_title, columns, header_formatter=None):
    """Build one column-group definition whose children are link-header dataset columns.

    :param group_title: text shown on the group header
    :param columns: field names of the child columns, in display order
    :param header_formatter: optional callable mapping a field name to its
        header text; when omitted the field name is used as-is
    :return: group definition dict with one child definition per column
    """
    children = []
    for column in columns:
        # Format once and reuse: the same text is needed both as the column's
        # headerName and as the parameter handed to the header component.
        header_text = header_formatter(column) if header_formatter else column
        children.append({
            'headerName': header_text,
            'field': column,
            "headerComponentParams": {
                # "linkHeaderComponent" is not defined in this file; it is
                # presumably registered with the grid by the caller.
                "innerHeaderComponent": "linkHeaderComponent",
                "url": LINK + get_dataset_url_name(column),
                "headerName": header_text,
            },
            'headerClass': 'custom-header-style',
            'cellClass': 'custom-cell-style',
            'sortable': True,
            "suppressColumnVirtualisation": True,
        })
    return {
        'headerName': group_title,
        # NOTE(review): AG Grid documents `width` as a number of pixels; a CSS
        # keyword here is probably ignored. Kept as-is to preserve behaviour.
        'width': 'fit-content',
        'headerClass': 'custom-header-style',
        'cellClass': 'custom-cell-style',
        'autoHeaderHeight': True,
        'children': children,
    }


def mutil_header_options(column_list: list, avg_column: str, is_section: bool):
    """
    Build the column definitions for the dataset score columns.

    The summary columns (``avg_column``, "Closed average", "Open average")
    are always left out.

    :param column_list: names of the score columns
    :param avg_column: name of the overall-average column to exclude
    :param is_section: when true, return one flat definition per remaining
        column; otherwise return the columns split into a closed and an open
        group
    :return: list of column definitions. With ``is_section`` it is a flat
        list in which a column containing "Average" is labelled with the
        rest of its name, capitalised. Otherwise it is
        ``[closed_group, open_group]``, where the closed group holds the
        columns starting with "_" or "ClosedDataset ".
    """
    summary_columns = (avg_column, "Closed average", "Open average")
    dataset_columns = [column for column in column_list if column not in summary_columns]

    if is_section:
        section_defs = []
        for column in dataset_columns:
            # "Legal Average" -> "Legal"; names without "Average" are shown verbatim.
            label = column
            if "Average" in column:
                label = column.replace("Average", "").strip().capitalize()
            section_defs.append({
                'headerName': label,
                'field': column,
                'headerClass': 'custom-header-style',
                'cellClass': 'custom-cell-style',
                'headerTooltip': label,
            })
        return section_defs

    closed_prefixes = ('_', "ClosedDataset ")
    closed_columns = [column for column in dataset_columns if column.startswith(closed_prefixes)]
    open_columns = [column for column in dataset_columns if not column.startswith(closed_prefixes)]

    return [
        _dataset_group_def('CLOSED DATASETS', closed_columns, format_closed_dataset_header),
        _dataset_group_def('OPEN DATASETS', open_columns),
    ]
def get_header_options(column_list: list, avg_column: str, is_section: bool):
    """
    Assemble the grid options dict: the fixed model-metadata columns followed
    by the dataset columns produced by ``mutil_header_options``.

    :param column_list: names of the score columns; forwarded to
        ``mutil_header_options`` and, minus the summary columns, listed as the
        ``autoSizeStrategy`` column ids
    :param avg_column: field name displayed under the "Overall Score" header
    :param is_section: forwarded unchanged to ``mutil_header_options``
    :return: dict with ``columnDefs``, ``defaultColDef``, ``autoSizeStrategy``,
        ``tooltipShowDelay`` and ``suppressColumnVirtualisation`` entries
    """
    summary_columns = (avg_column, "Closed average", "Open average")

    # Tooltip for the model-name cell: the row's `tooltip` text, followed by
    # its `reference` link in parentheses when one exists.
    model_tooltip_js = """
        function(p) {
            const value = p.data && p.data.tooltip ? p.data.tooltip : "";
            const link = p.data && p.data.reference ? p.data.reference : "";
            return link ? `${value} (${link})` : value;
        }
    """

    # Renders the model name as a link when the row has a `reference`.
    # `params.data` is guarded the same way as in the tooltip getter so a row
    # without data does not make the renderer throw.
    model_renderer_js = """class CustomHTML {
        init(params) {
            const link = params.data && params.data.reference ? params.data.reference : "";
            this.eGui = document.createElement('div');
            this.eGui.innerHTML = link ?
                `<a href="${link}" class="a-cell" target="_blank">${params.value} </a>` :
                params.value;
        }
        getGui() {
            return this.eGui;
        }
    }"""

    # Shared by "Embd Dim" and "Context Length": numbers with an absolute
    # value of at least 10000 get en-US thousands separators; everything else
    # (including non-numbers) is passed through untouched.
    grouped_number_js = """
        function(params) {
            const num = params.value;
            if (typeof num !== "number") return num;
            if (Math.abs(num) >= 10000) {
                return num.toLocaleString("en-US");
            }
            return num;
        }
    """

    # Abbreviates parameter counts with a K / M / B suffix and two decimals.
    param_count_js = """function(params) {
        const num = params.value;
        if (num >= 1e9) return (num / 1e9).toFixed(2) + "B";
        if (num >= 1e6) return (num / 1e6).toFixed(2) + "M";
        if (num >= 1e3) return (num / 1e3).toFixed(2) + "K";
        return num;
    }"""

    grid_options = {
        'columnDefs': [
            {
                'headerName': 'Model Name',
                'field': 'model_name',
                'pinned': 'left',
                'sortable': False,
                'headerClass': 'custom-header-style',
                'cellClass': 'custom-cell-style',
                'autoHeaderHeight': True,
                "tooltipValueGetter": JsCode(model_tooltip_js),
                "width": 250,
                'cellRenderer': JsCode(model_renderer_js),
                'suppressSizeToFit': True,
                'headerTooltip': 'Name of the embedding model being evaluated for retrieval tasks',
            },
            {
                'headerName': "Vendor",
                'field': 'vendor',
                'headerClass': 'custom-header-style',
                'cellClass': 'custom-cell-style',
                'headerTooltip': 'Company or organization that developed and maintains the model',
            },
            {
                'headerName': "Overall Score",
                'field': avg_column,
                'headerClass': 'custom-header-style',
                'cellClass': 'custom-cell-style',
                'headerTooltip': 'Overall average performance score across all evaluated datasets',
            },
            {
                'headerName': 'Open Average',
                'field': 'Open average',
                'headerClass': 'custom-header-style',
                'cellClass': 'custom-cell-style',
                'headerTooltip': 'Average NDCG@10 performance score across publicly available datasets',
            },
            {
                'headerName': 'Closed Average',
                'field': 'Closed average',
                'headerClass': 'custom-header-style',
                'cellClass': 'custom-cell-style',
                'headerTooltip': 'Average NDCG@10 performance score across proprietary/restricted datasets',
            },
            {
                'headerName': 'Embd Dtype',
                'field': 'embd_dtype',
                'headerClass': 'custom-header-style',
                'cellClass': 'custom-cell-style',
                'headerTooltip': 'Data type format of the embedding vectors (e.g., float32, int8, binary)',
            },
            {
                'headerName': 'Embd Dim',
                'field': 'embd_dim',
                'headerClass': 'custom-header-style',
                'cellClass': 'custom-cell-style',
                'valueFormatter': JsCode(grouped_number_js),
                'headerTooltip': 'Number of dimensions in the embedding vector representation',
            },
            {
                'headerName': 'Number of Parameters',
                'field': 'num_params',
                'cellDataType': 'number',
                "colId": "num_params",
                'headerClass': 'custom-header-style',
                'cellClass': 'custom-cell-style',
                'valueFormatter': JsCode(param_count_js),
                "width": 120,
                'headerTooltip': 'Total number of trainable parameters in the embedding model',
            },
            {
                'headerName': 'Context Length',
                'field': 'max_tokens',
                'headerClass': 'custom-header-style',
                'cellClass': 'custom-cell-style',
                'valueFormatter': JsCode(grouped_number_js),
                'headerTooltip': 'Maximum number of tokens the model can process in a single input',
            },
            # Dataset score columns (flat or grouped, depending on is_section).
            *mutil_header_options(column_list, avg_column, is_section),
        ],
        'defaultColDef': {
            'filter': True,
            'sortable': True,
            'resizable': True,
            'headerClass': "multi-line-header",
            'autoHeaderHeight': True,
            'width': 105,
        },
        # Only the dataset columns are auto-sized to their cell contents.
        "autoSizeStrategy": {
            "type": 'fitCellContents',
            "colIds": [column for column in column_list if column not in summary_columns],
        },
        "tooltipShowDelay": 500,
        "suppressColumnVirtualisation": True,
    }
    return grid_options