Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	init_project
#1
by
						
SmileXing
	
							
						- opened
							
					
- .gitignore +44 -0
- app.py +36 -0
- app/__init__.py +0 -0
- app/backend/__init__.py +1 -0
- app/backend/constant.py +83 -0
- app/backend/data_engine.py +117 -0
- app/ui/__init__.py +0 -0
- app/ui/pages/__init__.py +0 -0
- app/ui/pages/data_page.py +170 -0
- app/ui/pages_sections.toml +38 -0
- requirements.txt +3 -0
- utils/__init__.py +0 -0
- utils/cache_decorator.py +54 -0
- utils/http_utils.py +5 -0
    	
        .gitignore
    ADDED
    
    | @@ -0,0 +1,44 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            *.pyc
         | 
| 2 | 
            +
            model_infos.json
         | 
| 3 | 
            +
            space
         | 
| 4 | 
            +
            .venv
         | 
| 5 | 
            +
            results
         | 
| 6 | 
            +
            mteb
         | 
| 7 | 
            +
            **/.DS_Store
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # FastAPI
         | 
| 10 | 
            +
            **/__pycache__/
         | 
| 11 | 
            +
            .pytest_cache/
         | 
| 12 | 
            +
            .coverage
         | 
| 13 | 
            +
            htmlcov/
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            # Gradio
         | 
| 16 | 
            +
            flagged/
         | 
| 17 | 
            +
            gradio_cached_examples/
         | 
| 18 | 
            +
            tmp/
         | 
| 19 | 
            +
            logs/
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            # IDE
         | 
| 22 | 
            +
            .idea/
         | 
| 23 | 
            +
            .vscode/
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            # Environment-related files
         | 
| 26 | 
            +
            .env
         | 
| 27 | 
            +
            .env.*
         | 
| 28 | 
            +
            .memo.md
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            # ui pages
         | 
| 31 | 
            +
            app/ui/pages/conversational.py
         | 
| 32 | 
            +
            app/ui/pages/figures-and-tables.py
         | 
| 33 | 
            +
            app/ui/pages/healthcare.py
         | 
| 34 | 
            +
            app/ui/pages/law.py
         | 
| 35 | 
            +
            app/ui/pages/long-context.py
         | 
| 36 | 
            +
            app/ui/pages/multilingual.py
         | 
| 37 | 
            +
            app/ui/pages/text.py
         | 
| 38 | 
            +
            app/ui/pages/text-to-photo.py
         | 
| 39 | 
            +
            app/ui/pages/text-to-text.py
         | 
| 40 | 
            +
            app/ui/pages/tech.py
         | 
| 41 | 
            +
            app/ui/pages/document-screenshot.py
         | 
| 42 | 
            +
            app/ui/pages/finance.py
         | 
| 43 | 
            +
            app/ui/pages/code.py
         | 
| 44 | 
            +
            app/ui/pages/german.py
         | 
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,36 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import streamlit as st
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            from st_pages import add_page_title, get_nav_from_toml
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            from app.backend.constant import LEADERBOARD_MAP
         | 
| 8 | 
            +
            from app.backend.data_engine import DataEngine
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            # Page configuration goes first: Streamlit expects set_page_config to run
            # before other Streamlit commands on the page.
            st.set_page_config(layout="wide")

            # Generate one page script per group name by substituting the
            # "$group_name$" placeholder in the shared data_page.py template.
            with open("app/ui/pages/data_page.py", "r", encoding="utf-8") as f:
                data_page = f.read()
            for group_names in LEADERBOARD_MAP.values():
                for group_name in group_names:
                    path = os.path.join("app/ui/pages", f"{group_name}.py")
                    with open(path, "w", encoding="utf-8") as page_file:
                        page_file.write(data_page.replace("$group_name$", group_name))

            # This script is re-executed on every rerun, so reuse the engine already
            # stored in the session instead of rebuilding it each time.
            data_engine = st.session_state.get("data_engine")
            if data_engine is None:
                data_engine = DataEngine()
                data_engine.jsons_to_df()
                st.session_state["data_engine"] = data_engine

            # Load the page tree from the TOML description and hand control to the
            # selected page.
            nav = get_nav_from_toml("app/ui/pages_sections.toml")
            pg = st.navigation(nav)
            add_page_title(pg)
            pg.run()
         | 
    	
        app/__init__.py
    ADDED
    
    | 
            File without changes
         | 
    	
        app/backend/__init__.py
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
             | 
    	
        app/backend/constant.py
    ADDED
    
    | @@ -0,0 +1,83 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from enum import Enum
         | 
| 2 | 
            +
             | 
| 3 | 
            +
             | 
| 4 | 
            +
            class Navigation(Enum):
                """String labels for the text and multimodal leaderboards."""

                TEXT_LEADERBOARD = "Text Leaderboard"
                MULTIMODAL_LEADERBOARD = "Multimodal Leaderboard"
         | 
| 7 | 
            +
             | 
| 8 | 
            +
             | 
| 9 | 
            +
            class TaskCategory(Enum):
                """String labels for the task categories."""

                LAW = "Law"
                CODE = "Code"
                CONVERSATIONAL = "Conversational"
                TECH = "Tech"
                LONG_CONTEXT = "Long-context"
                MULTILINGUAL = "Multilingual"
         | 
| 16 | 
            +
             | 
| 17 | 
            +
             | 
| 18 | 
            +
            class ModelProvider(Enum):
                """String labels for model providers; OTHERS is the catch-all member."""

                OPENAI = "OpenAI"
                VOYAGEAI = "VoyageAI"
                COHERE = "Cohere"
                OTHERS = "Others"
         | 
| 23 | 
            +
             | 
| 24 | 
            +
             | 
| 25 | 
            +
            class EvaluationMetric(Enum):
                """Metric labels: NDCG, RECALL and PRECISION at cutoffs 1, 3, 5, 10, 20, 50, 100."""

                # NDCG@k
                NDCG_1 = "NDCG@1"
                NDCG_3 = "NDCG@3"
                NDCG_5 = "NDCG@5"
                NDCG_10 = "NDCG@10"
                NDCG_20 = "NDCG@20"
                NDCG_50 = "NDCG@50"
                NDCG_100 = "NDCG@100"

                # RECALL@k
                RECALL_1 = "RECALL@1"
                RECALL_3 = "RECALL@3"
                RECALL_5 = "RECALL@5"
                RECALL_10 = "RECALL@10"
                RECALL_20 = "RECALL@20"
                RECALL_50 = "RECALL@50"
                RECALL_100 = "RECALL@100"

                # PRECISION@k
                PRECISION_1 = "PRECISION@1"
                PRECISION_3 = "PRECISION@3"
                PRECISION_5 = "PRECISION@5"
                PRECISION_10 = "PRECISION@10"
                PRECISION_20 = "PRECISION@20"
                PRECISION_50 = "PRECISION@50"
                PRECISION_100 = "PRECISION@100"
         | 
| 47 | 
            +
             | 
| 48 | 
            +
             | 
| 49 | 
            +
            class EmbdDtype(Enum):
                """Embedding data-type options; ALL carries the value "all"."""

                ALL = "all"
                FLOAT_32 = "float32"
                INT_8 = "int8"
                BINARY = "binary"
         | 
| 54 | 
            +
             | 
| 55 | 
            +
             | 
| 56 | 
            +
            class EmbdDim(Enum):
                """Embedding-dimension range labels.

                Member names OP1..OP4 are positional; the values are the range labels.
                NOTE(review): "k" presumably means thousands of dimensions -- confirm.
                """

                OP1 = "<=1k"
                OP2 = "1k-2k"
                OP3 = "2k-5k"
                OP4 = ">=5k"
         | 
| 61 | 
            +
             | 
| 62 | 
            +
             | 
| 63 | 
            +
            class Similarity(Enum):
                """Similarity-function options; ALL carries the value "all"."""

                ALL = "all"
                COSINE = "cosine"
                DOT = "dot"
                EUCLIDEAN = "euclidean"
         | 
| 68 | 
            +
             | 
| 69 | 
            +
             | 
| 70 | 
            +
            # Maps a leaderboard name to the list of group names it contains.
            # app.py writes one page script per group name, so each entry here
            # results in a generated "<group_name>.py" page file.
            LEADERBOARD_MAP = {
                "Text": [
                    "text",
                    "law",
                    "multilingual",
                    "german",
                    "code",
                    "tech",
                ],
                # No multimodal groups are defined yet.
                "Multimodal": [],
            }
         | 
    	
        app/backend/data_engine.py
    ADDED
    
    | @@ -0,0 +1,117 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            """
         | 
| 2 | 
            +
            Data service provider
         | 
| 3 | 
            +
            """
         | 
| 4 | 
            +
            import json
         | 
| 5 | 
            +
            from typing import List
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            import pandas as pd
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            from utils.cache_decorator import cache_df_with_custom_key, cache_dict_with_custom_key
         | 
| 10 | 
            +
            from utils.http_utils import get
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            # Columns exposed by DataEngine, in display order.
            COLUMNS = [
                'model_name',
                'embd_dtype', 'embd_dim', 'num_params', 'max_tokens', 'similarity',
                'query_instruct', 'corpus_instruct',
                'ndcg_at_10',
            ]
            # One type tag per entry of COLUMNS (same length, same order).
            # NOTE(review): not referenced in the visible code -- confirm the consumer.
            COLUMNS_TYPES = [
                "markdown",
                'str', 'str', 'number', 'number', 'str',
                'str', 'str',
                'number',
            ]

            # Base URL of the raw result files; the three JSON documents live under it.
            GIT_URL = "https://raw.githubusercontent.com/embedding-benchmark/ebr/refs/heads/main/results/"
            DATASET_URL = f"{GIT_URL}datasets.json"
            MODEL_URL = f"{GIT_URL}models.json"
            RESULT_URL = f"{GIT_URL}results.json"
         | 
| 27 | 
            +
             | 
| 28 | 
            +
             | 
| 29 | 
            +
            class DataEngine:
                """Download the benchmark JSON files and expose them as DataFrames.

                The three payloads (models, datasets, results) are fetched over HTTP
                through the project's ``get`` helper and wrapped by the project's
                caching decorators.
                NOTE(review): cache scope and lifetime are defined in
                utils/cache_decorator.py, which is not visible here.
                """

                def __init__(self):
                    self.df = self.init_dataframe()

                @staticmethod
                def _fetch_json(url):
                    """Return the decoded JSON body of *url*, or ``{}`` on a non-200 status."""
                    res = get(url)
                    if res.status_code == 200:
                        return res.json()
                    return {}

                @property
                @cache_dict_with_custom_key("models")
                def models(self):
                    """
                    Get models data (``{}`` if the download fails)
                    """
                    return self._fetch_json(MODEL_URL)

                @property
                @cache_dict_with_custom_key("datasets")
                def datasets(self):
                    """
                    Get datasets data (``{}`` if the download fails)
                    """
                    return self._fetch_json(DATASET_URL)

                @property
                @cache_dict_with_custom_key("results")
                def results(self):
                    """
                    Get results data (``{}`` if the download fails)
                    """
                    return self._fetch_json(RESULT_URL)

                def init_dataframe(self):
                    """
                    Initialize DataFrame

                    Currently returns a fixed one-row placeholder frame.
                    """
                    d = {"hello": [123], "world": [456]}
                    return pd.DataFrame(d)

                @cache_df_with_custom_key("json_result")
                def jsons_to_df(self):
                    """
                    Join results, datasets and models into one leaderboard DataFrame.

                    Each results entry contributes its ``results`` rows tagged with its
                    ``dataset_name``. Each datasets entry maps its ``datasets`` to a
                    ``group_name`` and ``leaderboard``. ``ndcg_at_10`` is averaged per
                    (model_name, group_name) and the model metadata is then joined on
                    ``model_name``.

                    Returns:
                        A DataFrame with columns ``COLUMNS + ["group_name", "reference"]``.
                        It is empty (but with those columns) when any payload is
                        missing or the joins produce no rows.
                    """
                    output_columns = COLUMNS + ["group_name", "reference"]

                    result_frames = []
                    for result_dict in self.results:
                        frame = pd.DataFrame(result_dict["results"])
                        frame["dataset_name"] = result_dict["dataset_name"]
                        result_frames.append(frame)
                    # pd.concat raises ValueError on an empty list, which is what a
                    # failed download (``{}``) would otherwise lead to.
                    if not result_frames:
                        return pd.DataFrame(columns=output_columns)

                    dataset_frames = []
                    for item in self.datasets:
                        dataset_names = item["datasets"]
                        row_count = len(dataset_names)
                        dataset_frames.append(
                            pd.DataFrame(
                                {
                                    "group_name": [item["name"]] * row_count,
                                    "dataset_name": dataset_names,
                                    "leaderboard": [item["leaderboard"]] * row_count,
                                }
                            )
                        )
                    if not dataset_frames:
                        return pd.DataFrame(columns=output_columns)

                    models_list = self.models
                    # An empty payload yields a frame without "model_name", which would
                    # make the merge below raise KeyError.
                    if not models_list:
                        return pd.DataFrame(columns=output_columns)

                    df_result = pd.concat(result_frames)
                    df_dataset = pd.concat(dataset_frames).drop_duplicates()
                    df_model = pd.DataFrame(models_list)

                    df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
                    if df.empty:
                        return pd.DataFrame(columns=output_columns)
                    df = df.groupby(["model_name", "group_name"], as_index=False)["ndcg_at_10"].mean()
                    df = pd.merge(df, df_model, on=["model_name"], how="inner")

                    if df.empty:
                        return pd.DataFrame(columns=output_columns)
                    return df[output_columns]

                def filter_df(self, group_name: str):
                    """
                    Return the ``COLUMNS`` of the rows whose ``group_name`` equals *group_name*.
                    """
                    df = self.jsons_to_df()

                    return df.loc[df["group_name"] == group_name, COLUMNS]
         | 
    	
        app/ui/__init__.py
    ADDED
    
    | 
            File without changes
         | 
    	
        app/ui/pages/__init__.py
    ADDED
    
    | 
            File without changes
         | 
    	
        app/ui/pages/data_page.py
    ADDED
    
    | @@ -0,0 +1,170 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from st_aggrid import AgGrid, JsCode, ColumnsAutoSizeMode
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import streamlit as st
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            HEADER_STYLE = {'fontSize': '18px'}
         | 
| 6 | 
            +
            CELL_STYLE = {'fontSize': '18px'}
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            # Add theme color and grid styles
         | 
| 9 | 
            +
            st.markdown("""
         | 
| 10 | 
            +
                <style>
         | 
| 11 | 
            +
                    :root {
         | 
| 12 | 
            +
                        --theme-color: rgb(129, 150, 64);
         | 
| 13 | 
            +
                        --theme-color-light: rgba(129, 150, 64, 0.2);
         | 
| 14 | 
            +
                    }
         | 
| 15 | 
            +
                    
         | 
| 16 | 
            +
                    /* AG Grid specific overrides */
         | 
| 17 | 
            +
                    .ag-theme-alpine {
         | 
| 18 | 
            +
                        --ag-selected-row-background-color: var(--theme-color-light) !important;
         | 
| 19 | 
            +
                        --ag-row-hover-color: var(--theme-color-light) !important;
         | 
| 20 | 
            +
                        --ag-selected-tab-color: var(--theme-color) !important;
         | 
| 21 | 
            +
                        --ag-range-selection-border-color: var(--theme-color) !important;
         | 
| 22 | 
            +
                        --ag-range-selection-background-color: var(--theme-color-light) !important;
         | 
| 23 | 
            +
                    }
         | 
| 24 | 
            +
                    
         | 
| 25 | 
            +
                    .ag-row-hover {
         | 
| 26 | 
            +
                        background-color: var(--theme-color-light) !important;
         | 
| 27 | 
            +
                    }
         | 
| 28 | 
            +
                    
         | 
| 29 | 
            +
                    .ag-row-selected {
         | 
| 30 | 
            +
                        background-color: var(--theme-color-light) !important;
         | 
| 31 | 
            +
                    }
         | 
| 32 | 
            +
                    
         | 
| 33 | 
            +
                    .ag-row-focus {
         | 
| 34 | 
            +
                        background-color: var(--theme-color-light) !important;
         | 
| 35 | 
            +
                    }
         | 
| 36 | 
            +
                    
         | 
| 37 | 
            +
                    .ag-cell-focus {
         | 
| 38 | 
            +
                        border-color: var(--theme-color) !important;
         | 
| 39 | 
            +
                    }
         | 
| 40 | 
            +
                    
         | 
| 41 | 
            +
                    /* Keep existing styles */
         | 
| 42 | 
            +
                    .center-text {
         | 
| 43 | 
            +
                        text-align: center;
         | 
| 44 | 
            +
                        color: var(--theme-color);
         | 
| 45 | 
            +
                    }
         | 
| 46 | 
            +
                    .center-image {
         | 
| 47 | 
            +
                        display: block;
         | 
| 48 | 
            +
                        margin-left: auto;
         | 
| 49 | 
            +
                        margin-right: auto;
         | 
| 50 | 
            +
                    }
         | 
| 51 | 
            +
                    h2 {
         | 
| 52 | 
            +
                        color: var(--theme-color) !important;
         | 
| 53 | 
            +
                    }
         | 
| 54 | 
            +
                    .ag-header-cell {
         | 
| 55 | 
            +
                        background-color: var(--theme-color) !important;
         | 
| 56 | 
            +
                        color: white !important;
         | 
| 57 | 
            +
                    }
         | 
| 58 | 
            +
                    a {
         | 
| 59 | 
            +
                        color: var(--theme-color) !important;
         | 
| 60 | 
            +
                    }
         | 
| 61 | 
            +
                    a:hover {
         | 
| 62 | 
            +
                        color: rgba(129, 150, 64, 0.8) !important;
         | 
| 63 | 
            +
                    }
         | 
| 64 | 
            +
                </style>
         | 
| 65 | 
            +
            """, unsafe_allow_html=True)
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            #  logo
         | 
| 68 | 
            +
            # st.markdown('<img src="https://www.voyageai.com/logo.svg" class="center-image" width="200">', unsafe_allow_html=True)
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            # title
         | 
| 71 | 
            +
            st.markdown('<h2 class="center-text">Embedding Benchmark For Retrieval</h2>', unsafe_allow_html=True)
         | 
| 72 | 
            +
             | 
| 73 | 
            +
            group_name = "$group_name$"
         | 
| 74 | 
            +
             | 
| 75 | 
            +
            data_engine = st.session_state["data_engine"]
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            df = data_engine.jsons_to_df()[:]
         | 
| 78 | 
            +
             | 
| 79 | 
            +
            df = df[df["group_name"] == group_name].sort_values(by="ndcg_at_10", ascending=False)
         | 
| 80 | 
            +
             | 
| 81 | 
            +
            # setting column config
         | 
| 82 | 
            +
            grid_options = {
         | 
| 83 | 
            +
                'columnDefs': [
         | 
| 84 | 
            +
                    {
         | 
| 85 | 
            +
                        'headerName': 'Model Name',
         | 
| 86 | 
            +
                        'field': 'model_name',
         | 
| 87 | 
            +
                        'pinned': 'left',
         | 
| 88 | 
            +
                        'sortable': False,
         | 
| 89 | 
            +
                        'headerStyle': HEADER_STYLE,
         | 
| 90 | 
            +
                        'cellStyle': CELL_STYLE,
         | 
| 91 | 
            +
                        'cellRenderer': JsCode("""class CustomHTML {
         | 
| 92 | 
            +
                            init(params) {
         | 
| 93 | 
            +
                                const link = params.data.reference;
         | 
| 94 | 
            +
                                this.eGui = document.createElement('div');
         | 
| 95 | 
            +
                                this.eGui.innerHTML = link ? 
         | 
| 96 | 
            +
                                    `<a href="${link}" target="_blank">${params.value}</a>` : 
         | 
| 97 | 
            +
                                    params.value;
         | 
| 98 | 
            +
                            }
         | 
| 99 | 
            +
             | 
| 100 | 
            +
                            getGui() {
         | 
| 101 | 
            +
                                return this.eGui;
         | 
| 102 | 
            +
                            }
         | 
| 103 | 
            +
                        }"""),
         | 
| 104 | 
            +
                    },
         | 
| 105 | 
            +
                    {
         | 
| 106 | 
            +
                        'headerName': 'NDCG@10',
         | 
| 107 | 
            +
                        'field': 'ndcg_at_10',
         | 
| 108 | 
            +
                        'headerStyle': HEADER_STYLE,
         | 
| 109 | 
            +
                        'cellStyle': CELL_STYLE,
         | 
| 110 | 
            +
                    },
         | 
| 111 | 
            +
                    {
         | 
| 112 | 
            +
                        'headerName': 'Data Type',
         | 
| 113 | 
            +
                        'field': 'embd_dtype',
         | 
| 114 | 
            +
                        'headerStyle': HEADER_STYLE,
         | 
| 115 | 
            +
                        'cellStyle': CELL_STYLE,
         | 
| 116 | 
            +
                    },
         | 
| 117 | 
            +
                    {
         | 
| 118 | 
            +
                        'headerName': 'Embd Dim',
         | 
| 119 | 
            +
                        'field': 'embd_dim',
         | 
| 120 | 
            +
                        'headerStyle': HEADER_STYLE,
         | 
| 121 | 
            +
                        'cellStyle': CELL_STYLE,
         | 
| 122 | 
            +
                    },
         | 
| 123 | 
            +
                    {
         | 
| 124 | 
            +
                        'headerName': 'Model Size (# of Parameters)',
         | 
| 125 | 
            +
                        'field': 'num_params',
         | 
| 126 | 
            +
                        'cellDataType': 'number',
         | 
| 127 | 
            +
                        'headerStyle': HEADER_STYLE,
         | 
| 128 | 
            +
                        'cellStyle': CELL_STYLE,
         | 
| 129 | 
            +
                    },
         | 
| 130 | 
            +
                    {
         | 
| 131 | 
            +
                        'headerName': 'Context Length',
         | 
| 132 | 
            +
                        'field': 'max_tokens',
         | 
| 133 | 
            +
                        'headerStyle': HEADER_STYLE,
         | 
| 134 | 
            +
                        'cellStyle': CELL_STYLE,
         | 
| 135 | 
            +
                    },
         | 
| 136 | 
            +
                    {
         | 
| 137 | 
            +
                        'headerName': 'Query Instruction',
         | 
| 138 | 
            +
                        'field': 'query_instruct',
         | 
| 139 | 
            +
                        'headerStyle': HEADER_STYLE,
         | 
| 140 | 
            +
                        'cellStyle': CELL_STYLE,
         | 
| 141 | 
            +
                        'suppressSizeToFit': True,
         | 
| 142 | 
            +
             | 
| 143 | 
            +
                    },
         | 
| 144 | 
            +
                    {
         | 
| 145 | 
            +
                        'headerName': 'Corpus Instruction',
         | 
| 146 | 
            +
                        'field': 'corpus_instruct',
         | 
| 147 | 
            +
                        'headerStyle': HEADER_STYLE,
         | 
| 148 | 
            +
                        'cellStyle': CELL_STYLE,
         | 
| 149 | 
            +
                        'suppressSizeToFit': True,
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                    },
         | 
| 152 | 
            +
                ],
         | 
| 153 | 
            +
                'defaultColDef': {
         | 
| 154 | 
            +
                    'filter': True,
         | 
| 155 | 
            +
                    'sortable': True,
         | 
| 156 | 
            +
                    'resizable': True
         | 
| 157 | 
            +
                },
         | 
| 158 | 
            +
                'autoSizeStrategy': {
         | 
| 159 | 
            +
                    'type': 'fitCellContents'
         | 
| 160 | 
            +
                }
         | 
| 161 | 
            +
            }
         | 
| 162 | 
            +
             | 
| 163 | 
            +
            ag = AgGrid(
         | 
| 164 | 
            +
                df,
         | 
| 165 | 
            +
                enable_enterprise_modules=False,
         | 
| 166 | 
            +
                gridOptions=grid_options,
         | 
| 167 | 
            +
                allow_unsafe_jscode=True,
         | 
| 168 | 
            +
                columns_auto_size_mode=ColumnsAutoSizeMode.FIT_CONTENTS,
         | 
| 169 | 
            +
                theme="streamlit",
         | 
| 170 | 
            +
            )
         | 
    	
        app/ui/pages_sections.toml
    ADDED
    
    | @@ -0,0 +1,38 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [[pages]]
         | 
| 2 | 
            +
            name = "Text Leaderboard"
         | 
| 3 | 
            +
            icon = "📚"
         | 
| 4 | 
            +
            is_section = true
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            [[pages]]
         | 
| 7 | 
            +
            path = "app/ui/pages/text.py"
         | 
| 8 | 
            +
            name = "Overall"
         | 
| 9 | 
            +
            icon = "🏆"
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            [[pages]]
         | 
| 12 | 
            +
            path = "app/ui/pages/law.py"
         | 
| 13 | 
            +
            name = "Law"
         | 
| 14 | 
            +
            icon = "⚖️"
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            [[pages]]
         | 
| 17 | 
            +
            path = "app/ui/pages/multilingual.py"
         | 
| 18 | 
            +
            name = "Multilingual"
         | 
| 19 | 
            +
            icon = "🌎"
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            [[pages]]
         | 
| 22 | 
            +
            path = "app/ui/pages/german.py"
         | 
| 23 | 
            +
            name = "German"
         | 
| 24 | 
            +
            icon = "🇩🇪"
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            [[pages]]
         | 
| 27 | 
            +
            path = "app/ui/pages/code.py"
         | 
| 28 | 
            +
            name = "Code"
         | 
| 29 | 
            +
            icon = "💻"
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            [[pages]]
         | 
| 32 | 
            +
            path = "app/ui/pages/tech.py"
         | 
| 33 | 
            +
            name = "Tech"
         | 
| 34 | 
            +
            icon = "🛠️"
         | 
| 35 | 
            +
             | 
| 36 | 
            +
             | 
| 37 | 
            +
             | 
| 38 | 
            +
             | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            streamlit==1.41.1
         | 
| 2 | 
            +
            streamlit-aggrid==1.0.5
         | 
| 3 | 
            +
            st-pages==1.0.1
         | 
    	
        utils/__init__.py
    ADDED
    
    | 
            File without changes
         | 
    	
        utils/cache_decorator.py
    ADDED
    
    | @@ -0,0 +1,54 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import time
         | 
| 2 | 
            +
            from functools import wraps
         | 
| 3 | 
            +
            import pandas as pd
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            CACHE = {}
         | 
| 6 | 
            +
            TTL = 3600
         | 
| 7 | 
            +
             | 
| 8 | 
            +
             | 
| 9 | 
            +
            def cache_df_with_custom_key(cache_key: str):
                """Cache a DataFrame-returning function in ``CACHE`` under ``cache_key``.

                The key is fixed at decoration time, so the call arguments do not take
                part in the cache lookup: every call of the decorated function shares
                one entry.

                Behaviour of the wrapped function:

                * While the entry is younger than ``TTL`` seconds the cached frame is
                  returned and the wrapped function is not called.
                * Otherwise the function is called. A non-empty frame replaces the
                  entry and is returned.
                * If the refresh yields ``None`` or an empty frame, the previous
                  (stale) frame is served and kept for another ``TTL`` seconds. When
                  there is no previous entry, the empty result is returned as is.

                Args:
                    cache_key: Key of the entry inside the module-level ``CACHE``.

                Returns:
                    A decorator for functions that return a ``pandas.DataFrame``.
                """

                def decorator(func):
                    @wraps(func)
                    def wrapper(*args, **kwargs):
                        entry = CACHE.get(cache_key)
                        # "expiry" holds the absolute time after which the entry is
                        # stale. The previous check (`expiry - now < TTL` with
                        # expiry == store time) was always true, so nothing expired.
                        if entry is not None and time.time() < entry["expiry"]:
                            return entry["data"]

                        result: pd.DataFrame = func(*args, **kwargs)
                        if result is not None and not result.empty:
                            CACHE[cache_key] = {
                                "expiry": time.time() + TTL,
                                "data": result,
                            }
                            return result

                        if entry is None:
                            # Nothing cached to fall back on; indexing CACHE here
                            # used to raise KeyError on a failed first load.
                            return result

                        # Refresh failed: keep serving the stale frame and retry only
                        # after another TTL instead of on every call.
                        entry["expiry"] = time.time() + TTL
                        return entry["data"]

                    return wrapper

                return decorator
         | 
| 28 | 
            +
             | 
| 29 | 
            +
             | 
| 30 | 
            +
            def cache_dict_with_custom_key(cache_key: str):
                """Cache a dict-returning function in ``CACHE`` under ``cache_key``.

                The key is fixed at decoration time, so the call arguments do not take
                part in the cache lookup: every call of the decorated function shares
                one entry.

                Behaviour of the wrapped function:

                * While the entry is younger than ``TTL`` seconds the cached value is
                  returned and the wrapped function is not called.
                * Otherwise the function is called. A truthy result replaces the
                  entry and is returned.
                * If the refresh yields a falsy result (``None`` or ``{}``), the
                  previous (stale) value is served and kept for another ``TTL``
                  seconds. When there is no previous entry, the falsy result is
                  returned as is.

                Args:
                    cache_key: Key of the entry inside the module-level ``CACHE``.

                Returns:
                    A decorator for functions that return a ``dict``.
                """

                def decorator(func):
                    @wraps(func)
                    def wrapper(*args, **kwargs):
                        entry = CACHE.get(cache_key)
                        # "expiry" holds the absolute time after which the entry is
                        # stale. The previous check (`expiry - now < TTL` with
                        # expiry == store time) was always true, so nothing expired.
                        if entry is not None and time.time() < entry["expiry"]:
                            return entry["data"]

                        result: dict = func(*args, **kwargs)
                        if result:
                            CACHE[cache_key] = {
                                "expiry": time.time() + TTL,
                                "data": result,
                            }
                            return result

                        if entry is None:
                            # Nothing cached to fall back on; indexing CACHE here
                            # used to raise KeyError on a failed first load.
                            return result

                        # Refresh failed: keep serving the stale value and retry only
                        # after another TTL instead of on every call.
                        entry["expiry"] = time.time() + TTL
                        return entry["data"]

                    return wrapper

                return decorator
         | 
| 49 | 
            +
             | 
| 50 | 
            +
             | 
| 51 | 
            +
            if __name__ == '__main__':
                # Manual smoke check: measure a five second sleep with the same
                # wall clock the decorators use and print the elapsed seconds.
                started_at = time.time()
                time.sleep(5)
                elapsed = time.time() - started_at
                print(elapsed)
         | 
    	
        utils/http_utils.py
    ADDED
    
    | @@ -0,0 +1,5 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import requests
         | 
| 2 | 
            +
             | 
| 3 | 
            +
             | 
| 4 | 
            +
            def get(url: str, params: str = None, verify: bool = False, timeout: float = 30):
                """Send an HTTP GET request through ``requests`` and return the response.

                Args:
                    url: Address to request.
                    params: Query-string payload forwarded to ``requests.get``.
                    verify: Whether the TLS certificate of the server is verified.
                    timeout: Seconds to wait for the server before giving up. Without
                        it ``requests`` waits indefinitely and a stalled server
                        would block the caller.

                Returns:
                    The ``requests.Response`` produced by ``requests.get``.

                Raises:
                    requests.exceptions.Timeout: If the server does not answer within
                        ``timeout`` seconds.
                """
                # NOTE(review): verify defaults to False, which turns TLS certificate
                # verification off for every caller that does not opt in. The default
                # is left unchanged to preserve behaviour; confirm it is intentional.
                return requests.get(url, params, verify=verify, timeout=timeout)
         |