Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Update evaluate.py
Browse files
- evaluate.py +11 -39
    	
        evaluate.py
    CHANGED
    
    | @@ -1,45 +1,17 @@ | |
| 1 | 
            -
            from  | 
| 2 | 
            -
            import torch
         | 
| 3 | 
            -
            from datasets import load_dataset
         | 
| 4 | 
            -
            from sklearn.metrics.pairwise import cosine_similarity
         | 
| 5 | 
            -
            import numpy as np
         | 
| 6 | 
            -
             | 
| 7 | 
            -
            def evaluate_model(model_name, dataset):
         | 
| 8 | 
            -
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         | 
| 9 | 
            -
             | 
| 10 | 
            -
                try:
         | 
| 11 | 
            -
                    tokenizer = AutoTokenizer.from_pretrained(model_name)
         | 
| 12 | 
            -
                    model = AutoModel.from_pretrained(model_name)
         | 
| 13 | 
            -
                    model.eval()
         | 
| 14 | 
            -
                    model.to(device)
         | 
| 15 | 
            -
                except Exception as e:
         | 
| 16 | 
            -
                    print(f"Model loading failed: {e}")
         | 
| 17 | 
            -
                    return None
         | 
| 18 | 
            -
             | 
| 19 | 
            -
                embeddings1, embeddings2 = [], []
         | 
| 20 |  | 
|  | |
| 21 | 
             
                try:
         | 
| 22 | 
            -
                     | 
| 23 | 
            -
                        inputs1 = tokenizer(item["instruction"], return_tensors="pt", truncation=True, padding=True).to(device)
         | 
| 24 | 
            -
                        inputs2 = tokenizer(item["output"], return_tensors="pt", truncation=True, padding=True).to(device)
         | 
| 25 | 
            -
             | 
| 26 | 
            -
                        with torch.no_grad():
         | 
| 27 | 
            -
                            embed1 = model(**inputs1).last_hidden_state[:, 0, :].cpu().numpy()
         | 
| 28 | 
            -
                            embed2 = model(**inputs2).last_hidden_state[:, 0, :].cpu().numpy()
         | 
| 29 | 
            -
             | 
| 30 | 
            -
                        embeddings1.append(embed1.flatten())
         | 
| 31 | 
            -
                        embeddings2.append(embed2.flatten())
         | 
| 32 | 
            -
             | 
| 33 | 
            -
                    sims = [cosine_similarity([e1], [e2])[0][0] for e1, e2 in zip(embeddings1, embeddings2)]
         | 
| 34 |  | 
| 35 | 
            -
                     | 
| 36 | 
            -
             | 
| 37 | 
            -
                         | 
| 38 | 
            -
                         | 
| 39 | 
            -
             | 
| 40 | 
            -
                         | 
| 41 | 
            -
                        return None
         | 
| 42 |  | 
|  | |
| 43 | 
             
                except Exception as e:
         | 
| 44 | 
             
                    print(f"Evaluation failed: {e}")
         | 
| 45 | 
            -
                    return None
         | 
|  | |
| 1 | 
            +
            from sentence_transformers import SentenceTransformer, util
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 2 |  | 
| 3 | 
            +
            def evaluate_model(model_name,dataset):
         | 
| 4 | 
             
                try:
         | 
| 5 | 
            +
                    model = SentenceTransformer(model_name)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 6 |  | 
| 7 | 
            +
                    scores = []
         | 
| 8 | 
            +
                    for row in dataset:
         | 
| 9 | 
            +
                        emb1 = model.encode(row["instruction"], convert_to_tensor=True)
         | 
| 10 | 
            +
                        emb2 = model.encode(row["output"], convert_to_tensor=True)
         | 
| 11 | 
            +
                        sim_score = float(util.cos_sim(emb1, emb2)[0])
         | 
| 12 | 
            +
                        scores.append(sim_score)
         | 
|  | |
| 13 |  | 
| 14 | 
            +
                    return sum(scores) / len(scores)
         | 
| 15 | 
             
                except Exception as e:
         | 
| 16 | 
             
                    print(f"Evaluation failed: {e}")
         | 
| 17 | 
            +
                    return None
         |