Spaces:
Sleeping
Sleeping
| # imports | |
| import json | |
| import time | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification | |
| import torch | |
| import torch.nn.functional as f | |
| from roles_list import roles | |
# --- Model setup -----------------------------------------------------------
# Both checkpoints are fetched/loaded once at import time so that every
# request served by the UI reuses the same in-memory models.

# Sentence-embedding encoder used for the cosine-similarity ranking.
sbert_model_name = 'sentence-transformers/all-MiniLM-L12-v2'
sbert_tokenizer = AutoTokenizer.from_pretrained(sbert_model_name)
sbert_model = AutoModel.from_pretrained(sbert_model_name)

# Pair classifier used for the "LLM" score column.
# NOTE(review): 'bert-base-uncased' is a base checkpoint; loading it through
# AutoModelForSequenceClassification presumably attaches a freshly
# initialised classification head, so its scores may carry no signal until
# the model is fine-tuned -- confirm this is the intended checkpoint.
llm_model_name = 'bert-base-uncased'  # Using BERT for sequence classification
llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm_model = AutoModelForSequenceClassification.from_pretrained(llm_model_name)
def _mean_pool(token_embeddings, attention_mask):
    """Average the token embeddings over the non-padding positions.

    Args:
        token_embeddings: per-token hidden states, (batch, tokens, hidden).
        attention_mask: (batch, tokens) mask, 1 for real tokens, 0 for padding.

    Returns:
        One pooled vector per batch row, (batch, hidden).
    """
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp so an all-padding row cannot divide by zero.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / token_counts


def _embed_text(text):
    """Return the L2-normalised SBERT sentence embedding for one string.

    The sentence-transformers checkpoints are trained with mean pooling over
    the token embeddings; the BERT ``pooler_output`` is not the sentence
    embedding for these models, so mean pooling is used here.  The same
    helper is used for the stored roles and for incoming titles so both
    sides of the cosine similarity live in the same space.
    """
    # NOTE(review): no truncation is requested, so inputs longer than
    # max_length are passed to the model at full length -- confirm whether
    # truncation=True was intended.
    encoding = sbert_tokenizer(
        text,
        max_length=10,
        padding="max_length",
        return_tensors='pt',  # return tensors (not lists)
    )
    with torch.no_grad():
        output = sbert_model(**encoding)
    pooled = _mean_pool(output.last_hidden_state, encoding["attention_mask"])
    return f.normalize(pooled, p=2, dim=1)


# Pre-compute one normalised embedding per known role at start-up so that a
# request only has to embed the incoming title.
embed_store = {role: _embed_text(role) for role in roles}
print("SBERT model is ready for inference")


def get_role_from_sbert(title):
    """Score every known role against ``title`` by SBERT cosine similarity.

    Args:
        title: free-text job title to classify.

    Returns:
        A ``(job_scores, execution_time)`` tuple.  ``job_scores`` is a list
        of ``{"Role": ..., "SBERT Score": ...}`` dicts covering all roles in
        ``embed_store``, sorted by score in descending order (scores rounded
        to 3 decimals).  ``execution_time`` is the elapsed wall-clock time
        in seconds.
    """
    start_time = time.time()

    # Embed (and normalise) the title once, outside the per-role loop.
    title_embed = _embed_text(title)

    store_cos = {
        role: round(f.cosine_similarity(title_embed, role_embed).item(), 3)
        for role, role_embed in embed_store.items()
    }

    # All roles are returned, best match first (no top-k cut-off is applied).
    ranked_roles = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
    job_scores = [{"Role": job, "SBERT Score": score} for job, score in ranked_roles]

    execution_time = time.time() - start_time
    return job_scores, execution_time
def get_role_from_llm(title):
    """Score every role against ``title`` with the sequence-pair classifier.

    Each (title, role) pair is encoded as a two-segment input and run through
    ``llm_model``; the softmax probability of class index 1 is reported as
    the score.

    NOTE(review): the meaning of class index 1 depends on how ``llm_model``
    was trained -- confirm it represents "title matches role".

    Args:
        title: free-text job title to classify.

    Returns:
        A ``(llm_scores, execution_time)`` tuple.  ``llm_scores`` is a list
        of ``{"Role": ..., "LLM Score": ...}`` dicts in the iteration order
        of ``roles`` (scores rounded to 3 decimals).  ``execution_time`` is
        the elapsed wall-clock time in seconds.
    """
    start_time = time.time()
    llm_scores = []

    # Inference only: a single no_grad scope covers the whole loop.
    with torch.no_grad():
        for role in roles:
            # Calling the tokenizer directly is the supported replacement
            # for the deprecated ``encode_plus``.
            inputs = llm_tokenizer(
                title,
                text_pair=role,
                return_tensors='pt',
                max_length=512,
                truncation=True,
            )
            logits = llm_model(**inputs).logits
            score = torch.softmax(logits, dim=1)[0][1].item()
            llm_scores.append({"Role": role, "LLM Score": round(score, 3)})

    execution_time = time.time() - start_time
    return llm_scores, execution_time
def classify_role(title):
    """Run both scorers on ``title`` and merge their output into table rows.

    Args:
        title: free-text job title entered by the user.

    Returns:
        A ``(results, execution_time_info)`` tuple.  ``results`` is a list of
        ``[role, sbert_score, llm_score]`` rows, with ``""`` standing in for
        a score that one of the scorers did not produce.
        ``execution_time_info`` is a human-readable timing summary.
    """
    sbert_scores, sbert_execution_time = get_role_from_sbert(title)
    llm_scores, llm_execution_time = get_role_from_llm(title)

    # Index the SBERT rows by role name, then fold the LLM scores into them;
    # roles seen only by the LLM scorer get a row with a blank SBERT score.
    merged = {entry["Role"]: entry for entry in sbert_scores}
    for entry in llm_scores:
        name = entry["Role"]
        row = merged.setdefault(name, {"Role": name, "SBERT Score": ""})
        row["LLM Score"] = entry["LLM Score"]

    results = [
        [name, row.get("SBERT Score", ""), row.get("LLM Score", "")]
        for name, row in merged.items()
    ]

    execution_time_info = f"SBERT Execution Time: {sbert_execution_time:.4f} seconds, LLM Execution Time: {llm_execution_time:.4f} seconds"
    return results, execution_time_info
# Gradio Blocks interface: one text input, one button, a score table and a
# read-only timing summary.
with gr.Blocks() as demo:
    gr.Markdown("# HackerRank Role Classifier")

    with gr.Column():
        input_text = gr.Textbox(label="Job Title")
        classify_button = gr.Button("Classify")
        output_table = gr.Dataframe(
            headers=["Role", "SBERT Score", "LLM Score"],
            label="Role Scores",
        )
        execution_time_text = gr.Textbox(
            label="Execution Time",
            interactive=False,
        )

    # classify_role returns (rows, timing string), matching the two outputs.
    classify_button.click(
        fn=classify_role,
        inputs=input_text,
        outputs=[output_table, execution_time_text],
    )

# Shut down any Gradio servers left over in this process before starting.
gr.close_all()
demo.launch()