Spaces:
Sleeping
Sleeping
Rafik Matta
committed on
Commit
·
4fa2adf
1
Parent(s):
9313b78
adding GPT2 as an option
Browse files
app.py
CHANGED
|
@@ -3,20 +3,23 @@ import json
|
|
| 3 |
import time
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
-
from transformers import AutoTokenizer, AutoModel
|
| 7 |
-
# pytorch library
|
| 8 |
import torch
|
| 9 |
import torch.nn.functional as f
|
| 10 |
|
| 11 |
-
|
| 12 |
from roles_list import roles
|
| 13 |
-
# Load the model from the specified directory
|
| 14 |
-
embed_store = {}
|
| 15 |
-
model = 'sentence-transformers/all-MiniLM-L12-v2'
|
| 16 |
-
sbert_model = AutoModel.from_pretrained(model)
|
| 17 |
-
sbert_tokenizer = AutoTokenizer.from_pretrained(model)
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
|
|
|
| 20 |
for role in roles:
|
| 21 |
encoding = sbert_tokenizer(role, # the texts to be tokenized
|
| 22 |
max_length=10,
|
|
@@ -28,16 +31,16 @@ for role in roles:
|
|
| 28 |
embed = sbert_model(**encoding)
|
| 29 |
embed = embed.pooler_output
|
| 30 |
embed_store[role] = f.normalize(embed, p=2, dim=1)
|
| 31 |
-
print("
|
| 32 |
|
| 33 |
|
| 34 |
def get_role_from_sbert(title):
|
| 35 |
start_time = time.time()
|
| 36 |
encoding = sbert_tokenizer(title,
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
# Run the model prediction on the input data
|
| 42 |
with torch.no_grad():
|
| 43 |
# get the model embeddings
|
|
@@ -49,19 +52,62 @@ def get_role_from_sbert(title):
|
|
| 49 |
store_cos[role] = round(cos_sim.item(), 3)
|
| 50 |
# Get the top 3 items with the highest cosine similarity
|
| 51 |
top_3_keys_values = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
end_time = time.time()
|
| 55 |
execution_time = end_time - start_time
|
| 56 |
-
|
| 57 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
|
|
|
| 65 |
|
| 66 |
gr.close_all()
|
| 67 |
-
demo.launch()
|
|
|
|
| 3 |
import time
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
+
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
|
|
|
|
| 7 |
import torch
|
| 8 |
import torch.nn.functional as f
|
| 9 |
|
|
|
|
| 10 |
from roles_list import roles
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# SBERT encoder and tokenizer: used below to embed each role name and,
# at request time, the submitted job title.
sbert_model_name = 'sentence-transformers/all-MiniLM-L12-v2'
sbert_tokenizer = AutoTokenizer.from_pretrained(sbert_model_name)
sbert_model = AutoModel.from_pretrained(sbert_model_name)

# Sequence-classification model and tokenizer that back the "LLM Score".
# NOTE(review): this is a base checkpoint name; unless the checkpoint ships a
# trained classification head, the scores it produces are not meaningful --
# confirm the intended model.
llm_model_name = 'bert-base-uncased'  # Using BERT for sequence classification
llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm_model = AutoModelForSequenceClassification.from_pretrained(llm_model_name)
|
| 21 |
|
| 22 |
+
embed_store = {}
|
| 23 |
for role in roles:
|
| 24 |
encoding = sbert_tokenizer(role, # the texts to be tokenized
|
| 25 |
max_length=10,
|
|
|
|
| 31 |
embed = sbert_model(**encoding)
|
| 32 |
embed = embed.pooler_output
|
| 33 |
embed_store[role] = f.normalize(embed, p=2, dim=1)
|
| 34 |
+
print("SBERT model is ready for inference")
|
| 35 |
|
| 36 |
|
| 37 |
def get_role_from_sbert(title):
|
| 38 |
start_time = time.time()
|
| 39 |
encoding = sbert_tokenizer(title,
|
| 40 |
+
max_length=10,
|
| 41 |
+
padding="max_length",
|
| 42 |
+
return_tensors='pt'
|
| 43 |
+
)
|
| 44 |
# Run the model prediction on the input data
|
| 45 |
with torch.no_grad():
|
| 46 |
# get the model embeddings
|
|
|
|
| 52 |
store_cos[role] = round(cos_sim.item(), 3)
|
| 53 |
# Get the top 3 items with the highest cosine similarity
|
| 54 |
top_3_keys_values = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
|
| 55 |
+
job_scores = [{"Role": job, "SBERT Score": score} for job, score in top_3_keys_values]
|
| 56 |
+
|
| 57 |
+
end_time = time.time()
|
| 58 |
+
execution_time = end_time - start_time
|
| 59 |
+
|
| 60 |
+
return job_scores, execution_time
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def get_role_from_llm(title):
    """Score ``title`` against every entry in ``roles`` with ``llm_model``.

    Each (title, role) pair is tokenized as a two-segment input and run
    through the sequence-classification model; the softmax probability of
    class index 1 is recorded as that role's score.

    Args:
        title: Job title text to classify.

    Returns:
        A ``(scores, execution_time)`` tuple. ``scores`` is a list of
        ``{"Role": role, "LLM Score": score}`` dicts, in the same order as
        ``roles``, with each score rounded to 3 decimals. ``execution_time``
        is the elapsed time in seconds.

    NOTE(review): the probabilities are only meaningful if ``llm_model`` has
    a trained classification head -- confirm the checkpoint being loaded.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    llm_scores = []
    # Inference only: a single no_grad context covers the whole loop instead
    # of being re-entered for every role.
    with torch.no_grad():
        for role in roles:
            # Calling the tokenizer directly is the supported replacement for
            # the deprecated encode_plus(); arguments are unchanged.
            inputs = llm_tokenizer(
                title,
                role,
                return_tensors='pt',
                max_length=512,
                truncation=True,
            )
            outputs = llm_model(**inputs)
            # Batch of one: take row 0, then the probability of class 1.
            score = torch.softmax(outputs.logits, dim=1)[0][1].item()
            llm_scores.append({"Role": role, "LLM Score": round(score, 3)})

    execution_time = time.perf_counter() - start_time

    return llm_scores, execution_time
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def classify_role(title):
    """Run both scorers on ``title`` and merge their output into table rows.

    Args:
        title: Job title text to classify.

    Returns:
        A ``(results, execution_time_info)`` tuple. ``results`` is a list of
        ``[role, sbert_score, llm_score]`` rows: roles returned by the SBERT
        scorer come first in that scorer's order, followed by any role only
        the LLM scorer returned. A missing score is the empty string.
        ``execution_time_info`` is a one-line summary of both timings.
    """
    sbert_rows, sbert_execution_time = get_role_from_sbert(title)
    llm_rows, llm_execution_time = get_role_from_llm(title)

    # Index the SBERT rows by role name; dict insertion order keeps their ranking.
    merged = {row["Role"]: row for row in sbert_rows}
    for row in llm_rows:
        name = row["Role"]
        existing = merged.get(name)
        if existing is None:
            # Role seen only by the LLM scorer: leave the SBERT column blank.
            merged[name] = {"Role": name, "SBERT Score": "", "LLM Score": row["LLM Score"]}
        else:
            # Attach the LLM score to the existing SBERT row (updated in place).
            existing["LLM Score"] = row["LLM Score"]

    # Flatten to rows in the column order the results table expects.
    results = [
        [name, entry.get("SBERT Score", ""), entry.get("LLM Score", "")]
        for name, entry in merged.items()
    ]

    execution_time_info = f"SBERT Execution Time: {sbert_execution_time:.4f} seconds, LLM Execution Time: {llm_execution_time:.4f} seconds"

    return results, execution_time_info
|
| 99 |
|
| 100 |
|
| 101 |
+
# Gradio UI: one text input, a button, a results table and a timing read-out.
# NOTE(review): nesting below is reconstructed from a flattened diff view --
# confirm the indentation against the real app.py.
with gr.Blocks() as demo:
    gr.Markdown("# HackerRank Role Classifier")
    with gr.Column():
        input_text = gr.Textbox(label="Job Title")
        classify_button = gr.Button("Classify")
        output_table = gr.Dataframe(
            headers=["Role", "SBERT Score", "LLM Score"],
            label="Role Scores",
        )
        execution_time_text = gr.Textbox(
            label="Execution Time",
            interactive=False,
        )

    # classify_role returns (rows, timing text), matching the two outputs in order.
    classify_button.click(
        fn=classify_role,
        inputs=input_text,
        outputs=[output_table, execution_time_text],
    )

# Close any Gradio interfaces that are still running, then start this one.
gr.close_all()
demo.launch()
|