Spaces:

hackerrank
/

screen-HR

Sleeping

screen-HR / app.py

Rafik Matta

adding GPT2 as an option

4fa2adf over 1 year ago

4.23 kB

	# imports
	import json
	import time

	import gradio as gr
	from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
	import torch
	import torch.nn.functional as f

	from roles_list import roles

	# Sentence-embedding encoder (SBERT family): tokenizer first, then the bare
	# transformer whose outputs are turned into embeddings further down.
	sbert_model_name = "sentence-transformers/all-MiniLM-L12-v2"
	sbert_tokenizer = AutoTokenizer.from_pretrained(sbert_model_name)
	sbert_model = AutoModel.from_pretrained(sbert_model_name)

	# "LLM" scorer: BERT wrapped with a sequence-classification head.
	# NOTE(review): 'bert-base-uncased' is a base checkpoint; its classification
	# head is presumably freshly initialised rather than fine-tuned, so scores
	# from this model may not be meaningful until it is trained. Confirm.
	llm_model_name = "bert-base-uncased"
	llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
	llm_model = AutoModelForSequenceClassification.from_pretrained(llm_model_name)

	def _embed_text(text):
	    """Return the L2-normalised SBERT sentence embedding for `text`.

	    The text is tokenised exactly as before (padded up to 10 tokens, no
	    truncation requested) and run through `sbert_model` without gradients.
	    The sentence vector is the attention-mask-weighted mean of the token
	    embeddings, which is the pooling the all-MiniLM-L12-v2 model card
	    prescribes. The previous code used `pooler_output`, which is not the
	    sentence embedding for this checkpoint.

	    Args:
	        text: The string to embed.

	    Returns:
	        A tensor with one unit-length row per input text.
	    """
	    encoding = sbert_tokenizer(
	        text,
	        max_length=10,
	        padding="max_length",
	        return_tensors="pt",
	    )
	    with torch.no_grad():
	        token_embeddings = sbert_model(**encoding).last_hidden_state

	    # Zero out padding positions so they do not dilute the average.
	    mask = encoding["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
	    summed = (token_embeddings * mask).sum(dim=1)
	    # Guard against division by zero if a mask row were ever all zeros.
	    token_counts = mask.sum(dim=1).clamp(min=1e-9)
	    return f.normalize(summed / token_counts, p=2, dim=1)


	# Pre-compute one normalised embedding per known role so that each request
	# only has to embed the incoming title.
	embed_store = {role: _embed_text(role) for role in roles}
	print("SBERT model is ready for inference")


	def get_role_from_sbert(title):
	    """Rank every known role by cosine similarity to `title`.

	    Args:
	        title: Job title text to classify.

	    Returns:
	        A tuple `(job_scores, execution_time)`. `job_scores` is a list of
	        `{"Role": ..., "SBERT Score": ...}` dicts covering ALL roles in
	        `embed_store` (not just the top three), sorted by descending score
	        rounded to three decimals. `execution_time` is the elapsed time in
	        seconds.
	    """
	    # perf_counter is monotonic, so the duration cannot be skewed by
	    # wall-clock adjustments.
	    start_time = time.perf_counter()

	    # Embed (and normalise) the title once instead of once per role.
	    title_embed = _embed_text(title)

	    store_cos = {
	        role: round(f.cosine_similarity(title_embed, role_embed).item(), 3)
	        for role, role_embed in embed_store.items()
	    }

	    # Highest similarity first; sorted() is stable, so ties keep store order.
	    ranked_roles = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
	    job_scores = [{"Role": job, "SBERT Score": score} for job, score in ranked_roles]

	    execution_time = time.perf_counter() - start_time

	    return job_scores, execution_time


	def get_role_from_llm(title):
	    """Score `title` against every role with the sequence-classification model.

	    Each (title, role) pair is encoded as a two-segment input and passed
	    through `llm_model`; the softmax probability of class index 1 is used
	    as the score.

	    NOTE(review): the meaning of class index 1 depends on how `llm_model`
	    was trained; confirm the loaded checkpoint has a fine-tuned head.

	    Args:
	        title: Job title text to classify.

	    Returns:
	        A tuple `(llm_scores, execution_time)`. `llm_scores` is a list of
	        `{"Role": ..., "LLM Score": ...}` dicts in the order of `roles`,
	        with scores rounded to three decimals. `execution_time` is the
	        elapsed time in seconds.
	    """
	    # perf_counter is monotonic, so the duration cannot be skewed by
	    # wall-clock adjustments.
	    start_time = time.perf_counter()

	    llm_scores = []
	    for role in roles:
	        # Calling the tokenizer directly replaces the deprecated
	        # encode_plus() and produces the same sentence-pair encoding.
	        inputs = llm_tokenizer(
	            title,
	            role,
	            return_tensors="pt",
	            max_length=512,
	            truncation=True,
	        )
	        with torch.no_grad():
	            outputs = llm_model(**inputs)
	        # Single-item batch: row 0, probability of class index 1.
	        score = torch.softmax(outputs.logits, dim=1)[0][1].item()
	        llm_scores.append({"Role": role, "LLM Score": round(score, 3)})

	    execution_time = time.perf_counter() - start_time

	    return llm_scores, execution_time


	def classify_role(title):
	    """Run both scorers on `title` and merge their output into table rows.

	    Args:
	        title: Job title text to classify.

	    Returns:
	        A tuple `(results, execution_time_info)`. `results` is a list of
	        `[role, sbert_score, llm_score]` rows, SBERT-ranked roles first and
	        any LLM-only roles appended after them; a missing score is the
	        empty string. `execution_time_info` is a one-line timing summary.
	    """
	    sbert_scores, sbert_execution_time = get_role_from_sbert(title)
	    llm_scores, llm_execution_time = get_role_from_llm(title)

	    # Index the SBERT rows by role name; dict order preserves the ranking.
	    merged = {}
	    for entry in sbert_scores:
	        merged[entry["Role"]] = entry

	    # Attach the LLM score to the matching row, or create a row without an
	    # SBERT score when the role was not scored by SBERT.
	    for entry in llm_scores:
	        name = entry["Role"]
	        if name not in merged:
	            merged[name] = {"Role": name, "SBERT Score": "", "LLM Score": entry["LLM Score"]}
	            continue
	        merged[name]["LLM Score"] = entry["LLM Score"]

	    results = [
	        [name, row.get("SBERT Score", ""), row.get("LLM Score", "")]
	        for name, row in merged.items()
	    ]

	    execution_time_info = f"SBERT Execution Time: {sbert_execution_time:.4f} seconds, LLM Execution Time: {llm_execution_time:.4f} seconds"

	    return results, execution_time_info


	# Gradio UI: a single column with a title box, a button, the merged score
	# table and a read-only timing line.
	with gr.Blocks() as demo:
	    gr.Markdown("# HackerRank Role Classifier")

	    with gr.Column():
	        input_text = gr.Textbox(label="Job Title")
	        classify_button = gr.Button("Classify")
	        output_table = gr.Dataframe(
	            headers=["Role", "SBERT Score", "LLM Score"],
	            label="Role Scores",
	        )
	        execution_time_text = gr.Textbox(
	            label="Execution Time",
	            interactive=False,
	        )

	    # Event wiring must happen inside the Blocks context.
	    classify_button.click(
	        fn=classify_role,
	        inputs=input_text,
	        outputs=[output_table, execution_time_text],
	    )

	# Shut down any Gradio servers left over in this process before starting.
	gr.close_all()
	demo.launch()