import streamlit as st
import pandas as pd
import numpy as np
import re
import io
import base64
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
# File processing imports
import PyPDF2
import pdfplumber
import docx
from docx import Document
# NLP imports
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
import spacy
from fuzzywuzzy import fuzz, process
import language_tool_python
# ML imports
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Report generation
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
# Download NLTK data if not already present
@st.cache_resource
def download_nltk_data():
try:
nltk.data.find('tokenizers/punkt')
nltk.data.find('corpora/stopwords')
nltk.data.find('corpora/wordnet')
except LookupError:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
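# Note: newer NLTK releases may also need the 'punkt_tab' resource for word_tokenize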
# Initialize tools
@st.cache_resource
def init_tools():
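"""Load the spaCy model and LanguageTool once per session; either may be None if unavailable."""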
download_nltk_data()
try:
nlp = spacy.load("en_core_web_sm")
except OSError:
st.warning("spaCy model not found. Install with: python -m spacy download en_core_web_sm")
nlp = None
try:
grammar_tool = language_tool_python.LanguageTool('en-US')
except Exception:
st.warning("Grammar tool initialization failed")
grammar_tool = None
return nlp, grammar_tool
class ResumeAnalyzer:
def __init__(self):
self.nlp, self.grammar_tool = init_tools()
self.stop_words = set(stopwords.words('english'))
self.lemmatizer = WordNetLemmatizer()
# Job role keywords dictionary
self.job_keywords = {
"Data Scientist": ["python", "machine learning", "statistics", "pandas", "numpy", "scikit-learn",
"tensorflow", "pytorch", "sql", "data analysis", "visualization", "jupyter"],
"Software Engineer": ["programming", "java", "python", "javascript", "react", "node.js", "database",
"git", "agile", "testing", "debugging", "api", "frontend", "backend"],
"Product Manager": ["product", "strategy", "roadmap", "stakeholder", "analytics", "user experience",
"market research", "agile", "scrum", "requirements", "metrics"],
"Marketing Manager": ["marketing", "digital marketing", "seo", "social media", "analytics", "campaigns",
"brand", "content", "advertising", "growth"],
"Data Analyst": ["sql", "excel", "python", "tableau", "power bi", "statistics", "reporting",
"data visualization", "business intelligence", "analytics"]
}
# Common skills database
self.technical_skills = [
"python", "java", "javascript", "c++", "sql", "html", "css", "react", "angular", "vue",
"machine learning", "deep learning", "tensorflow", "pytorch", "pandas", "numpy",
"docker", "kubernetes", "aws", "azure", "git", "jenkins", "ci/cd"
]
self.soft_skills = [
"leadership", "communication", "teamwork", "problem solving", "critical thinking",
"project management", "time management", "adaptability", "creativity", "analytical"
]
def extract_text_from_pdf(self, file):
"""Extract text from PDF file"""
try:
# Try pdfplumber first
with pdfplumber.open(file) as pdf:
text = ""
for page in pdf.pages:
text += page.extract_text() or ""
return text
except Exception:
# Fallback to PyPDF2
try:
pdf_reader = PyPDF2.PdfReader(file)
text = ""
for page in pdf_reader.pages:
text += page.extract_text() or ""
return text
except Exception:
return "Error extracting PDF text"
def extract_text_from_docx(self, file):
"""Extract text from DOCX file"""
try:
doc = Document(file)
text = ""
for paragraph in doc.paragraphs:
text += paragraph.text + "\n"
return text
except Exception:
return "Error extracting DOCX text"
def extract_text_from_txt(self, file):
"""Extract text from TXT file"""
try:
return str(file.read(), "utf-8")
except Exception:
return "Error extracting TXT text"
def preprocess_text(self, text):
"""Clean and preprocess text"""
# Remove special characters and digits
text = re.sub(r'[^a-zA-Z\s]', '', text)
# Convert to lowercase
text = text.lower()
# Tokenize
tokens = word_tokenize(text)
# Remove stopwords and lemmatize
tokens = [self.lemmatizer.lemmatize(token) for token in tokens
if token not in self.stop_words and len(token) > 2]
return tokens
def extract_sections(self, text):
"""Extract different sections from resume"""
sections = {}
# Define section patterns
section_patterns = {
'education': r'(education|academic|qualification|degree)',
'experience': r'(experience|employment|work|career|professional)',
'skills': r'(skills|technical|competencies|expertise)',
'projects': r'(projects|portfolio|work samples)',
'certifications': r'(certifications?|certificates?|licensed?)',
'summary': r'(summary|objective|profile|about)'
}
text_lower = text.lower()
lines = text.split('\n')
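# Heuristic scan: a line matching a section pattern is treated as that section's heading;
# the lines that follow are captured until another section's heading appears.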
for section_name, pattern in section_patterns.items():
section_content = []
capturing = False
for i, line in enumerate(lines):
if re.search(pattern, line.lower()):
capturing = True
continue
if capturing:
# Stop if we hit another section
if any(re.search(p, line.lower()) for p in section_patterns.values() if p != pattern):
break
if line.strip():
section_content.append(line.strip())
sections[section_name] = '\n'.join(section_content)
return sections
def extract_skills(self, text):
"""Extract technical and soft skills"""
text_lower = text.lower()
found_technical = []
found_soft = []
for skill in self.technical_skills:
if skill in text_lower:
found_technical.append(skill)
for skill in self.soft_skills:
if skill in text_lower:
found_soft.append(skill)
return found_technical, found_soft
def keyword_matching(self, text, job_role):
"""Match keywords for specific job role"""
if job_role not in self.job_keywords:
return [], 0
keywords = self.job_keywords[job_role]
text_lower = text.lower()
found_keywords = []
for keyword in keywords:
# Use fuzzy matching
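# partial_ratio scores the best-matching substring, so multi-word keywords still count (threshold: 80/100)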
if fuzz.partial_ratio(keyword, text_lower) > 80:
found_keywords.append(keyword)
match_percentage = (len(found_keywords) / len(keywords)) * 100
return found_keywords, match_percentage
def grammar_check(self, text):
"""Check grammar and language quality"""
if not self.grammar_tool:
return []
try:
matches = self.grammar_tool.check(text[:5000]) # Limit text length
return matches
except Exception:
return []
def calculate_ats_score(self, text, sections):
"""Calculate ATS friendliness score"""
score = 0
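# Scoring rubric: required sections (40), word count (20), contact info (20), bullet points (20)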
# Check for key sections (40 points)
required_sections = ['experience', 'education', 'skills']
for section in required_sections:
if sections.get(section) and len(sections[section]) > 50:
score += 40 / len(required_sections)  # split the 40 points evenly so three sections sum to exactly 40
# Check text length (20 points)
word_count = len(text.split())
if 300 <= word_count <= 800:
score += 20
elif word_count > 200:
score += 10
# Check for contact information (20 points)
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
phone_pattern = r'(\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
if re.search(email_pattern, text):
score += 10
if re.search(phone_pattern, text):
score += 10
# Check for bullet points (20 points)
bullet_patterns = [r'•', r'◦', r'\*', r'-\s', r'→']
bullet_count = sum(len(re.findall(pattern, text)) for pattern in bullet_patterns)
if bullet_count >= 5:
score += 20
elif bullet_count >= 2:
score += 10
return min(score, 100)
def generate_persona_summary(self, text, sections):
"""Generate AI-powered persona summary"""
# Simple template-based summary (can be enhanced with GPT API)
education = sections.get('education', '')
experience = sections.get('experience', '')
skills = sections.get('skills', '')
# Extract key information
degree_match = re.search(r'(bachelor|master|phd|degree|engineering|science|business)',
education.lower())
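# Rough proxy for experience: count four-digit numbers (years) mentioned in the experience section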
experience_years = len(re.findall(r'\b\d{4}\b', experience))
# Create summary template
summary_parts = []
if degree_match:
degree = degree_match.group(1).title()
summary_parts.append(f"A {degree} graduate")
else:
summary_parts.append("A dedicated professional")
if experience_years > 0:
summary_parts.append(f"with {experience_years}+ years of experience")
# Add skills context
tech_skills, soft_skills = self.extract_skills(text)
if tech_skills:
main_skills = ', '.join(tech_skills[:3])
summary_parts.append(f"skilled in {main_skills}")
if 'project' in text.lower():
summary_parts.append("with hands-on project experience")
summary = ' '.join(summary_parts) + "."
return summary
def main():
st.set_page_config(
page_title="AI Resume Analyzer",
page_icon="📄",
layout="wide"
)
st.title("🚀 AI-Powered Resume Analyzer")
st.markdown("Upload your resume and get comprehensive analysis with actionable insights!")
# Initialize analyzer
analyzer = ResumeAnalyzer()
# Sidebar for job role selection
st.sidebar.header("Analysis Settings")
job_roles = list(analyzer.job_keywords.keys())
selected_role = st.sidebar.selectbox("Select Target Job Role:", job_roles)
# File upload section
st.header("📁 Upload Your Resume")
uploaded_file = st.file_uploader(
"Choose your resume file",
type=['pdf', 'docx', 'txt'],
help="Supported formats: PDF, DOCX, TXT"
)
if uploaded_file is not None:
# Extract text based on file type
file_type = uploaded_file.type
with st.spinner("Extracting text from resume..."):
if file_type == "application/pdf":
text = analyzer.extract_text_from_pdf(uploaded_file)
elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
text = analyzer.extract_text_from_docx(uploaded_file)
else: # txt
text = analyzer.extract_text_from_txt(uploaded_file)
if "Error" not in text:
# Process the resume
st.success("✅ Resume uploaded and processed successfully!")
# Create tabs for different analyses
tab1, tab2, tab3, tab4, tab5 = st.tabs([
"πŸ“Š Overview", "🎯 Skills Analysis", "πŸ“ Section Breakdown",
"πŸ” ATS Analysis", "πŸ“‹ Report & Suggestions"
])
with tab1:
st.header("Resume Overview")
col1, col2 = st.columns(2)
with col1:
# Basic stats
word_count = len(text.split())
char_count = len(text)
st.metric("Word Count", word_count)
st.metric("Character Count", char_count)
# Extract sections
sections = analyzer.extract_sections(text)
st.metric("Sections Found", len([s for s in sections.values() if s]))
with col2:
# Generate persona summary
persona_summary = analyzer.generate_persona_summary(text, sections)
st.subheader("🎭 AI Persona Summary")
st.info(persona_summary)
# Word cloud
st.subheader("☁️ Word Cloud")
preprocessed_tokens = analyzer.preprocess_text(text)
if preprocessed_tokens:
wordcloud_text = ' '.join(preprocessed_tokens)
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(wordcloud_text)
fig, ax = plt.subplots(figsize=(12, 6))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
st.pyplot(fig)
with tab2:
st.header("Skills Analysis")
# Extract skills
tech_skills, soft_skills = analyzer.extract_skills(text)
col1, col2 = st.columns(2)
with col1:
st.subheader("🔧 Technical Skills")
if tech_skills:
for skill in tech_skills:
st.badge(skill, color="gray")  # st.badge takes a color keyword (recent Streamlit releases), not type
else:
st.info("No technical skills detected")
with col2:
st.subheader("🤝 Soft Skills")
if soft_skills:
for skill in soft_skills:
st.badge(skill, color="blue")
else:
st.info("No soft skills detected")
# Job role matching
st.subheader(f"🎯 Match Analysis for {selected_role}")
found_keywords, match_percentage = analyzer.keyword_matching(text, selected_role)
# Progress bar for match percentage
st.metric("Match Percentage", f"{match_percentage:.1f}%")
st.progress(match_percentage / 100)
if found_keywords:
st.write("**Found Keywords:**")
for keyword in found_keywords:
st.badge(keyword, color="green")
# Skills gap analysis
missing_keywords = [kw for kw in analyzer.job_keywords[selected_role] if kw not in found_keywords]
if missing_keywords:
st.write("**Missing Keywords (Consider Adding):**")
for keyword in missing_keywords[:10]: # Show top 10
st.badge(keyword, color="red")
with tab3:
st.header("Section Breakdown")
sections = analyzer.extract_sections(text)
for section_name, content in sections.items():
if content:
with st.expander(f"📋 {section_name.title()} Section"):
st.text_area(f"{section_name} content", content, height=150, disabled=True)
else:
st.warning(f"❌ {section_name.title()} section not found or empty")
with tab4:
st.header("ATS Analysis")
# Calculate ATS score
ats_score = analyzer.calculate_ats_score(text, sections)
col1, col2 = st.columns(2)
with col1:
# ATS Score gauge
fig = go.Figure(go.Indicator(
mode="gauge+number+delta",
value=ats_score,
domain={'x': [0, 1], 'y': [0, 1]},
title={'text': "ATS Friendliness Score"},
delta={'reference': 80},
gauge={
'axis': {'range': [None, 100]},
'bar': {'color': "darkblue"},
'steps': [
{'range': [0, 50], 'color': "lightgray"},
{'range': [50, 80], 'color': "yellow"},
{'range': [80, 100], 'color': "green"}
],
'threshold': {
'line': {'color': "red", 'width': 4},
'thickness': 0.75,
'value': 90
}
}
))
fig.update_layout(height=300)
st.plotly_chart(fig, use_container_width=True)
with col2:
# Grammar check
st.subheader("📝 Grammar Check")
grammar_errors = analyzer.grammar_check(text)
if grammar_errors:
st.warning(f"Found {len(grammar_errors)} potential issues")
for i, error in enumerate(grammar_errors[:5]): # Show first 5
st.text(f"{i+1}. {error.message}")
else:
st.success("✅ No major grammar issues detected")
# ATS recommendations
st.subheader("💡 ATS Improvement Suggestions")
recommendations = []
if ats_score < 70:
recommendations.extend([
"Add more bullet points to improve readability",
"Include contact information (email, phone)",
"Ensure all major sections are present",
"Use standard section headings"
])
if match_percentage < 60:
recommendations.append(f"Include more {selected_role}-specific keywords")
if len(text.split()) < 300:
recommendations.append("Consider adding more detailed descriptions")
for rec in recommendations:
st.write(f"• {rec}")
with tab5:
st.header("Comprehensive Report & Suggestions")
# Overall scores
col1, col2, col3 = st.columns(3)
with col1:
st.metric("ATS Score", f"{ats_score}/100",
delta=f"{ats_score-70} vs Average" if ats_score >= 70 else f"{ats_score-70} vs Average")
with col2:
st.metric("Role Match", f"{match_percentage:.1f}%",
delta=f"{match_percentage-60:.1f}% vs Good Match" if match_percentage >= 60 else f"{match_percentage-60:.1f}% vs Good Match")
with col3:
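# Overall score: simple unweighted average of the ATS score and the role-match percentage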
overall_score = (ats_score + match_percentage) / 2
st.metric("Overall Score", f"{overall_score:.1f}/100")
# Detailed feedback
st.subheader("📋 Detailed Feedback")
# Strengths
strengths = []
if ats_score >= 80:
strengths.append("Resume is ATS-friendly")
if match_percentage >= 70:
strengths.append(f"Strong match for {selected_role} position")
if len(tech_skills) >= 5:
strengths.append("Rich technical skill set")
if len([s for s in sections.values() if s]) >= 4:  # count only non-empty sections
strengths.append("Well-structured with multiple sections")
if strengths:
st.success("**Strengths:**")
for strength in strengths:
st.write(f"✅ {strength}")
# Areas for improvement
improvements = []
if ats_score < 70:
improvements.append("Improve ATS compatibility")
if match_percentage < 60:
improvements.append("Add more role-specific keywords")
if not sections.get('projects'):
improvements.append("Consider adding a projects section")
if len(soft_skills) < 3:
improvements.append("Highlight more soft skills")
if improvements:
st.warning("**Areas for Improvement:**")
for improvement in improvements:
st.write(f"⚠️ {improvement}")
# Generate downloadable report
st.subheader("📄 Download Report")
if st.button("Generate PDF Report"):
# Create a simple text report (in real implementation, use ReportLab)
report_content = f"""
RESUME ANALYSIS REPORT
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
OVERVIEW:
- ATS Score: {ats_score}/100
- Role Match: {match_percentage:.1f}%
- Overall Score: {overall_score:.1f}/100
PERSONA SUMMARY:
{persona_summary}
TECHNICAL SKILLS FOUND:
{', '.join(tech_skills) if tech_skills else 'None detected'}
SOFT SKILLS FOUND:
{', '.join(soft_skills) if soft_skills else 'None detected'}
ROLE-SPECIFIC KEYWORDS FOUND:
{', '.join(found_keywords) if found_keywords else 'None found'}
STRENGTHS:
{chr(10).join(f'- {s}' for s in strengths)}
AREAS FOR IMPROVEMENT:
{chr(10).join(f'- {i}' for i in improvements)}
"""
st.download_button(
label="📥 Download Report",
data=report_content,
file_name=f"resume_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
mime="text/plain"
)
else:
st.error("❌ Error processing the uploaded file. Please try a different file.")
if __name__ == "__main__":
main()