import streamlit as st
import pandas as pd
import numpy as np
import re
import io
import base64
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
# File processing imports
import PyPDF2
import pdfplumber
import docx
from docx import Document
# NLP imports
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
import spacy
from fuzzywuzzy import fuzz, process
import language_tool_python
# ML imports
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Report generation
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
# Download NLTK data if not already present
@st.cache_resource
def download_nltk_data():
try:
nltk.data.find('tokenizers/punkt')
nltk.data.find('corpora/stopwords')
nltk.data.find('corpora/wordnet')
except LookupError:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
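# Note: newer NLTK releases may also need the 'punkt_tab' resource for word_tokenize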
# Initialize tools
@st.cache_resource
def init_tools():
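"""Load the spaCy model and LanguageTool once per session; either may be None if unavailable."""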
download_nltk_data()
try:
nlp = spacy.load("en_core_web_sm")
except OSError:
st.warning("spaCy model not found. Install with: python -m spacy download en_core_web_sm")
nlp = None
try:
grammar_tool = language_tool_python.LanguageTool('en-US')
except Exception:
st.warning("Grammar tool initialization failed")
grammar_tool = None
return nlp, grammar_tool
class ResumeAnalyzer:
def __init__(self):
self.nlp, self.grammar_tool = init_tools()
self.stop_words = set(stopwords.words('english'))
self.lemmatizer = WordNetLemmatizer()
# Job role keywords dictionary
self.job_keywords = {
"Data Scientist": ["python", "machine learning", "statistics", "pandas", "numpy", "scikit-learn",
"tensorflow", "pytorch", "sql", "data analysis", "visualization", "jupyter"],
"Software Engineer": ["programming", "java", "python", "javascript", "react", "node.js", "database",
"git", "agile", "testing", "debugging", "api", "frontend", "backend"],
"Product Manager": ["product", "strategy", "roadmap", "stakeholder", "analytics", "user experience",
"market research", "agile", "scrum", "requirements", "metrics"],
"Marketing Manager": ["marketing", "digital marketing", "seo", "social media", "analytics", "campaigns",
"brand", "content", "advertising", "growth"],
"Data Analyst": ["sql", "excel", "python", "tableau", "power bi", "statistics", "reporting",
"data visualization", "business intelligence", "analytics"]
}
# Common skills database
self.technical_skills = [
"python", "java", "javascript", "c++", "sql", "html", "css", "react", "angular", "vue",
"machine learning", "deep learning", "tensorflow", "pytorch", "pandas", "numpy",
"docker", "kubernetes", "aws", "azure", "git", "jenkins", "ci/cd"
]
self.soft_skills = [
"leadership", "communication", "teamwork", "problem solving", "critical thinking",
"project management", "time management", "adaptability", "creativity", "analytical"
]
def extract_text_from_pdf(self, file):
"""Extract text from PDF file"""
try:
# Try pdfplumber first
with pdfplumber.open(file) as pdf:
text = ""
for page in pdf.pages:
text += page.extract_text() or ""
return text
except Exception:
# Fallback to PyPDF2
try:
pdf_reader = PyPDF2.PdfReader(file)
text = ""
for page in pdf_reader.pages:
text += page.extract_text() or ""
return text
except Exception:
return "Error extracting PDF text"
def extract_text_from_docx(self, file):
"""Extract text from DOCX file"""
try:
doc = Document(file)
text = ""
for paragraph in doc.paragraphs:
text += paragraph.text + "\n"
return text
except Exception:
return "Error extracting DOCX text"
def extract_text_from_txt(self, file):
"""Extract text from TXT file"""
try:
return str(file.read(), "utf-8")
except Exception:
return "Error extracting TXT text"
def preprocess_text(self, text):
"""Clean and preprocess text"""
# Remove special characters and digits
text = re.sub(r'[^a-zA-Z\s]', '', text)
# Convert to lowercase
text = text.lower()
# Tokenize
tokens = word_tokenize(text)
# Remove stopwords and lemmatize
tokens = [self.lemmatizer.lemmatize(token) for token in tokens
if token not in self.stop_words and len(token) > 2]
return tokens
def extract_sections(self, text):
"""Extract different sections from resume"""
sections = {}
# Define section patterns
section_patterns = {
'education': r'(education|academic|qualification|degree)',
'experience': r'(experience|employment|work|career|professional)',
'skills': r'(skills|technical|competencies|expertise)',
'projects': r'(projects|portfolio|work samples)',
'certifications': r'(certifications?|certificates?|licensed?)',
'summary': r'(summary|objective|profile|about)'
}
text_lower = text.lower()
lines = text.split('\n')
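# Heuristic scan: a line matching a section pattern is treated as that section's heading;
# the lines that follow are captured until another section's heading appears.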
for section_name, pattern in section_patterns.items():
section_content = []
capturing = False
for i, line in enumerate(lines):
if re.search(pattern, line.lower()):
capturing = True
continue
if capturing:
# Stop if we hit another section
if any(re.search(p, line.lower()) for p in section_patterns.values() if p != pattern):
break
if line.strip():
section_content.append(line.strip())
sections[section_name] = '\n'.join(section_content)
return sections
def extract_skills(self, text):
"""Extract technical and soft skills"""
text_lower = text.lower()
found_technical = []
found_soft = []
for skill in self.technical_skills:
if skill in text_lower:
found_technical.append(skill)
for skill in self.soft_skills:
if skill in text_lower:
found_soft.append(skill)
return found_technical, found_soft
def keyword_matching(self, text, job_role):
"""Match keywords for specific job role"""
if job_role not in self.job_keywords:
return [], 0
keywords = self.job_keywords[job_role]
text_lower = text.lower()
found_keywords = []
for keyword in keywords:
# Use fuzzy matching
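# partial_ratio scores the best-matching substring, so multi-word keywords still count (threshold: 80/100)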
if fuzz.partial_ratio(keyword, text_lower) > 80:
found_keywords.append(keyword)
match_percentage = (len(found_keywords) / len(keywords)) * 100
return found_keywords, match_percentage
def grammar_check(self, text):
"""Check grammar and language quality"""
if not self.grammar_tool:
return []
try:
matches = self.grammar_tool.check(text[:5000]) # Limit text length
return matches
except Exception:
return []
def calculate_ats_score(self, text, sections):
"""Calculate ATS friendliness score"""
score = 0
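# Scoring rubric: required sections (40), word count (20), contact info (20), bullet points (20)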
# Check for key sections (40 points)
required_sections = ['experience', 'education', 'skills']
for section in required_sections:
if sections.get(section) and len(sections[section]) > 50:
score += 40 / len(required_sections)  # split the 40 points evenly so three sections sum to exactly 40
# Check text length (20 points)
word_count = len(text.split())
if 300 <= word_count <= 800:
score += 20
elif word_count > 200:
score += 10
# Check for contact information (20 points)
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
phone_pattern = r'(\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
if re.search(email_pattern, text):
score += 10
if re.search(phone_pattern, text):
score += 10
# Check for bullet points (20 points)
bullet_patterns = [r'•', r'◦', r'\*', r'-\s', r'→']
bullet_count = sum(len(re.findall(pattern, text)) for pattern in bullet_patterns)
if bullet_count >= 5:
score += 20
elif bullet_count >= 2:
score += 10
return min(score, 100)
def generate_persona_summary(self, text, sections):
"""Generate AI-powered persona summary"""
# Simple template-based summary (can be enhanced with GPT API)
education = sections.get('education', '')
experience = sections.get('experience', '')
skills = sections.get('skills', '')
# Extract key information
degree_match = re.search(r'(bachelor|master|phd|degree|engineering|science|business)',
education.lower())
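# Rough proxy for experience: count four-digit numbers (years) mentioned in the experience section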
experience_years = len(re.findall(r'\b\d{4}\b', experience))
# Create summary template
summary_parts = []
if degree_match:
degree = degree_match.group(1).title()
summary_parts.append(f"A {degree} graduate")
else:
summary_parts.append("A dedicated professional")
if experience_years > 0:
summary_parts.append(f"with {experience_years}+ years of experience")
# Add skills context
tech_skills, soft_skills = self.extract_skills(text)
if tech_skills:
main_skills = ', '.join(tech_skills[:3])
summary_parts.append(f"skilled in {main_skills}")
if 'project' in text.lower():
summary_parts.append("with hands-on project experience")
summary = ' '.join(summary_parts) + "."
return summary
def main():
st.set_page_config(
page_title="AI Resume Analyzer",
page_icon="📄",
layout="wide"
)
st.title("🚀 AI-Powered Resume Analyzer")
st.markdown("Upload your resume and get comprehensive analysis with actionable insights!")
# Initialize analyzer
analyzer = ResumeAnalyzer()
# Sidebar for job role selection
st.sidebar.header("Analysis Settings")
job_roles = list(analyzer.job_keywords.keys())
selected_role = st.sidebar.selectbox("Select Target Job Role:", job_roles)
# File upload section
st.header("📁 Upload Your Resume")
uploaded_file = st.file_uploader(
"Choose your resume file",
type=['pdf', 'docx', 'txt'],
help="Supported formats: PDF, DOCX, TXT"
)
if uploaded_file is not None:
# Extract text based on file type
file_type = uploaded_file.type
with st.spinner("Extracting text from resume..."):
if file_type == "application/pdf":
text = analyzer.extract_text_from_pdf(uploaded_file)
elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
text = analyzer.extract_text_from_docx(uploaded_file)
else: # txt
text = analyzer.extract_text_from_txt(uploaded_file)
if "Error" not in text:
# Process the resume
st.success("✅ Resume uploaded and processed successfully!")
# Create tabs for different analyses
tab1, tab2, tab3, tab4, tab5 = st.tabs([
"πŸ“Š Overview", "🎯 Skills Analysis", "πŸ“ Section Breakdown",
"πŸ” ATS Analysis", "πŸ“‹ Report & Suggestions"
])
with tab1:
st.header("Resume Overview")
col1, col2 = st.columns(2)
with col1:
# Basic stats
word_count = len(text.split())
char_count = len(text)
st.metric("Word Count", word_count)
st.metric("Character Count", char_count)
# Extract sections
sections = analyzer.extract_sections(text)
st.metric("Sections Found", len([s for s in sections.values() if s]))
with col2:
# Generate persona summary
persona_summary = analyzer.generate_persona_summary(text, sections)
st.subheader("🎭 AI Persona Summary")
st.info(persona_summary)
# Word cloud
st.subheader("☁️ Word Cloud")
preprocessed_tokens = analyzer.preprocess_text(text)
if preprocessed_tokens:
wordcloud_text = ' '.join(preprocessed_tokens)
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(wordcloud_text)
fig, ax = plt.subplots(figsize=(12, 6))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
st.pyplot(fig)
with tab2:
st.header("Skills Analysis")
# Extract skills
tech_skills, soft_skills = analyzer.extract_skills(text)
col1, col2 = st.columns(2)
with col1:
st.subheader("🔧 Technical Skills")
if tech_skills:
for skill in tech_skills:
st.badge(skill, color="gray")  # st.badge takes a color keyword (recent Streamlit releases), not type
else:
st.info("No technical skills detected")
with col2:
st.subheader("🤝 Soft Skills")
if soft_skills:
for skill in soft_skills:
st.badge(skill, color="blue")
else:
st.info("No soft skills detected")
# Job role matching
st.subheader(f"🎯 Match Analysis for {selected_role}")
found_keywords, match_percentage = analyzer.keyword_matching(text, selected_role)
# Progress bar for match percentage
st.metric("Match Percentage", f"{match_percentage:.1f}%")
st.progress(match_percentage / 100)
if found_keywords:
st.write("**Found Keywords:**")
for keyword in found_keywords:
st.badge(keyword, color="green")
# Skills gap analysis
missing_keywords = [kw for kw in analyzer.job_keywords[selected_role] if kw not in found_keywords]
if missing_keywords:
st.write("**Missing Keywords (Consider Adding):**")
for keyword in missing_keywords[:10]: # Show top 10
st.badge(keyword, color="red")
with tab3:
st.header("Section Breakdown")
sections = analyzer.extract_sections(text)
for section_name, content in sections.items():
if content:
with st.expander(f"📋 {section_name.title()} Section"):
st.text_area(f"{section_name} content", content, height=150, disabled=True)
else:
st.warning(f"❌ {section_name.title()} section not found or empty")
with tab4:
st.header("ATS Analysis")
# Calculate ATS score
ats_score = analyzer.calculate_ats_score(text, sections)
col1, col2 = st.columns(2)
with col1:
# ATS Score gauge
fig = go.Figure(go.Indicator(
mode="gauge+number+delta",
value=ats_score,
domain={'x': [0, 1], 'y': [0, 1]},
title={'text': "ATS Friendliness Score"},
delta={'reference': 80},
gauge={
'axis': {'range': [None, 100]},
'bar': {'color': "darkblue"},
'steps': [
{'range': [0, 50], 'color': "lightgray"},
{'range': [50, 80], 'color': "yellow"},
{'range': [80, 100], 'color': "green"}
],
'threshold': {
'line': {'color': "red", 'width': 4},
'thickness': 0.75,
'value': 90
}
}
))
fig.update_layout(height=300)
st.plotly_chart(fig, use_container_width=True)
with col2:
# Grammar check
st.subheader("📝 Grammar Check")
grammar_errors = analyzer.grammar_check(text)
if grammar_errors:
st.warning(f"Found {len(grammar_errors)} potential issues")
for i, error in enumerate(grammar_errors[:5]): # Show first 5
st.text(f"{i+1}. {error.message}")
else:
st.success("✅ No major grammar issues detected")
# ATS recommendations
st.subheader("💡 ATS Improvement Suggestions")
recommendations = []
if ats_score < 70:
recommendations.extend([
"Add more bullet points to improve readability",
"Include contact information (email, phone)",
"Ensure all major sections are present",
"Use standard section headings"
])
if match_percentage < 60:
recommendations.append(f"Include more {selected_role}-specific keywords")
if len(text.split()) < 300:
recommendations.append("Consider adding more detailed descriptions")
for rec in recommendations:
st.write(f"• {rec}")
with tab5:
st.header("Comprehensive Report & Suggestions")
# Overall scores
col1, col2, col3 = st.columns(3)
with col1:
st.metric("ATS Score", f"{ats_score}/100",
delta=f"{ats_score-70} vs Average" if ats_score >= 70 else f"{ats_score-70} vs Average")
with col2:
st.metric("Role Match", f"{match_percentage:.1f}%",
delta=f"{match_percentage-60:.1f}% vs Good Match" if match_percentage >= 60 else f"{match_percentage-60:.1f}% vs Good Match")
with col3:
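# Overall score: simple unweighted average of the ATS score and the role-match percentage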
overall_score = (ats_score + match_percentage) / 2
st.metric("Overall Score", f"{overall_score:.1f}/100")
# Detailed feedback
st.subheader("📋 Detailed Feedback")
# Strengths
strengths = []
if ats_score >= 80:
strengths.append("Resume is ATS-friendly")
if match_percentage >= 70:
strengths.append(f"Strong match for {selected_role} position")
if len(tech_skills) >= 5:
strengths.append("Rich technical skill set")
if len([s for s in sections.values() if s]) >= 4:  # count only non-empty sections
strengths.append("Well-structured with multiple sections")
if strengths:
st.success("**Strengths:**")
for strength in strengths:
st.write(f"✅ {strength}")
# Areas for improvement
improvements = []
if ats_score < 70:
improvements.append("Improve ATS compatibility")
if match_percentage < 60:
improvements.append("Add more role-specific keywords")
if not sections.get('projects'):
improvements.append("Consider adding a projects section")
if len(soft_skills) < 3:
improvements.append("Highlight more soft skills")
if improvements:
st.warning("**Areas for Improvement:**")
for improvement in improvements:
st.write(f"⚠️ {improvement}")
# Generate downloadable report
st.subheader("📄 Download Report")
if st.button("Generate PDF Report"):
# Create a simple text report (in real implementation, use ReportLab)
report_content = f"""
RESUME ANALYSIS REPORT
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
OVERVIEW:
- ATS Score: {ats_score}/100
- Role Match: {match_percentage:.1f}%
- Overall Score: {overall_score:.1f}/100
PERSONA SUMMARY:
{persona_summary}
TECHNICAL SKILLS FOUND:
{', '.join(tech_skills) if tech_skills else 'None detected'}
SOFT SKILLS FOUND:
{', '.join(soft_skills) if soft_skills else 'None detected'}
ROLE-SPECIFIC KEYWORDS FOUND:
{', '.join(found_keywords) if found_keywords else 'None found'}
STRENGTHS:
{chr(10).join(f'- {s}' for s in strengths)}
AREAS FOR IMPROVEMENT:
{chr(10).join(f'- {i}' for i in improvements)}
"""
st.download_button(
label="📥 Download Report",
data=report_content,
file_name=f"resume_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
mime="text/plain"
)
else:
st.error("❌ Error processing the uploaded file. Please try a different file.")
if __name__ == "__main__":
main()