import streamlit as st
import pandas as pd
import numpy as np
import re
import io
import base64
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime

# File processing imports
import PyPDF2
import pdfplumber
import docx
from docx import Document

# NLP imports
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
import spacy
from fuzzywuzzy import fuzz, process
import language_tool_python

# ML imports
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Report generation (reportlab is imported but not yet used; the report tab
# currently emits plain text). Note: BarChart is not part of reportlab.platypus,
# so importing it from there would raise ImportError at startup.
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch

# Download NLTK data if not already present
def download_nltk_data():
    try:
        nltk.data.find('tokenizers/punkt')
        nltk.data.find('corpora/stopwords')
        nltk.data.find('corpora/wordnet')
    except LookupError:
        nltk.download('punkt')
        nltk.download('stopwords')
        nltk.download('wordnet')
        # Note: newer NLTK releases may additionally require the 'punkt_tab'
        # resource for word_tokenize; download it too if tokenization raises
        # LookupError.

# Initialize tools
def init_tools():
    download_nltk_data()
    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        st.warning("spaCy model not found. Install with: python -m spacy download en_core_web_sm")
        nlp = None
    try:
        grammar_tool = language_tool_python.LanguageTool('en-US')
    except Exception:
        st.warning("Grammar tool initialization failed")
        grammar_tool = None
    return nlp, grammar_tool

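# Note (assumption, not in the original): Streamlit re-executes the whole
# script on every interaction, so constructing ResumeAnalyzer() in main()
# reloads the spaCy model and the LanguageTool server on each rerun. A common
# mitigation is to cache the instance once per process, e.g.:
#
#     @st.cache_resource
#     def get_analyzer():
#         return ResumeAnalyzer()
#
# and call get_analyzer() inside main() instead of constructing it directly.
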
class ResumeAnalyzer:
    def __init__(self):
        self.nlp, self.grammar_tool = init_tools()
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

        # Job role keywords dictionary
        self.job_keywords = {
            "Data Scientist": ["python", "machine learning", "statistics", "pandas", "numpy", "scikit-learn",
                               "tensorflow", "pytorch", "sql", "data analysis", "visualization", "jupyter"],
            "Software Engineer": ["programming", "java", "python", "javascript", "react", "node.js", "database",
                                  "git", "agile", "testing", "debugging", "api", "frontend", "backend"],
            "Product Manager": ["product", "strategy", "roadmap", "stakeholder", "analytics", "user experience",
                                "market research", "agile", "scrum", "requirements", "metrics"],
            "Marketing Manager": ["marketing", "digital marketing", "seo", "social media", "analytics", "campaigns",
                                  "brand", "content", "advertising", "growth"],
            "Data Analyst": ["sql", "excel", "python", "tableau", "power bi", "statistics", "reporting",
                             "data visualization", "business intelligence", "analytics"]
        }

        # Common skills database
        self.technical_skills = [
            "python", "java", "javascript", "c++", "sql", "html", "css", "react", "angular", "vue",
            "machine learning", "deep learning", "tensorflow", "pytorch", "pandas", "numpy",
            "docker", "kubernetes", "aws", "azure", "git", "jenkins", "ci/cd"
        ]
        self.soft_skills = [
            "leadership", "communication", "teamwork", "problem solving", "critical thinking",
            "project management", "time management", "adaptability", "creativity", "analytical"
        ]

    def extract_text_from_pdf(self, file):
        """Extract text from a PDF file."""
        try:
            # Try pdfplumber first
            with pdfplumber.open(file) as pdf:
                text = ""
                for page in pdf.pages:
                    text += page.extract_text() or ""
                return text
        except Exception:
            # Fall back to PyPDF2
            try:
                file.seek(0)  # rewind the stream after the failed pdfplumber read
                pdf_reader = PyPDF2.PdfReader(file)
                text = ""
                for page in pdf_reader.pages:
                    text += page.extract_text() or ""
                return text
            except Exception:
                return "Error extracting PDF text"

    def extract_text_from_docx(self, file):
        """Extract text from a DOCX file."""
        try:
            doc = Document(file)
            text = ""
            for paragraph in doc.paragraphs:
                text += paragraph.text + "\n"
            return text
        except Exception:
            return "Error extracting DOCX text"

    def extract_text_from_txt(self, file):
        """Extract text from a TXT file."""
        try:
            return str(file.read(), "utf-8")
        except Exception:
            return "Error extracting TXT text"

    def preprocess_text(self, text):
        """Clean and preprocess text."""
        # Remove special characters and digits
        text = re.sub(r'[^a-zA-Z\s]', '', text)
        # Convert to lowercase
        text = text.lower()
        # Tokenize
        tokens = word_tokenize(text)
        # Remove stopwords and lemmatize
        tokens = [self.lemmatizer.lemmatize(token) for token in tokens
                  if token not in self.stop_words and len(token) > 2]
        return tokens

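    # Illustrative example (assumption, not in the original):
    #   preprocess_text("Managed 5 teams; built dashboards")
    #   -> ['managed', 'team', 'built', 'dashboard']
    # Digits and punctuation are stripped, stopwords and tokens shorter than
    # three characters are dropped, and plural nouns are reduced to their lemma.
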
    def extract_sections(self, text):
        """Extract different sections from the resume."""
        sections = {}
        # Define section patterns
        section_patterns = {
            'education': r'(education|academic|qualification|degree)',
            'experience': r'(experience|employment|work|career|professional)',
            'skills': r'(skills|technical|competencies|expertise)',
            'projects': r'(projects|portfolio|work samples)',
            'certifications': r'(certifications?|certificates?|licensed?)',
            'summary': r'(summary|objective|profile|about)'
        }
        lines = text.split('\n')
        for section_name, pattern in section_patterns.items():
            section_content = []
            capturing = False
            for line in lines:
                if re.search(pattern, line.lower()):
                    capturing = True
                    continue
                if capturing:
                    # Stop if we hit another section heading
                    if any(re.search(p, line.lower()) for p in section_patterns.values() if p != pattern):
                        break
                    if line.strip():
                        section_content.append(line.strip())
            sections[section_name] = '\n'.join(section_content)
        return sections

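    # Heuristic note (assumption, describing the loop above): a line matching a
    # section keyword starts capture, and capture stops at the first later line
    # that matches any other section pattern. This suits simple one-column
    # resumes with one heading per line; multi-column or table-based layouts
    # may interleave text and confuse the boundaries.
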
    def extract_skills(self, text):
        """Extract technical and soft skills."""
        text_lower = text.lower()
        found_technical = []
        found_soft = []
        for skill in self.technical_skills:
            if skill in text_lower:
                found_technical.append(skill)
        for skill in self.soft_skills:
            if skill in text_lower:
                found_soft.append(skill)
        return found_technical, found_soft

    def keyword_matching(self, text, job_role):
        """Match keywords for a specific job role."""
        if job_role not in self.job_keywords:
            return [], 0
        keywords = self.job_keywords[job_role]
        text_lower = text.lower()
        found_keywords = []
        for keyword in keywords:
            # Use fuzzy matching
            if fuzz.partial_ratio(keyword, text_lower) > 80:
                found_keywords.append(keyword)
        match_percentage = (len(found_keywords) / len(keywords)) * 100
        return found_keywords, match_percentage

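    # Why partial_ratio (note on the call above): it scores the keyword against
    # the best-matching substring of the resume text, so "machine learning"
    # still matches inside "Machine Learning Engineer". The > 80 threshold
    # tolerates small spelling variations.
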
    def grammar_check(self, text):
        """Check grammar and language quality."""
        if not self.grammar_tool:
            return []
        try:
            # Limit text length to keep the check fast
            matches = self.grammar_tool.check(text[:5000])
            return matches
        except Exception:
            return []

    def calculate_ats_score(self, text, sections):
        """Calculate an ATS-friendliness score out of 100."""
        score = 0
        # Check for key sections (40 points total)
        required_sections = ['experience', 'education', 'skills']
        for section in required_sections:
            if sections.get(section) and len(sections[section]) > 50:
                score += 40 / 3
        # Check text length (20 points)
        word_count = len(text.split())
        if 300 <= word_count <= 800:
            score += 20
        elif word_count > 200:
            score += 10
        # Check for contact information (20 points)
        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
        phone_pattern = r'(\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
        if re.search(email_pattern, text):
            score += 10
        if re.search(phone_pattern, text):
            score += 10
        # Check for bullet points (20 points)
        bullet_patterns = [r'•', r'▪', r'\*', r'-\s', r'‣']
        bullet_count = sum(len(re.findall(pattern, text)) for pattern in bullet_patterns)
        if bullet_count >= 5:
            score += 20
        elif bullet_count >= 2:
            score += 10
        return min(round(score), 100)

    def generate_persona_summary(self, text, sections):
        """Generate a template-based persona summary (could be enhanced with an LLM API)."""
        education = sections.get('education', '')
        experience = sections.get('experience', '')
        # Extract key information
        degree_match = re.search(r'(bachelor|master|phd|degree|engineering|science|business)',
                                 education.lower())
        # Rough proxy: count 4-digit year mentions in the experience section
        experience_years = len(re.findall(r'\b\d{4}\b', experience))
        # Assemble the summary from template parts
        summary_parts = []
        if degree_match:
            degree = degree_match.group(1).title()
            summary_parts.append(f"A {degree} graduate")
        else:
            summary_parts.append("A dedicated professional")
        if experience_years > 0:
            summary_parts.append(f"with {experience_years}+ years of experience")
        # Add skills context
        tech_skills, soft_skills = self.extract_skills(text)
        if tech_skills:
            main_skills = ', '.join(tech_skills[:3])
            summary_parts.append(f"skilled in {main_skills}")
        if 'project' in text.lower():
            summary_parts.append("with hands-on project experience")
        summary = ' '.join(summary_parts) + "."
        return summary

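# --- Optional smoke test (assumption: not part of the original app) ---
# A minimal sketch of how ResumeAnalyzer can be exercised outside Streamlit,
# e.g. for quick debugging of the scoring heuristics on a toy resume.
def _demo():
    analyzer = ResumeAnalyzer()
    sample = (
        "Jane Doe\n"
        "jane.doe@example.com | (555) 123-4567\n"
        "Skills\n- python\n- sql\n- tableau\n"
        "Experience\nData Analyst, Acme Corp, 2019-2023\n"
        "Education\nBachelor of Science in Statistics\n"
    )
    sections = analyzer.extract_sections(sample)
    print("Sections found:", {k: bool(v) for k, v in sections.items()})
    print("ATS score:", analyzer.calculate_ats_score(sample, sections))
    print("Data Analyst match:", analyzer.keyword_matching(sample, "Data Analyst"))
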
def main():
    st.set_page_config(
        page_title="AI Resume Analyzer",
        page_icon="📄",
        layout="wide"
    )
    st.title("📄 AI-Powered Resume Analyzer")
    st.markdown("Upload your resume and get comprehensive analysis with actionable insights!")

    # Initialize analyzer
    analyzer = ResumeAnalyzer()

    # Sidebar for job role selection
    st.sidebar.header("Analysis Settings")
    job_roles = list(analyzer.job_keywords.keys())
    selected_role = st.sidebar.selectbox("Select Target Job Role:", job_roles)

    # File upload section
    st.header("📤 Upload Your Resume")
    uploaded_file = st.file_uploader(
        "Choose your resume file",
        type=['pdf', 'docx', 'txt'],
        help="Supported formats: PDF, DOCX, TXT"
    )

    if uploaded_file is not None:
        # Extract text based on file type
        file_type = uploaded_file.type
        with st.spinner("Extracting text from resume..."):
            if file_type == "application/pdf":
                text = analyzer.extract_text_from_pdf(uploaded_file)
            elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                text = analyzer.extract_text_from_docx(uploaded_file)
            else:  # txt
                text = analyzer.extract_text_from_txt(uploaded_file)

        if "Error" not in text:
            # Process the resume
            st.success("✅ Resume uploaded and processed successfully!")

            # Create tabs for the different analyses
            tab1, tab2, tab3, tab4, tab5 = st.tabs([
                "📊 Overview", "🎯 Skills Analysis", "📋 Section Breakdown",
                "🤖 ATS Analysis", "📄 Report & Suggestions"
            ])

            with tab1:
                st.header("Resume Overview")
                col1, col2 = st.columns(2)
                with col1:
                    # Basic stats
                    word_count = len(text.split())
                    char_count = len(text)
                    st.metric("Word Count", word_count)
                    st.metric("Character Count", char_count)
                    # Extract sections
                    sections = analyzer.extract_sections(text)
                    st.metric("Sections Found", len([s for s in sections.values() if s]))
                with col2:
                    # Generate persona summary
                    persona_summary = analyzer.generate_persona_summary(text, sections)
                    st.subheader("🧾 AI Persona Summary")
                    st.info(persona_summary)

                # Word cloud
                st.subheader("☁️ Word Cloud")
                preprocessed_tokens = analyzer.preprocess_text(text)
                if preprocessed_tokens:
                    wordcloud_text = ' '.join(preprocessed_tokens)
                    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(wordcloud_text)
                    fig, ax = plt.subplots(figsize=(12, 6))
                    ax.imshow(wordcloud, interpolation='bilinear')
                    ax.axis('off')
                    st.pyplot(fig)

            with tab2:
                st.header("Skills Analysis")
                # Extract skills
                tech_skills, soft_skills = analyzer.extract_skills(text)
                col1, col2 = st.columns(2)
                with col1:
                    st.subheader("🔧 Technical Skills")
                    if tech_skills:
                        for skill in tech_skills:
                            st.badge(skill, color="gray")
                    else:
                        st.info("No technical skills detected")
                with col2:
                    st.subheader("🤝 Soft Skills")
                    if soft_skills:
                        for skill in soft_skills:
                            st.badge(skill, color="blue")
                    else:
                        st.info("No soft skills detected")

                # Job role matching
                st.subheader(f"🎯 Match Analysis for {selected_role}")
                found_keywords, match_percentage = analyzer.keyword_matching(text, selected_role)

                # Progress bar for match percentage
                st.metric("Match Percentage", f"{match_percentage:.1f}%")
                st.progress(match_percentage / 100)

                if found_keywords:
                    st.write("**Found Keywords:**")
                    for keyword in found_keywords:
                        st.badge(keyword, color="green")

                # Skills gap analysis
                missing_keywords = [kw for kw in analyzer.job_keywords[selected_role] if kw not in found_keywords]
                if missing_keywords:
                    st.write("**Missing Keywords (Consider Adding):**")
                    for keyword in missing_keywords[:10]:  # Show top 10
                        st.badge(keyword, color="red")

            with tab3:
                st.header("Section Breakdown")
                sections = analyzer.extract_sections(text)
                for section_name, content in sections.items():
                    if content:
                        with st.expander(f"📄 {section_name.title()} Section"):
                            st.text_area(f"{section_name} content", content, height=150, disabled=True)
                    else:
                        st.warning(f"⚠️ {section_name.title()} section not found or empty")

            with tab4:
                st.header("ATS Analysis")
                # Calculate ATS score
                ats_score = analyzer.calculate_ats_score(text, sections)
                col1, col2 = st.columns(2)
                with col1:
                    # ATS score gauge
                    fig = go.Figure(go.Indicator(
                        mode="gauge+number+delta",
                        value=ats_score,
                        domain={'x': [0, 1], 'y': [0, 1]},
                        title={'text': "ATS Friendliness Score"},
                        delta={'reference': 80},
                        gauge={
                            'axis': {'range': [None, 100]},
                            'bar': {'color': "darkblue"},
                            'steps': [
                                {'range': [0, 50], 'color': "lightgray"},
                                {'range': [50, 80], 'color': "yellow"},
                                {'range': [80, 100], 'color': "green"}
                            ],
                            'threshold': {
                                'line': {'color': "red", 'width': 4},
                                'thickness': 0.75,
                                'value': 90
                            }
                        }
                    ))
                    fig.update_layout(height=300)
                    st.plotly_chart(fig, use_container_width=True)
                with col2:
                    # Grammar check
                    st.subheader("📝 Grammar Check")
                    grammar_errors = analyzer.grammar_check(text)
                    if grammar_errors:
                        st.warning(f"Found {len(grammar_errors)} potential issues")
                        for i, error in enumerate(grammar_errors[:5]):  # Show first 5
                            st.text(f"{i+1}. {error.message}")
                    else:
                        st.success("✅ No major grammar issues detected")

                # ATS recommendations
                st.subheader("💡 ATS Improvement Suggestions")
                recommendations = []
                if ats_score < 70:
                    recommendations.extend([
                        "Add more bullet points to improve readability",
                        "Include contact information (email, phone)",
                        "Ensure all major sections are present",
                        "Use standard section headings"
                    ])
                if match_percentage < 60:
                    recommendations.append(f"Include more {selected_role}-specific keywords")
                if len(text.split()) < 300:
                    recommendations.append("Consider adding more detailed descriptions")
                for rec in recommendations:
                    st.write(f"• {rec}")

            with tab5:
                st.header("Comprehensive Report & Suggestions")
                # Overall scores
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.metric("ATS Score", f"{ats_score}/100", delta=f"{ats_score - 70} vs Average")
                with col2:
                    st.metric("Role Match", f"{match_percentage:.1f}%", delta=f"{match_percentage - 60:.1f}% vs Good Match")
                with col3:
                    overall_score = (ats_score + match_percentage) / 2
                    st.metric("Overall Score", f"{overall_score:.1f}/100")

                # Detailed feedback
                st.subheader("📋 Detailed Feedback")

                # Strengths
                strengths = []
                if ats_score >= 80:
                    strengths.append("Resume is ATS-friendly")
                if match_percentage >= 70:
                    strengths.append(f"Strong match for {selected_role} position")
                if len(tech_skills) >= 5:
                    strengths.append("Rich technical skill set")
                if len(sections) >= 4:
                    strengths.append("Well-structured with multiple sections")
                if strengths:
                    st.success("**Strengths:**")
                    for strength in strengths:
                        st.write(f"✅ {strength}")

                # Areas for improvement
                improvements = []
                if ats_score < 70:
                    improvements.append("Improve ATS compatibility")
                if match_percentage < 60:
                    improvements.append("Add more role-specific keywords")
                if not sections.get('projects'):
                    improvements.append("Consider adding a projects section")
                if len(soft_skills) < 3:
                    improvements.append("Highlight more soft skills")
                if improvements:
                    st.warning("**Areas for Improvement:**")
                    for improvement in improvements:
                        st.write(f"⚠️ {improvement}")

                # Generate downloadable report
                st.subheader("📄 Download Report")
                if st.button("Generate Report"):
                    # Plain-text report for now (a real implementation could use ReportLab)
                    report_content = f"""
RESUME ANALYSIS REPORT
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

OVERVIEW:
- ATS Score: {ats_score}/100
- Role Match: {match_percentage:.1f}%
- Overall Score: {overall_score:.1f}/100

PERSONA SUMMARY:
{persona_summary}

TECHNICAL SKILLS FOUND:
{', '.join(tech_skills) if tech_skills else 'None detected'}

SOFT SKILLS FOUND:
{', '.join(soft_skills) if soft_skills else 'None detected'}

ROLE-SPECIFIC KEYWORDS FOUND:
{', '.join(found_keywords) if found_keywords else 'None found'}

STRENGTHS:
{chr(10).join(f'- {s}' for s in strengths)}

AREAS FOR IMPROVEMENT:
{chr(10).join(f'- {i}' for i in improvements)}
"""
                    st.download_button(
                        label="📥 Download Report",
                        data=report_content,
                        file_name=f"resume_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
                        mime="text/plain"
                    )
        else:
            st.error("❌ Error processing the uploaded file. Please try a different file.")

if __name__ == "__main__":
    main()
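
# Usage note (assumption: this file is saved as app.py):
#   streamlit run app.py
# Third-party dependencies used above: streamlit, pandas, numpy, matplotlib,
# seaborn, wordcloud, plotly, PyPDF2, pdfplumber, python-docx, nltk, spacy,
# fuzzywuzzy, language-tool-python, scikit-learn, reportlab.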