ZainabFatimaa committed
Commit 036b515 · verified · 1 Parent(s): 18a38f3

Update src/app.py

Files changed (1):
  1. src/app.py +192 -63
src/app.py CHANGED
@@ -23,21 +23,38 @@ import nltk
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize, sent_tokenize
 from nltk.stem import WordNetLemmatizer
-import spacy
-from fuzzywuzzy import fuzz, process
-import language_tool_python
+
+# Optional imports with fallbacks
+try:
+    import spacy
+    SPACY_AVAILABLE = True
+except ImportError:
+    SPACY_AVAILABLE = False
+    st.warning("spaCy not installed. Some advanced NLP features will be limited.")
+
+try:
+    from fuzzywuzzy import fuzz, process
+    FUZZYWUZZY_AVAILABLE = True
+except ImportError:
+    FUZZYWUZZY_AVAILABLE = False
+    st.warning("fuzzywuzzy not installed. Using basic string matching instead.")
+
+try:
+    import language_tool_python
+    GRAMMAR_TOOL_AVAILABLE = True
+except ImportError:
+    GRAMMAR_TOOL_AVAILABLE = False
+    st.warning("language_tool_python not installed. Grammar checking will be disabled.")
 
 # ML imports
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 
-# Report generation - FIXED IMPORTS
+# Report generation
 from reportlab.lib.pagesizes import letter
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
 from reportlab.lib.styles import getSampleStyleSheet
 from reportlab.lib.units import inch
-
-# Chart imports (if you need charts for PDF reports)
 from reportlab.graphics.charts.barcharts import VerticalBarChart
 from reportlab.graphics.shapes import Drawing
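Note: the try/except import pattern above keeps the app usable when an optional package is missing. A minimal standalone sketch of the same idea (the similarity helper is illustrative, not part of app.py):

    try:
        from fuzzywuzzy import fuzz
        FUZZYWUZZY_AVAILABLE = True
    except ImportError:
        FUZZYWUZZY_AVAILABLE = False

    def similarity(a: str, b: str) -> int:
        # Degrade gracefully: fuzzy score when available, containment check otherwise
        if FUZZYWUZZY_AVAILABLE:
            return fuzz.partial_ratio(a.lower(), b.lower())
        return 100 if a.lower() in b.lower() else 0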
 
@@ -49,32 +66,93 @@ def download_nltk_data():
         nltk.data.find('corpora/stopwords')
         nltk.data.find('corpora/wordnet')
     except LookupError:
-        nltk.download('punkt')
-        nltk.download('stopwords')
-        nltk.download('wordnet')
+        with st.spinner("Downloading NLTK data..."):
+            nltk.download('punkt', quiet=True)
+            nltk.download('stopwords', quiet=True)
+            nltk.download('wordnet', quiet=True)
+            nltk.download('punkt_tab', quiet=True)  # For newer NLTK versions
 
-# Initialize tools
+# Initialize tools with better error handling
 @st.cache_resource
 def init_tools():
     download_nltk_data()
-    try:
-        nlp = spacy.load("en_core_web_sm")
-    except OSError:
-        st.warning("spaCy model not found. Install with: python -m spacy download en_core_web_sm")
-        nlp = None
 
-    try:
-        grammar_tool = language_tool_python.LanguageTool('en-US')
-    except:
-        st.warning("Grammar tool initialization failed")
-        grammar_tool = None
+    nlp = None
+    if SPACY_AVAILABLE:
+        try:
+            nlp = spacy.load("en_core_web_sm")
+            st.success("✅ spaCy model loaded successfully")
+        except OSError:
+            try:
+                # Try to download the model automatically
+                import subprocess
+                import sys
+                with st.spinner("Downloading spaCy model..."):
+                    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
+                                   check=True, capture_output=True)
+                nlp = spacy.load("en_core_web_sm")
+                st.success("✅ spaCy model downloaded and loaded successfully")
+            except Exception:
+                st.warning("⚠️ spaCy model not available. Advanced NLP features will be limited.")
+                nlp = None
+
+    grammar_tool = None
+    if GRAMMAR_TOOL_AVAILABLE:
+        try:
+            with st.spinner("Initializing grammar checker..."):
+                grammar_tool = language_tool_python.LanguageTool('en-US')
+            st.success("✅ Grammar tool initialized successfully")
+        except Exception:
+            st.warning("⚠️ Grammar tool initialization failed. Grammar checking will be disabled.")
+            grammar_tool = None
 
     return nlp, grammar_tool
 
+# Fallback functions for when dependencies are missing
+def simple_fuzzy_match(keyword, text):
+    """Simple fuzzy matching fallback when fuzzywuzzy is not available"""
+    keyword_lower = keyword.lower()
+    text_lower = text.lower()
+
+    # Exact match
+    if keyword_lower in text_lower:
+        return 100
+
+    # Check for partial matches with some tolerance
+    keyword_words = keyword_lower.split()
+    matches = sum(1 for word in keyword_words if word in text_lower)
+    return (matches / len(keyword_words)) * 100 if keyword_words else 0
+
+def basic_grammar_check(text):
+    """Basic grammar check when language_tool_python is not available"""
+    issues = []
+
+    # Check for common issues
+    sentences = sent_tokenize(text)
+
+    for i, sentence in enumerate(sentences):
+        # Check for sentences that are too long
+        if len(sentence.split()) > 30:
+            issues.append(f"Sentence {i+1} might be too long ({len(sentence.split())} words)")
+
+        # Check for repeated words
+        words = sentence.lower().split()
+        for j in range(len(words) - 1):
+            if words[j] == words[j + 1] and len(words[j]) > 3:
+                issues.append(f"Repeated word '{words[j]}' in sentence {i+1}")
+
+    return [type('MockError', (), {'message': issue}) for issue in issues]
+
 class ResumeAnalyzer:
     def __init__(self):
         self.nlp, self.grammar_tool = init_tools()
-        self.stop_words = set(stopwords.words('english'))
+
+        try:
+            self.stop_words = set(stopwords.words('english'))
+        except LookupError:
+            # Fallback stop words
+            self.stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'should', 'could', 'can', 'may', 'might', 'must'}
+
         self.lemmatizer = WordNetLemmatizer()
 
         # Job role keywords dictionary
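Note: the type('MockError', ...) trick in basic_grammar_check builds throwaway objects exposing the same .message attribute that language_tool_python's matches carry, so downstream code can treat both paths uniformly. A plainer equivalent, if the dynamic class feels opaque (GrammarIssue is illustrative, not in app.py):

    from dataclasses import dataclass

    @dataclass
    class GrammarIssue:
        message: str  # mirrors the .message attribute on LanguageTool matches

    issues = [GrammarIssue("Sentence 3 might be too long (41 words)")]
    print(issues[0].message)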
@@ -95,12 +173,14 @@ class ResumeAnalyzer:
         self.technical_skills = [
             "python", "java", "javascript", "c++", "sql", "html", "css", "react", "angular", "vue",
             "machine learning", "deep learning", "tensorflow", "pytorch", "pandas", "numpy",
-            "docker", "kubernetes", "aws", "azure", "git", "jenkins", "ci/cd"
+            "docker", "kubernetes", "aws", "azure", "git", "jenkins", "ci/cd", "mongodb", "postgresql",
+            "redis", "elasticsearch", "spark", "hadoop", "tableau", "power bi", "excel"
         ]
 
         self.soft_skills = [
             "leadership", "communication", "teamwork", "problem solving", "critical thinking",
-            "project management", "time management", "adaptability", "creativity", "analytical"
+            "project management", "time management", "adaptability", "creativity", "analytical",
+            "collaboration", "innovation", "strategic thinking", "customer service", "negotiation"
         ]
 
     def extract_text_from_pdf(self, file):
@@ -148,10 +228,21 @@ class ResumeAnalyzer:
         # Convert to lowercase
         text = text.lower()
         # Tokenize
-        tokens = word_tokenize(text)
+        try:
+            tokens = word_tokenize(text)
+        except LookupError:
+            # Fallback tokenization
+            tokens = text.split()
+
         # Remove stopwords and lemmatize
-        tokens = [self.lemmatizer.lemmatize(token) for token in tokens
-                  if token not in self.stop_words and len(token) > 2]
+        try:
+            tokens = [self.lemmatizer.lemmatize(token) for token in tokens
+                      if token not in self.stop_words and len(token) > 2]
+        except LookupError:
+            # Fallback without lemmatization
+            tokens = [token for token in tokens
+                      if token not in self.stop_words and len(token) > 2]
+
         return tokens
 
     def extract_sections(self, text):
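Note: rough behaviour of the hardened preprocess_text, assuming the NLTK corpora downloaded above are present (sample text and output are illustrative; exact tokens depend on the installed NLTK data):

    analyzer = ResumeAnalyzer()
    print(analyzer.preprocess_text("Managed cross-functional teams and pipelines"))
    # e.g. ['managed', 'cross-functional', 'team', 'pipeline']
    # stopwords ('and') and tokens of length <= 2 are dropped, plurals lemmatized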
@@ -160,12 +251,12 @@ class ResumeAnalyzer:
 
         # Define section patterns
         section_patterns = {
-            'education': r'(education|academic|qualification|degree)',
-            'experience': r'(experience|employment|work|career|professional)',
-            'skills': r'(skills|technical|competencies|expertise)',
-            'projects': r'(projects|portfolio|work samples)',
-            'certifications': r'(certifications?|certificates?|licensed?)',
-            'summary': r'(summary|objective|profile|about)'
+            'education': r'(education|academic|qualification|degree|university|college)',
+            'experience': r'(experience|employment|work|career|professional|job|position)',
+            'skills': r'(skills|technical|competencies|expertise|abilities|technologies)',
+            'projects': r'(projects|portfolio|work samples|personal projects)',
+            'certifications': r'(certifications?|certificates?|licensed?|credentials)',
+            'summary': r'(summary|objective|profile|about|overview)'
         }
 
         text_lower = text.lower()
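Note: the hunk only shows the pattern table; the loop that consumes it sits outside the diff. One minimal way such patterns are typically applied (illustrative sketch, not the code from app.py):

    import re

    def find_section_lines(text_lower, section_patterns):
        # Map each section name to the first line index whose text matches its pattern
        hits = {}
        for i, line in enumerate(text_lower.split('\n')):
            for name, pattern in section_patterns.items():
                if name not in hits and re.search(pattern, line):
                    hits[name] = i
        return hits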
@@ -199,11 +290,13 @@ class ResumeAnalyzer:
         found_soft = []
 
         for skill in self.technical_skills:
-            if skill in text_lower:
+            if skill.lower() in text_lower:
                 found_technical.append(skill)
 
         for skill in self.soft_skills:
-            if skill in text_lower:
+            # Use more flexible matching for soft skills
+            skill_words = skill.lower().split()
+            if all(word in text_lower for word in skill_words):
                 found_soft.append(skill)
 
         return found_technical, found_soft
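Note the trade-off in the word-wise soft-skill check: each word only has to appear somewhere in the text, so word order and proximity are ignored:

    text_lower = "enjoys solving every problem independently"
    print(all(w in text_lower for w in "problem solving".split()))  # True, despite reversed order
    text_lower = "solved every problem the team raised"
    print(all(w in text_lower for w in "problem solving".split()))  # False ('solving' absent)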
@@ -218,23 +311,28 @@ class ResumeAnalyzer:
 
         found_keywords = []
         for keyword in keywords:
-            # Use fuzzy matching
-            if fuzz.partial_ratio(keyword, text_lower) > 80:
-                found_keywords.append(keyword)
+            if FUZZYWUZZY_AVAILABLE:
+                # Use fuzzy matching
+                if fuzz.partial_ratio(keyword, text_lower) > 80:
+                    found_keywords.append(keyword)
+            else:
+                # Use simple matching
+                if simple_fuzzy_match(keyword, text_lower) > 80:
+                    found_keywords.append(keyword)
 
         match_percentage = (len(found_keywords) / len(keywords)) * 100
         return found_keywords, match_percentage
 
     def grammar_check(self, text):
         """Check grammar and language quality"""
-        if not self.grammar_tool:
-            return []
-
-        try:
-            matches = self.grammar_tool.check(text[:5000])  # Limit text length
-            return matches
-        except:
-            return []
+        if self.grammar_tool and GRAMMAR_TOOL_AVAILABLE:
+            try:
+                matches = self.grammar_tool.check(text[:5000])  # Limit text length
+                return matches
+            except Exception:
+                return basic_grammar_check(text)
+        else:
+            return basic_grammar_check(text)
 
     def calculate_ats_score(self, text, sections):
         """Calculate ATS friendliness score"""
@@ -274,7 +372,6 @@ class ResumeAnalyzer:
 
     def generate_persona_summary(self, text, sections):
         """Generate AI-powered persona summary"""
-        # Simple template-based summary (can be enhanced with GPT API)
         education = sections.get('education', '')
         experience = sections.get('experience', '')
         skills = sections.get('skills', '')
@@ -373,6 +470,27 @@ def main():
     st.title("🚀 AI-Powered Resume Analyzer")
     st.markdown("Upload your resume and get comprehensive analysis with actionable insights!")
 
+    # Show dependency status
+    with st.expander("📋 Dependency Status", expanded=False):
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            if SPACY_AVAILABLE:
+                st.success("✅ spaCy Available")
+            else:
+                st.warning("⚠️ spaCy Not Available")
+
+        with col2:
+            if FUZZYWUZZY_AVAILABLE:
+                st.success("✅ FuzzyWuzzy Available")
+            else:
+                st.warning("⚠️ FuzzyWuzzy Not Available")
+
+        with col3:
+            if GRAMMAR_TOOL_AVAILABLE:
+                st.success("✅ Grammar Tool Available")
+            else:
+                st.warning("⚠️ Grammar Tool Not Available")
+
     # Initialize analyzer
     analyzer = ResumeAnalyzer()
 
@@ -442,12 +560,21 @@ def main():
         preprocessed_tokens = analyzer.preprocess_text(text)
         if preprocessed_tokens:
             wordcloud_text = ' '.join(preprocessed_tokens)
-            wordcloud = WordCloud(width=800, height=400, background_color='white').generate(wordcloud_text)
-
-            fig, ax = plt.subplots(figsize=(12, 6))
-            ax.imshow(wordcloud, interpolation='bilinear')
-            ax.axis('off')
-            st.pyplot(fig)
+            try:
+                wordcloud = WordCloud(width=800, height=400, background_color='white').generate(wordcloud_text)
+
+                fig, ax = plt.subplots(figsize=(12, 6))
+                ax.imshow(wordcloud, interpolation='bilinear')
+                ax.axis('off')
+                st.pyplot(fig)
+            except Exception:
+                st.warning("Could not generate word cloud. Showing top words instead.")
+                word_freq = Counter(preprocessed_tokens)
+                top_words = word_freq.most_common(20)
+
+                words_df = pd.DataFrame(top_words, columns=['Word', 'Frequency'])
+                fig = px.bar(words_df, x='Word', y='Frequency', title='Top 20 Words')
+                st.plotly_chart(fig)
 
     with tab2:
         st.header("Skills Analysis")
@@ -457,16 +584,19 @@ def main():
         with col1:
             st.subheader("🔧 Technical Skills")
             if tech_skills:
-                for skill in tech_skills:
-                    st.badge(skill, type="secondary")
+                # Create a nice display for skills
+                skills_text = " • ".join(tech_skills)
+                st.success(f"Found {len(tech_skills)} technical skills:")
+                st.write(skills_text)
             else:
                 st.info("No technical skills detected")
 
         with col2:
             st.subheader("🤝 Soft Skills")
             if soft_skills:
-                for skill in soft_skills:
-                    st.badge(skill, type="primary")
+                skills_text = " • ".join(soft_skills)
+                st.success(f"Found {len(soft_skills)} soft skills:")
+                st.write(skills_text)
             else:
                 st.info("No soft skills detected")
 
@@ -479,15 +609,15 @@ def main():
 
             if found_keywords:
                 st.write("**Found Keywords:**")
-                for keyword in found_keywords:
-                    st.badge(keyword, type="success")
+                keywords_text = " • ".join(found_keywords)
+                st.success(keywords_text)
 
             # Skills gap analysis
             missing_keywords = [kw for kw in analyzer.job_keywords[selected_role] if kw not in found_keywords]
             if missing_keywords:
                 st.write("**Missing Keywords (Consider Adding):**")
-                for keyword in missing_keywords[:10]:  # Show top 10
-                    st.badge(keyword, type="error")
+                missing_text = " • ".join(missing_keywords[:10])  # Show top 10
+                st.warning(missing_text)
 
     with tab3:
         st.header("Section Breakdown")
@@ -623,8 +753,7 @@ def main():
 
     with col1:
         # Text report
-        if st.button("Generate Text Report"):
-            report_content = f"""
+        report_content = f"""
 RESUME ANALYSIS REPORT
 Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}