ZainabFatimaa committed
Commit 036b515 · verified · 1 Parent(s): 18a38f3

Update src/app.py

Files changed (1):
  1. src/app.py +192 -63
src/app.py CHANGED
@@ -23,21 +23,38 @@ import nltk
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize, sent_tokenize
 from nltk.stem import WordNetLemmatizer
-import spacy
-from fuzzywuzzy import fuzz, process
-import language_tool_python
+
+# Optional imports with fallbacks
+try:
+    import spacy
+    SPACY_AVAILABLE = True
+except ImportError:
+    SPACY_AVAILABLE = False
+    st.warning("spaCy not installed. Some advanced NLP features will be limited.")
+
+try:
+    from fuzzywuzzy import fuzz, process
+    FUZZYWUZZY_AVAILABLE = True
+except ImportError:
+    FUZZYWUZZY_AVAILABLE = False
+    st.warning("fuzzywuzzy not installed. Using basic string matching instead.")
+
+try:
+    import language_tool_python
+    GRAMMAR_TOOL_AVAILABLE = True
+except ImportError:
+    GRAMMAR_TOOL_AVAILABLE = False
+    st.warning("language_tool_python not installed. Grammar checking will be disabled.")
 
 # ML imports
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 
-# Report generation - FIXED IMPORTS
+# Report generation
 from reportlab.lib.pagesizes import letter
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
 from reportlab.lib.styles import getSampleStyleSheet
 from reportlab.lib.units import inch
-
-# Chart imports (if you need charts for PDF reports)
 from reportlab.graphics.charts.barcharts import VerticalBarChart
 from reportlab.graphics.shapes import Drawing
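Note: the try/except import pattern above keeps the app usable when an optional package is missing. A minimal standalone sketch of the same idea (the similarity helper is illustrative, not part of app.py):

    try:
        from fuzzywuzzy import fuzz
        FUZZYWUZZY_AVAILABLE = True
    except ImportError:
        FUZZYWUZZY_AVAILABLE = False

    def similarity(a: str, b: str) -> int:
        # Degrade gracefully: fuzzy score when available, containment check otherwise
        if FUZZYWUZZY_AVAILABLE:
            return fuzz.partial_ratio(a.lower(), b.lower())
        return 100 if a.lower() in b.lower() else 0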
 
@@ -49,32 +66,93 @@ def download_nltk_data():
         nltk.data.find('corpora/stopwords')
         nltk.data.find('corpora/wordnet')
     except LookupError:
-        nltk.download('punkt')
-        nltk.download('stopwords')
-        nltk.download('wordnet')
+        with st.spinner("Downloading NLTK data..."):
+            nltk.download('punkt', quiet=True)
+            nltk.download('stopwords', quiet=True)
+            nltk.download('wordnet', quiet=True)
+            nltk.download('punkt_tab', quiet=True)  # For newer NLTK versions
 
-# Initialize tools
+# Initialize tools with better error handling
 @st.cache_resource
 def init_tools():
     download_nltk_data()
-    try:
-        nlp = spacy.load("en_core_web_sm")
-    except OSError:
-        st.warning("spaCy model not found. Install with: python -m spacy download en_core_web_sm")
-        nlp = None
 
-    try:
-        grammar_tool = language_tool_python.LanguageTool('en-US')
-    except:
-        st.warning("Grammar tool initialization failed")
-        grammar_tool = None
+    nlp = None
+    if SPACY_AVAILABLE:
+        try:
+            nlp = spacy.load("en_core_web_sm")
+            st.success("✅ spaCy model loaded successfully")
+        except OSError:
+            try:
+                # Try to download the model automatically
+                import subprocess
+                import sys
+                with st.spinner("Downloading spaCy model..."):
+                    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
+                                   check=True, capture_output=True)
+                nlp = spacy.load("en_core_web_sm")
+                st.success("✅ spaCy model downloaded and loaded successfully")
+            except Exception:
+                st.warning("⚠️ spaCy model not available. Advanced NLP features will be limited.")
+                nlp = None
+
+    grammar_tool = None
+    if GRAMMAR_TOOL_AVAILABLE:
+        try:
+            with st.spinner("Initializing grammar checker..."):
+                grammar_tool = language_tool_python.LanguageTool('en-US')
+            st.success("✅ Grammar tool initialized successfully")
+        except Exception:
+            st.warning("⚠️ Grammar tool initialization failed. Grammar checking will be disabled.")
+            grammar_tool = None
 
     return nlp, grammar_tool
 
+# Fallback functions for when dependencies are missing
+def simple_fuzzy_match(keyword, text):
+    """Simple fuzzy matching fallback when fuzzywuzzy is not available"""
+    keyword_lower = keyword.lower()
+    text_lower = text.lower()
+
+    # Exact match
+    if keyword_lower in text_lower:
+        return 100
+
+    # Check for partial matches with some tolerance
+    keyword_words = keyword_lower.split()
+    matches = sum(1 for word in keyword_words if word in text_lower)
+    return (matches / len(keyword_words)) * 100 if keyword_words else 0
+
+def basic_grammar_check(text):
+    """Basic grammar check when language_tool_python is not available"""
+    issues = []
+
+    # Check for common issues
+    sentences = sent_tokenize(text)
+
+    for i, sentence in enumerate(sentences):
+        # Check for sentences that are too long
+        if len(sentence.split()) > 30:
+            issues.append(f"Sentence {i+1} might be too long ({len(sentence.split())} words)")
+
+        # Check for repeated words
+        words = sentence.lower().split()
+        for j in range(len(words) - 1):
+            if words[j] == words[j + 1] and len(words[j]) > 3:
+                issues.append(f"Repeated word '{words[j]}' in sentence {i+1}")
+
+    return [type('MockError', (), {'message': issue}) for issue in issues]
+
 class ResumeAnalyzer:
     def __init__(self):
         self.nlp, self.grammar_tool = init_tools()
-        self.stop_words = set(stopwords.words('english'))
+
+        try:
+            self.stop_words = set(stopwords.words('english'))
+        except LookupError:
+            # Fallback stop words
+            self.stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'should', 'could', 'can', 'may', 'might', 'must'}
+
         self.lemmatizer = WordNetLemmatizer()
 
         # Job role keywords dictionary
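Note: the type('MockError', ...) trick in basic_grammar_check builds throwaway objects exposing the same .message attribute that language_tool_python's matches carry, so downstream code can treat both paths uniformly. A plainer equivalent, if the dynamic class feels opaque (GrammarIssue is illustrative, not in app.py):

    from dataclasses import dataclass

    @dataclass
    class GrammarIssue:
        message: str  # mirrors the .message attribute on LanguageTool matches

    issues = [GrammarIssue("Sentence 3 might be too long (41 words)")]
    print(issues[0].message)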
@@ -95,12 +173,14 @@ class ResumeAnalyzer:
         self.technical_skills = [
             "python", "java", "javascript", "c++", "sql", "html", "css", "react", "angular", "vue",
             "machine learning", "deep learning", "tensorflow", "pytorch", "pandas", "numpy",
-            "docker", "kubernetes", "aws", "azure", "git", "jenkins", "ci/cd"
+            "docker", "kubernetes", "aws", "azure", "git", "jenkins", "ci/cd", "mongodb", "postgresql",
+            "redis", "elasticsearch", "spark", "hadoop", "tableau", "power bi", "excel"
         ]
 
         self.soft_skills = [
             "leadership", "communication", "teamwork", "problem solving", "critical thinking",
-            "project management", "time management", "adaptability", "creativity", "analytical"
+            "project management", "time management", "adaptability", "creativity", "analytical",
+            "collaboration", "innovation", "strategic thinking", "customer service", "negotiation"
         ]
 
     def extract_text_from_pdf(self, file):
@@ -148,10 +228,21 @@ class ResumeAnalyzer:
         # Convert to lowercase
         text = text.lower()
         # Tokenize
-        tokens = word_tokenize(text)
+        try:
+            tokens = word_tokenize(text)
+        except LookupError:
+            # Fallback tokenization
+            tokens = text.split()
+
         # Remove stopwords and lemmatize
-        tokens = [self.lemmatizer.lemmatize(token) for token in tokens
-                  if token not in self.stop_words and len(token) > 2]
+        try:
+            tokens = [self.lemmatizer.lemmatize(token) for token in tokens
+                      if token not in self.stop_words and len(token) > 2]
+        except LookupError:
+            # Fallback without lemmatization
+            tokens = [token for token in tokens
+                      if token not in self.stop_words and len(token) > 2]
+
         return tokens
 
     def extract_sections(self, text):
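Note: rough behaviour of the hardened preprocess_text, assuming the NLTK corpora downloaded above are present (sample text and output are illustrative; exact tokens depend on the installed NLTK data):

    analyzer = ResumeAnalyzer()
    print(analyzer.preprocess_text("Managed cross-functional teams and pipelines"))
    # e.g. ['managed', 'cross-functional', 'team', 'pipeline']
    # stopwords ('and') and tokens of length <= 2 are dropped, plurals lemmatized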
@@ -160,12 +251,12 @@ class ResumeAnalyzer:
 
         # Define section patterns
         section_patterns = {
-            'education': r'(education|academic|qualification|degree)',
-            'experience': r'(experience|employment|work|career|professional)',
-            'skills': r'(skills|technical|competencies|expertise)',
-            'projects': r'(projects|portfolio|work samples)',
-            'certifications': r'(certifications?|certificates?|licensed?)',
-            'summary': r'(summary|objective|profile|about)'
+            'education': r'(education|academic|qualification|degree|university|college)',
+            'experience': r'(experience|employment|work|career|professional|job|position)',
+            'skills': r'(skills|technical|competencies|expertise|abilities|technologies)',
+            'projects': r'(projects|portfolio|work samples|personal projects)',
+            'certifications': r'(certifications?|certificates?|licensed?|credentials)',
+            'summary': r'(summary|objective|profile|about|overview)'
         }
 
         text_lower = text.lower()
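Note: the hunk only shows the pattern table; the loop that consumes it sits outside the diff. One minimal way such patterns are typically applied (illustrative sketch, not the code from app.py):

    import re

    def find_section_lines(text_lower, section_patterns):
        # Map each section name to the first line index whose text matches its pattern
        hits = {}
        for i, line in enumerate(text_lower.split('\n')):
            for name, pattern in section_patterns.items():
                if name not in hits and re.search(pattern, line):
                    hits[name] = i
        return hits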
@@ -199,11 +290,13 @@ class ResumeAnalyzer:
         found_soft = []
 
         for skill in self.technical_skills:
-            if skill in text_lower:
+            if skill.lower() in text_lower:
                 found_technical.append(skill)
 
         for skill in self.soft_skills:
-            if skill in text_lower:
+            # Use more flexible matching for soft skills
+            skill_words = skill.lower().split()
+            if all(word in text_lower for word in skill_words):
                 found_soft.append(skill)
 
         return found_technical, found_soft
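Note the trade-off in the word-wise soft-skill check: each word only has to appear somewhere in the text, so word order and proximity are ignored:

    text_lower = "enjoys solving every problem independently"
    print(all(w in text_lower for w in "problem solving".split()))  # True, despite reversed order
    text_lower = "solved every problem the team raised"
    print(all(w in text_lower for w in "problem solving".split()))  # False ('solving' absent)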
@@ -218,23 +311,28 @@ class ResumeAnalyzer:
 
         found_keywords = []
         for keyword in keywords:
-            # Use fuzzy matching
-            if fuzz.partial_ratio(keyword, text_lower) > 80:
-                found_keywords.append(keyword)
+            if FUZZYWUZZY_AVAILABLE:
+                # Use fuzzy matching
+                if fuzz.partial_ratio(keyword, text_lower) > 80:
+                    found_keywords.append(keyword)
+            else:
+                # Use simple matching
+                if simple_fuzzy_match(keyword, text_lower) > 80:
+                    found_keywords.append(keyword)
 
         match_percentage = (len(found_keywords) / len(keywords)) * 100
         return found_keywords, match_percentage
 
     def grammar_check(self, text):
         """Check grammar and language quality"""
-        if not self.grammar_tool:
-            return []
-
-        try:
-            matches = self.grammar_tool.check(text[:5000])  # Limit text length
-            return matches
-        except:
-            return []
+        if self.grammar_tool and GRAMMAR_TOOL_AVAILABLE:
+            try:
+                matches = self.grammar_tool.check(text[:5000])  # Limit text length
+                return matches
+            except Exception:
+                return basic_grammar_check(text)
+        else:
+            return basic_grammar_check(text)
 
     def calculate_ats_score(self, text, sections):
         """Calculate ATS friendliness score"""
@@ -274,7 +372,6 @@ class ResumeAnalyzer:
 
     def generate_persona_summary(self, text, sections):
         """Generate AI-powered persona summary"""
-        # Simple template-based summary (can be enhanced with GPT API)
         education = sections.get('education', '')
         experience = sections.get('experience', '')
         skills = sections.get('skills', '')
@@ -373,6 +470,27 @@ def main():
     st.title("🚀 AI-Powered Resume Analyzer")
     st.markdown("Upload your resume and get comprehensive analysis with actionable insights!")
 
+    # Show dependency status
+    with st.expander("📋 Dependency Status", expanded=False):
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            if SPACY_AVAILABLE:
+                st.success("✅ spaCy Available")
+            else:
+                st.warning("⚠️ spaCy Not Available")
+
+        with col2:
+            if FUZZYWUZZY_AVAILABLE:
+                st.success("✅ FuzzyWuzzy Available")
+            else:
+                st.warning("⚠️ FuzzyWuzzy Not Available")
+
+        with col3:
+            if GRAMMAR_TOOL_AVAILABLE:
+                st.success("✅ Grammar Tool Available")
+            else:
+                st.warning("⚠️ Grammar Tool Not Available")
+
     # Initialize analyzer
     analyzer = ResumeAnalyzer()
 
@@ -442,12 +560,21 @@ def main():
         preprocessed_tokens = analyzer.preprocess_text(text)
         if preprocessed_tokens:
             wordcloud_text = ' '.join(preprocessed_tokens)
-            wordcloud = WordCloud(width=800, height=400, background_color='white').generate(wordcloud_text)
-
-            fig, ax = plt.subplots(figsize=(12, 6))
-            ax.imshow(wordcloud, interpolation='bilinear')
-            ax.axis('off')
-            st.pyplot(fig)
+            try:
+                wordcloud = WordCloud(width=800, height=400, background_color='white').generate(wordcloud_text)
+
+                fig, ax = plt.subplots(figsize=(12, 6))
+                ax.imshow(wordcloud, interpolation='bilinear')
+                ax.axis('off')
+                st.pyplot(fig)
+            except Exception:
+                st.warning("Could not generate word cloud. Showing top words instead.")
+                word_freq = Counter(preprocessed_tokens)
+                top_words = word_freq.most_common(20)
+
+                words_df = pd.DataFrame(top_words, columns=['Word', 'Frequency'])
+                fig = px.bar(words_df, x='Word', y='Frequency', title='Top 20 Words')
+                st.plotly_chart(fig)
 
     with tab2:
         st.header("Skills Analysis")
@@ -457,16 +584,19 @@ def main():
         with col1:
             st.subheader("🔧 Technical Skills")
             if tech_skills:
-                for skill in tech_skills:
-                    st.badge(skill, type="secondary")
+                # Create a nice display for skills
+                skills_text = " • ".join(tech_skills)
+                st.success(f"Found {len(tech_skills)} technical skills:")
+                st.write(skills_text)
             else:
                 st.info("No technical skills detected")
 
         with col2:
             st.subheader("🤝 Soft Skills")
             if soft_skills:
-                for skill in soft_skills:
-                    st.badge(skill, type="primary")
+                skills_text = " • ".join(soft_skills)
+                st.success(f"Found {len(soft_skills)} soft skills:")
+                st.write(skills_text)
             else:
                 st.info("No soft skills detected")
 
@@ -479,15 +609,15 @@ def main():
 
             if found_keywords:
                 st.write("**Found Keywords:**")
-                for keyword in found_keywords:
-                    st.badge(keyword, type="success")
+                keywords_text = " • ".join(found_keywords)
+                st.success(keywords_text)
 
             # Skills gap analysis
             missing_keywords = [kw for kw in analyzer.job_keywords[selected_role] if kw not in found_keywords]
             if missing_keywords:
                 st.write("**Missing Keywords (Consider Adding):**")
-                for keyword in missing_keywords[:10]:  # Show top 10
-                    st.badge(keyword, type="error")
+                missing_text = " • ".join(missing_keywords[:10])  # Show top 10
+                st.warning(missing_text)
 
     with tab3:
         st.header("Section Breakdown")
@@ -623,8 +753,7 @@ def main():
 
     with col1:
         # Text report
-        if st.button("Generate Text Report"):
-            report_content = f"""
+        report_content = f"""
 RESUME ANALYSIS REPORT
 Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}