Update src/app.py
src/app.py CHANGED (+245 -182)
@@ -70,8 +70,10 @@ import re
 from datetime import datetime
 from typing import Dict, List
 
-#
-class SimpleNLPProcessor:
+# Set seed for reproducibility
+set_seed(42)
+
+class ImprovedNLPProcessor:
     def __init__(self):
         self.setup_nltk()
 
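The set_seed(42) added here is the transformers utility that seeds Python's random module, NumPy, and torch in one call, so do_sample=True generations repeat across reruns of the Space. A minimal sketch of the effect, assuming the import sits with the app's other transformers imports (the diff shows only the call) and using an illustrative prompt:

from transformers import pipeline, set_seed

set_seed(42)  # seeds random, numpy and torch together
gen = pipeline("text-generation", model="distilgpt2", device=-1)
first = gen("Resume tip:", max_new_tokens=10, do_sample=True)[0]["generated_text"]

set_seed(42)  # re-seeding replays the identical sample
second = gen("Resume tip:", max_new_tokens=10, do_sample=True)[0]["generated_text"]
assert first == second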
@@ -86,32 +88,30 @@ class SimpleNLPProcessor:
         self.stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'with'}
         self.lemmatizer = None
 
-    def
-        """
+    def extract_key_terms(self, text: str, max_terms: int = 5) -> str:
+        """Extract key terms without overwhelming the model"""
         try:
             tokens = word_tokenize(text.lower())
         except:
             tokens = text.lower().split()
 
-        #
-        filtered_tokens = [token for token in tokens
-                           if token not in self.stop_words and len(token) > 2]
+        # Focus on resume-relevant terms
+        resume_keywords = ['resume', 'experience', 'skills', 'education', 'job', 'work', 'ats', 'career']
 
-
-
-
-
-
-
+        filtered_tokens = []
+        for token in tokens:
+            if (len(token) > 2 and
+                    token not in self.stop_words and
+                    (token.isalpha() or token in resume_keywords)):
+                filtered_tokens.append(token)
 
-        # Return
-        return ' '.join(filtered_tokens[:
+        # Return only the most relevant terms
+        return ' '.join(filtered_tokens[:max_terms])
 
-
-class SimpleChatMemory:
+class ImprovedChatMemory:
     def __init__(self):
-        if 'chat_history' not in st.session_state:
-            st.session_state.chat_history = []
+        if 'improved_chat_history' not in st.session_state:
+            st.session_state.improved_chat_history = []
 
     def add_conversation(self, user_msg: str, bot_response: str):
         conversation = {
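The new filter is easy to exercise standalone. This sketch inlines the same stop words and resume keywords and uses the plain .split() fallback path (the app prefers NLTK's word_tokenize when it is available):

STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'with'}
RESUME_KEYWORDS = ['resume', 'experience', 'skills', 'education', 'job', 'work', 'ats', 'career']

def extract_key_terms(text: str, max_terms: int = 5) -> str:
    tokens = text.lower().split()  # fallback tokenizer path
    filtered = [t for t in tokens
                if len(t) > 2 and t not in STOP_WORDS
                and (t.isalpha() or t in RESUME_KEYWORDS)]
    return ' '.join(filtered[:max_terms])

print(extract_key_terms("Improve the skills section of my resume for data science jobs"))
# -> "improve skills section resume data"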
@@ -119,62 +119,69 @@ class SimpleChatMemory:
             'bot': bot_response,
             'timestamp': datetime.now().strftime("%H:%M:%S")
         }
-        st.session_state.chat_history.append(conversation)
+        st.session_state.improved_chat_history.append(conversation)
 
-        # Keep only last
-        if len(st.session_state.
-            st.session_state.
+        # Keep only last 6 conversations
+        if len(st.session_state.improved_chat_history) > 6:
+            st.session_state.improved_chat_history = st.session_state.improved_chat_history[-6:]
 
-    def
-        """Get
-        if not st.session_state.
+    def get_simple_context(self) -> str:
+        """Get very simple context to avoid confusing the model"""
+        if not st.session_state.improved_chat_history:
             return ""
 
-
-
-
-
-
-        return " | ".join(context_parts) if context_parts else ""
+        # Only use the last conversation for context
+        last_conv = st.session_state.improved_chat_history[-1]
+        last_topic = last_conv['user'][:30]  # First 30 chars only
+        return f"Previously discussed: {last_topic}"
 
-
-class SimpleCPUChatbot:
+class ImprovedCPUChatbot:
     def __init__(self):
-        self.model_name = "distilgpt2"
+        self.model_name = "distilgpt2"
         self.model = None
         self.tokenizer = None
         self.pipeline = None
-        self.nlp_processor = SimpleNLPProcessor()
-        self.memory = SimpleChatMemory()
+        self.nlp_processor = ImprovedNLPProcessor()
+        self.memory = ImprovedChatMemory()
         self.is_loaded = False
+
+        # Predefined responses for common resume questions
+        self.template_responses = {
+            'experience': "To improve your experience section: Use bullet points with action verbs, quantify achievements with numbers, focus on results rather than duties, and tailor content to match job requirements.",
+            'ats': "Make your resume ATS-friendly by: Using standard section headings, including relevant keywords naturally, avoiding images and complex formatting, using common fonts like Arial, and saving as PDF.",
+            'skills': "Enhance your skills section by: Organizing technical and soft skills separately, matching skills to job descriptions, providing proficiency levels, and including both hard and soft skills relevant to your target role.",
+            'keywords': "Add relevant keywords by: Studying job descriptions in your field, using industry-specific terms, including both acronyms and full terms, and incorporating them naturally throughout your resume.",
+            'format': "Improve resume formatting with: Clear section headings, consistent bullet points, readable fonts, appropriate white space, and a clean, professional layout that's easy to scan."
+        }
 
     @st.cache_resource
     def load_model(_self):
-        """Load the model
+        """Load the model with better configuration"""
         try:
             with st.spinner("Loading AI model (first time may take 2-3 minutes)..."):
-                # Load tokenizer
                 tokenizer = AutoTokenizer.from_pretrained(_self.model_name)
                 tokenizer.pad_token = tokenizer.eos_token
 
-                # Load model with CPU optimization
                 model = AutoModelForCausalLM.from_pretrained(
                     _self.model_name,
-                    torch_dtype=torch.float32,
+                    torch_dtype=torch.float32,
                     low_cpu_mem_usage=True
                 )
 
-                # Create pipeline
+                # Create pipeline with better parameters
                 text_generator = pipeline(
                     "text-generation",
                     model=model,
                     tokenizer=tokenizer,
                     device=-1,  # CPU only
-                    max_new_tokens=
+                    max_new_tokens=50,  # Reduced for better quality
                     do_sample=True,
-                    temperature=0.
-                    top_p=0.
-
+                    temperature=0.8,
+                    top_p=0.85,
+                    top_k=50,
+                    repetition_penalty=1.2,  # Reduce repetition
+                    pad_token_id=tokenizer.eos_token_id,
+                    no_repeat_ngram_size=3  # Prevent 3-gram repetition
                 )
 
                 return model, tokenizer, text_generator
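The two anti-repetition arguments added to the pipeline are standard transformers generation settings: repetition_penalty=1.2 rescales the logits of tokens already generated (CTRL-style: positive logits are divided by the penalty, negative ones multiplied), and no_repeat_ngram_size=3 bans any token that would complete a 3-gram already present in the sequence. A pure-Python sketch of both rules, with illustrative token ids:

def penalized(logit: float, seen: bool, penalty: float = 1.2) -> float:
    # CTRL-style repetition penalty, as applied by transformers
    if not seen:
        return logit
    return logit / penalty if logit > 0 else logit * penalty

def banned_next_tokens(tokens: list, n: int = 3) -> set:
    # Tokens that would complete an n-gram already in the sequence
    if len(tokens) < n - 1:
        return set()
    prefix = tuple(tokens[-(n - 1):])
    banned = set()
    for i in range(len(tokens) - n + 1):
        if tuple(tokens[i:i + n - 1]) == prefix:
            banned.add(tokens[i + n - 1])
    return banned

print(penalized(2.0, seen=True))            # 1.67 instead of 2.0
print(banned_next_tokens([5, 6, 7, 5, 6]))  # {7}: would repeat the 3-gram "5 6 7"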
@@ -192,67 +199,83 @@ class SimpleCPUChatbot:
             st.success("AI model loaded successfully!")
             return True
         else:
-            st.error("Failed to load AI model")
             return False
         return True
 
-    def
-        """
-
-
-
-
-
+    def get_template_response(self, user_input: str) -> str:
+        """Check if we can use a template response for common questions"""
+        user_lower = user_input.lower()
+
+        # Check for common patterns
+        if any(word in user_lower for word in ['experience', 'work history', 'job history']):
+            return self.template_responses['experience']
+        elif any(word in user_lower for word in ['ats', 'applicant tracking', 'ats-friendly']):
+            return self.template_responses['ats']
+        elif any(word in user_lower for word in ['skills', 'technical skills', 'abilities']):
+            return self.template_responses['skills']
+        elif any(word in user_lower for word in ['keywords', 'keyword', 'terms']):
+            return self.template_responses['keywords']
+        elif any(word in user_lower for word in ['format', 'formatting', 'layout', 'design']):
+            return self.template_responses['format']
+
+        return None
+
+    def create_simple_prompt(self, user_input: str, resume_context: str = "") -> str:
+        """Create a very simple, clear prompt"""
+        # Try template response first
+        template_response = self.get_template_response(user_input)
+        if template_response:
+            return template_response
 
-        #
-        prompt_parts = [
-            "You are a professional resume consultant. Give specific, helpful advice."
-        ]
+        # Extract key terms
+        key_terms = self.nlp_processor.extract_key_terms(user_input)
 
-        #
+        # Create simple prompt
         if resume_context:
-
-
-
-
-        if recent_context:
-            prompt_parts.append(f"Previous topics: {recent_context}")
-
-        # Add key terms from current question
-        if key_terms:
-            prompt_parts.append(f"Focus areas: {key_terms}")
-
-        # Add the actual question
-        prompt_parts.append(f"Question: {user_input}")
-        prompt_parts.append("Advice:")
+            context_snippet = resume_context[:100].replace('\n', ' ')
+            prompt = f"Resume help: {context_snippet}\nQuestion: {user_input}\nAdvice:"
+        else:
+            prompt = f"Resume question: {user_input}\nHelpful advice:"
 
-        return
+        return prompt
 
     def generate_response(self, user_input: str, resume_context: str = "") -> str:
-        """Generate response
+        """Generate response with better quality control"""
         if not self.is_loaded:
             return "Please initialize the AI model first by clicking 'Initialize AI'."
 
+        # Check for template response first
+        template_response = self.get_template_response(user_input)
+        if template_response:
+            self.memory.add_conversation(user_input, template_response)
+            return template_response
+
         try:
-            # Create prompt
-            prompt = self.
+            # Create simple prompt
+            prompt = self.create_simple_prompt(user_input, resume_context)
+
+            # If prompt is a template response, return it directly
+            if prompt in self.template_responses.values():
+                self.memory.add_conversation(user_input, prompt)
+                return prompt
 
-            # Generate
+            # Generate with model
             result = self.pipeline(
                 prompt,
-                max_new_tokens=
+                max_new_tokens=40,
                 num_return_sequences=1,
                 temperature=0.7,
                 do_sample=True,
-                top_p=0.9
+                top_p=0.9,
+                repetition_penalty=1.3
             )
 
             # Extract and clean response
             generated_text = result[0]['generated_text']
             response = generated_text.replace(prompt, "").strip()
 
-            # Clean
-            response = self.
+            # Clean the response thoroughly
+            response = self.clean_response_thoroughly(response, user_input)
 
             # Add to memory
             self.memory.add_conversation(user_input, response)
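The template layer short-circuits the model entirely for common question types: the first matching keyword group wins, and None falls through to generation. A trimmed standalone sketch of the routing idea (two of the five templates, with abbreviated text):

TEMPLATES = {
    'experience': "Use bullet points with action verbs and quantify achievements.",
    'ats': "Use standard headings, natural keywords, and simple formatting.",
}
ROUTES = [
    (('experience', 'work history', 'job history'), 'experience'),
    (('ats', 'applicant tracking', 'ats-friendly'), 'ats'),
]

def route(question: str):
    q = question.lower()
    for words, key in ROUTES:
        if any(w in q for w in words):
            return TEMPLATES[key]
    return None  # no template -> fall back to DistilGPT2

print(route("Is my resume ATS-friendly?"))  # ATS template, no model call
print(route("Should I add a photo?"))       # None -> model path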
@@ -260,30 +283,157 @@ class SimpleCPUChatbot:
             return response
 
         except Exception as e:
-
+            error_response = f"I encountered an error. Here's some general advice: {self.get_general_advice(user_input)}"
+            self.memory.add_conversation(user_input, error_response)
+            return error_response
 
-    def
-        """
-
-
-
-
+    def get_general_advice(self, user_input: str) -> str:
+        """Fallback advice for when model fails"""
+        user_lower = user_input.lower()
+        if 'experience' in user_lower:
+            return "Focus on achievements with numbers, use action verbs, and show results."
+        elif 'skill' in user_lower:
+            return "List skills that match the job description and organize them by category."
+        elif 'ats' in user_lower:
+            return "Use standard headings, include keywords, and avoid complex formatting."
+        else:
+            return "Make sure your resume is clear, relevant to the job, and easy to read."
+
+    def clean_response_thoroughly(self, response: str, user_input: str) -> str:
+        """Thoroughly clean the generated response"""
+        if not response or len(response.strip()) < 5:
+            return self.get_general_advice(user_input)
+
+        # Remove common problematic patterns
+        response = re.sub(r'\|[^|]*\|', '', response)  # Remove pipe-separated content
+        response = re.sub(r'Advice:\s*', '', response)  # Remove "Advice:" repetition
+        response = re.sub(r'\s+', ' ', response)  # Replace multiple spaces
+        response = re.sub(r'[.]{2,}', '.', response)  # Replace multiple periods
+
+        # Split into sentences and filter
         sentences = [s.strip() for s in response.split('.') if s.strip()]
         good_sentences = []
 
-
-
+        seen_content = set()
+        for sentence in sentences[:2]:  # Max 2 sentences
+            if (len(sentence) > 15 and
+                    sentence.lower() not in seen_content and
+                    not sentence.lower().startswith(('you are', 'i am', 'as a', 'how do')) and
+                    'advice' not in sentence.lower()):
+
                 good_sentences.append(sentence)
+                seen_content.add(sentence.lower())
 
         if good_sentences:
             response = '. '.join(good_sentences)
             if not response.endswith('.'):
                 response += '.'
         else:
-
-            response = "I'd be happy to help with your resume. Could you be more specific about what you need assistance with?"
+            response = self.get_general_advice(user_input)
 
-        return response
+        return response.strip()
+
+def create_improved_chat_interface(resume_context: str = ""):
+    """Create improved chat interface"""
+
+    st.header("🤖 AI Resume Assistant")
+
+    # Initialize chatbot
+    if 'improved_chatbot' not in st.session_state:
+        st.session_state.improved_chatbot = ImprovedCPUChatbot()
+
+    chatbot = st.session_state.improved_chatbot
+
+    # Model initialization
+    col1, col2 = st.columns([3, 1])
+
+    with col1:
+        st.info("Using DistilGPT2 with improved response quality")
+
+    with col2:
+        if st.button("Initialize AI", type="primary"):
+            chatbot.initialize()
+
+    # Chat interface
+    if chatbot.is_loaded:
+        st.success("✅ AI Ready")
+
+        # Quick questions
+        st.subheader("Quick Questions")
+        col1, col2 = st.columns(2)
+
+        with col1:
+            if st.button("How to improve experience section?"):
+                st.session_state.quick_question = "What's wrong with my experience section?"
+
+        with col2:
+            if st.button("Make resume ATS-friendly?"):
+                st.session_state.quick_question = "How do I make it more ATS-friendly?"
+
+        col3, col4 = st.columns(2)
+        with col3:
+            if st.button("Add better keywords?"):
+                st.session_state.quick_question = "What keywords should I add?"
+
+        with col4:
+            if st.button("Improve skills section?"):
+                st.session_state.quick_question = "How can I improve my skills section?"
+
+        # Chat input
+        user_question = st.text_input(
+            "Ask about your resume:",
+            value=st.session_state.get('quick_question', ''),
+            placeholder="How can I improve my resume?",
+            key="improved_chat_input"
+        )
+
+        # Send button and clear
+        col1, col2 = st.columns([1, 3])
+        with col1:
+            send_clicked = st.button("Send", type="primary")
+        with col2:
+            if st.button("Clear Chat"):
+                st.session_state.improved_chat_history = []
+                if 'quick_question' in st.session_state:
+                    del st.session_state.quick_question
+                st.experimental_rerun()
+
+        # Generate response
+        if send_clicked and user_question.strip():
+            with st.spinner("Generating advice..."):
+                response = chatbot.generate_response(user_question, resume_context)
+                if 'quick_question' in st.session_state:
+                    del st.session_state.quick_question
+                st.experimental_rerun()
+
+        # Display chat history
+        if st.session_state.improved_chat_history:
+            st.subheader("💬 Conversation")
+
+            for conv in reversed(st.session_state.improved_chat_history[-3:]):  # Show last 3
+                st.markdown(f"**You:** {conv['user']}")
+                st.markdown(f"**AI:** {conv['bot']}")
+                st.caption(f"Time: {conv['timestamp']}")
+                st.divider()
+
+    else:
+        st.warning("Click 'Initialize AI' to start chatting")
+
+    with st.expander("ℹ️ Improved Features"):
+        st.markdown("""
+        **Improvements in this version:**
+
+        ✅ **Better response quality** - Reduced repetition and loops
+        ✅ **Template responses** - Instant answers for common questions
+        ✅ **Improved prompting** - Cleaner, more focused prompts
+        ✅ **Response filtering** - Better cleaning of generated text
+        ✅ **Quick questions** - Pre-defined buttons for common queries
+
+        **Model**: DistilGPT2 with enhanced parameters
+        **Response time**: 1-3 seconds
+        **Quality**: Significantly improved over basic version
+        """)
+
 # Download NLTK data if not already present
 @st.cache_resource
 def download_nltk_data():
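The regex cleanup in clean_response_thoroughly is order-dependent: pipe-delimited fragments first, then the echoed "Advice:" label, then whitespace runs, then repeated periods. Applied to a deliberately messy sample string:

import re

raw = "Advice: Use action verbs.. Use action verbs |junk| and   add numbers..."

cleaned = re.sub(r'\|[^|]*\|', '', raw)        # drop |pipe-delimited| fragments
cleaned = re.sub(r'Advice:\s*', '', cleaned)   # drop echoed "Advice:" labels
cleaned = re.sub(r'\s+', ' ', cleaned)         # collapse whitespace runs
cleaned = re.sub(r'[.]{2,}', '.', cleaned)     # collapse repeated periods
print(cleaned)  # -> "Use action verbs. Use action verbs and add numbers. "
# The sentence-level pass that follows then keeps at most two sufficiently
# long sentences and uses a seen set to drop exact duplicates.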
@@ -708,94 +858,7 @@ class ResumeAnalyzer:
         doc.build(story)
         buffer.seek(0)
         return buffer
-
-def create_simple_chat_interface(resume_context: str = ""):
-    """Create simple chat interface for the resume analyzer"""
-
-    st.header("🤖 AI Resume Assistant")
-
-    # Initialize chatbot
-    if 'simple_chatbot' not in st.session_state:
-        st.session_state.simple_chatbot = SimpleCPUChatbot()
-
-    chatbot = st.session_state.simple_chatbot
-
-    # Model initialization
-    col1, col2 = st.columns([3, 1])
-
-    with col1:
-        st.info("Using DistilGPT2 - Fast CPU-only model (≈250MB download)")
-
-    with col2:
-        if st.button("Initialize AI", type="primary"):
-            chatbot.initialize()
-
-    # Chat interface
-    if chatbot.is_loaded:
-        st.success("✅ AI Ready")
-
-        # Sample questions
-        with st.expander("💡 Try asking"):
-            sample_questions = [
-                "How can I improve my resume?",
-                "What skills should I add?",
-                "How do I make it more ATS-friendly?",
-                "What's wrong with my experience section?"
-            ]
-            for q in sample_questions:
-                if st.button(q, key=f"sample_{hash(q)}"):
-                    st.session_state.current_question = q
 
-        # Chat input
-        user_question = st.text_input(
-            "Ask about your resume:",
-            value=st.session_state.get('current_question', ''),
-            placeholder="How can I improve my resume for tech jobs?",
-            key="chat_input"
-        )
-
-        # Send button and clear
-        col1, col2 = st.columns([1, 3])
-        with col1:
-            send_clicked = st.button("Send", type="primary")
-        with col2:
-            if st.button("Clear Chat"):
-                st.session_state.chat_history = []
-                if 'current_question' in st.session_state:
-                    del st.session_state.current_question
-                st.experimental_rerun()
-
-        # Generate response
-        if send_clicked and user_question.strip():
-            with st.spinner("Thinking..."):
-                response = chatbot.generate_response(user_question, resume_context)
-                if 'current_question' in st.session_state:
-                    del st.session_state.current_question
-                st.experimental_rerun()
-
-        # Display chat history
-        if st.session_state.chat_history:
-            st.subheader("💬 Conversation")
-
-            for conv in reversed(st.session_state.chat_history[-5:]):  # Show last 5
-                st.markdown(f"**You:** {conv['user']}")
-                st.markdown(f"**AI:** {conv['bot']}")
-                st.caption(f"Time: {conv['timestamp']}")
-                st.divider()
-
-    else:
-        st.warning("Click 'Initialize AI' to start chatting")
-
-    with st.expander("ℹ️ About this AI"):
-        st.markdown("""
-        **Model**: DistilGPT2 (CPU-optimized)
-        **Size**: ~250MB download
-        **Speed**: 2-5 seconds per response
-        **Memory**: ~1GB RAM usage
-
-        This model runs entirely on your CPU and provides helpful resume advice.
-        First initialization will download the model files.
-        """)
 def main():
     st.set_page_config(
         page_title="AI Resume Analyzer with Chatbot",
@@ -1046,7 +1109,7 @@ def main():
             # Chat Interface - Properly indented inside the file upload condition
             st.markdown("---")
             st.header("💬 Chat with Resume Assistant")
-            create_simple_chat_interface(st.session_state.get('resume_context', ''))
+            create_improved_chat_interface(st.session_state.get('resume_context', ''))
 
     except Exception as e:
         st.error(f"Error during analysis: {str(e)}")