"""Resume parsing API.

Accepts an uploaded PDF or DOCX resume, pulls the e-mail address and phone
number out with regular expressions, and asks a local GGUF model (loaded
through llama-cpp-python) for the remaining fields as JSON.
"""

import io
import json
import os
import re

from docx import Document
from fastapi import FastAPI, UploadFile, File
from llama_cpp import Llama
from PyPDF2 import PdfReader

# Hugging Face repository that hosts the GGUF builds of the model.
MODEL_PATH = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
# The repository holds several quantisations; ``from_pretrained`` needs a file
# name (or glob) that selects exactly one of them.
MODEL_FILE = "*Q4_K_M.gguf"

# Number of resume characters placed in the prompt (keeps it within n_ctx).
MAX_RESUME_CHARS = 3500

# Compiled once at import time instead of on every request.
EMAIL_PATTERN = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
PHONE_PATTERN = re.compile(
    r"\+?\d{1,3}?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
)

print(f"🔹 Loading Mistral 7B from Hugging Face Model Hub: {MODEL_PATH} (This may take a while)")
llm = Llama.from_pretrained(
    MODEL_PATH,
    filename=MODEL_FILE,
    n_ctx=4096,
    n_gpu_layers=-1,  # Use GPU if available
)
print("✅ Model loaded successfully!")

app = FastAPI(
    title="Resume Parsing API",
    description="Extracts key details from resumes using Mistral 7B",
)


def extract_text_from_resume(uploaded_file):
    """Return the plain text of an uploaded PDF or DOCX file.

    Args:
        uploaded_file: Object exposing ``.file`` (a readable binary stream)
            and ``.filename``; in this module it is a FastAPI ``UploadFile``.

    Returns:
        The extracted text joined with newlines, or ``None`` when the file
        extension is neither ``.pdf`` nor ``.docx``. The string may be empty
        if the document contains no extractable text.
    """
    file_stream = io.BytesIO(uploaded_file.file.read())
    # The filename may be missing, and extensions are often upper-case
    # ("CV.PDF"), so normalise before comparing.
    filename = (uploaded_file.filename or "").lower()

    if filename.endswith(".pdf"):
        reader = PdfReader(file_stream)
        # Extract each page exactly once; extraction is the expensive step.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)

    if filename.endswith(".docx"):
        doc = Document(file_stream)
        return "\n".join(para.text for para in doc.paragraphs)

    return None


def extract_email_phone(text):
    """Find the first e-mail address and phone number in *text*.

    Returns:
        A dict with keys ``"email"`` and ``"phone"``. A field that has no
        match holds the placeholder ``"Email not found"`` /
        ``"Phone not found"``.
    """
    email_match = EMAIL_PATTERN.search(text)
    phone_match = PHONE_PATTERN.search(text)
    return {
        "email": email_match.group() if email_match else "Email not found",
        "phone": phone_match.group() if phone_match else "Phone not found",
    }


def _parse_llm_json(output):
    """Return the JSON object contained in *output*, or ``None``.

    The whole string is tried first. If that fails, the span from the first
    ``{`` to the last ``}`` is tried, which covers completions that wrap the
    JSON in extra prose or code fences. Only a JSON object (dict) is accepted.
    """
    candidates = [output]
    start = output.find("{")
    end = output.rfind("}")
    if 0 <= start < end:
        candidates.append(output[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def analyze_resume(text):
    """Ask the model to extract name, experience, qualifications and skills.

    Args:
        text: Full resume text; only the first ``MAX_RESUME_CHARS``
            characters are sent to the model.

    Returns:
        The dict parsed from the model's JSON answer, or
        ``{"error": ..., "raw_output": ...}`` when no JSON object could be
        parsed from the completion.
    """
    truncated_text = text[:MAX_RESUME_CHARS]  # Keep within context limit

    prompt = f"""
    Extract these details from the resume:
    1. Full Name
    2. Work Experience (Company Names, Roles, Responsibilities, Duration)
    3. Qualifications (Degrees, Certifications)
    4. List of Skills

    Resume Text: {truncated_text}

    Format response as a **strict JSON object**:
    {{
        "name": "Candidate Name",
        "experience": [
            {{
                "company": "Company Name",
                "role": "Job Title",
                "duration": "Start Date - End Date",
                "responsibilities": "Brief work responsibilities"
            }}
        ],
        "qualifications": "Degree, Certifications",
        "skills": ["List of skills"]
    }}
    """

    response = llm(prompt, max_tokens=700)
    output = response["choices"][0]["text"].strip()
    print("🔹 Raw LLaMA Output:\n", output)

    parsed = _parse_llm_json(output)
    if parsed is None:
        return {"error": "Failed to parse LLaMA output", "raw_output": output}
    return parsed


@app.post("/parse-resume/")
async def parse_resume(file: UploadFile = File(...)):
    """Parse an uploaded resume and return the extracted fields.

    Returns ``{"error": ...}`` when the file type is unsupported or no text
    could be extracted; otherwise ``{"success": True, "data": {...}}`` where
    ``data`` merges the regex results with the model output. Keys present in
    the model output overwrite regex results of the same name.
    """
    # NOTE(review): text extraction and inference are blocking calls inside an
    # async handler, so requests are served one at a time. Kept as-is so the
    # single shared model instance is never called concurrently.
    text = extract_text_from_resume(file)
    if not text:
        return {"error": "Unsupported file format or could not extract text"}

    extracted_info = extract_email_phone(text)
    llm_data = analyze_resume(text)
    extracted_info.update(llm_data)

    return {"success": True, "data": extracted_info}