Spaces:
Build error
| from fastapi import FastAPI, UploadFile, File | |
| import json, re, io, os | |
| from llama_cpp import Llama | |
| from PyPDF2 import PdfReader | |
| from docx import Document | |
| from huggingface_hub import hf_hub_download | |
# --- Model configuration ---------------------------------------------------
# Quantized CapybaraHermes (Mistral 7B) in GGUF format from Hugging Face.
MODEL_REPO = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q3_K_M.gguf"  # smaller Q3 quant for faster CPU inference
CACHE_DIR = "/tmp/hf_cache"

# Reuse a previously downloaded model when present to avoid re-fetching a
# multi-GB file on every container start.
LOCAL_MODEL_PATH = os.path.join(CACHE_DIR, MODEL_FILE)
if not os.path.exists(LOCAL_MODEL_PATH):
    # No placeholders in this message, so it is a plain string (not an f-string).
    print("🔹 Model not found locally. Downloading from Hugging Face...")
    LOCAL_MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, cache_dir=CACHE_DIR)
    print(f"✅ Model downloaded and cached at {LOCAL_MODEL_PATH}")
else:
    print(f"✅ Model already cached at {LOCAL_MODEL_PATH}")

# Load the model once at import time. n_ctx=2048 keeps memory and latency low;
# n_gpu_layers=0 forces CPU-only inference.
print(f"🔹 Loading Mistral 7B (Q3_K_M) from {LOCAL_MODEL_PATH} (This may take a while)")
llm = Llama(model_path=LOCAL_MODEL_PATH, n_ctx=2048, n_gpu_layers=0)
print("✅ Model loaded successfully!")

app = FastAPI(title="Resume Parsing API", description="Extracts key details from resumes using Mistral 7B")
# ✅ Extract Text from PDF or DOCX
def extract_text_from_resume(uploaded_file):
    """Extract plain text from an uploaded PDF or DOCX resume.

    Args:
        uploaded_file: FastAPI ``UploadFile`` exposing ``.filename`` and a
            file-like ``.file`` attribute.

    Returns:
        The extracted text as a single newline-joined string, or ``None``
        for unsupported file types.
    """
    file_content = uploaded_file.file.read()
    file_stream = io.BytesIO(file_content)
    # Compare extensions case-insensitively so "RESUME.PDF" is accepted too
    # (the original check silently rejected upper-case extensions).
    name = uploaded_file.filename.lower()
    if name.endswith(".pdf"):
        reader = PdfReader(file_stream)
        # Skip pages where extraction yields nothing (e.g. scanned images).
        return "\n".join(page.extract_text() for page in reader.pages if page.extract_text())
    if name.endswith(".docx"):
        doc = Document(file_stream)
        return "\n".join(para.text for para in doc.paragraphs)
    return None  # unsupported extension
| # ✅ Extract Email & Phone Number | |
| def extract_email_phone(text): | |
| email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}" | |
| phone_pattern = r"\+?\d{1,3}?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}" | |
| email_match = re.search(email_pattern, text) | |
| phone_match = re.search(phone_pattern, text) | |
| return { | |
| "email": email_match.group() if email_match else "Email not found", | |
| "phone": phone_match.group() if phone_match else "Phone not found" | |
| } | |
# ✅ Analyze Resume using Mistral 7B
def analyze_resume(text):
    """Ask the local Mistral model to extract structured fields from resume text.

    Args:
        text: Raw resume text (already extracted from PDF/DOCX).

    Returns:
        dict: The parsed JSON structure on success, or
        ``{"error": ..., "raw_output": ...}`` when the model output cannot
        be parsed as JSON.
    """
    truncated_text = text[:2048]  # keep the prompt within the 2048-token context window
    prompt = f"""
Extract these details from the resume:
1. Full Name
2. Work Experience (Company Names, Roles, Responsibilities, Duration)
3. Qualifications (Degrees, Certifications)
4. List of Skills
Resume Text: {truncated_text}
Format response as a **strict JSON object**:
{{
"name": "Candidate Name",
"experience": [
{{
"company": "Company Name",
"role": "Job Title",
"duration": "Start Date - End Date",
"responsibilities": "Brief work responsibilities"
}}
],
"qualifications": "Degree, Certifications",
"skills": ["List of skills"]
}}
"""
    response = llm(prompt, max_tokens=500)  # modest cap keeps CPU latency reasonable
    output = response["choices"][0]["text"].strip()
    print("🔹 Raw LLaMA Output:\n", output)
    try:
        return json.loads(output)
    except json.JSONDecodeError:
        # LLMs frequently wrap JSON in prose or markdown code fences; salvage
        # the first top-level {...} span and retry before giving up.
        match = re.search(r"\{.*\}", output, re.DOTALL)
        if match:
            try:
                return json.loads(match.group())
            except json.JSONDecodeError:
                pass
        return {"error": "Failed to parse LLaMA output", "raw_output": output}
# ✅ FastAPI Route to Parse Resume
# The decorator below registers the endpoint; without it the function was
# never exposed as a route, so the API had no reachable parsing endpoint.
@app.post("/parse-resume")
async def parse_resume(file: UploadFile = File(...)):
    """Parse an uploaded resume (PDF/DOCX) and return extracted details.

    Returns an ``{"error": ...}`` dict for unsupported/unextractable files,
    otherwise ``{"success": True, "data": {...}}`` combining regex-based
    contact info with the LLM's structured extraction.
    """
    text = extract_text_from_resume(file)
    if not text:
        return {"error": "Unsupported file format or could not extract text"}
    extracted_info = extract_email_phone(text)  # regex-based email/phone
    llm_data = analyze_resume(text)  # LLM-based name/experience/skills
    # LLM fields are merged on top; on parse failure this adds the
    # error/raw_output keys alongside the contact info.
    extracted_info.update(llm_data)
    return {"success": True, "data": extracted_info}