import json
import logging
import os
from collections import Counter
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from pymongo import MongoClient

from src.config import load_pdf
from src.agents.cv_agents import CVAgentOrchestrator
from src.agents.scoring_agent import SimpleScoringAgent

logger = logging.getLogger(__name__)


class CVParsingService:
    """Parse a CV PDF into structured data, score its skills and persist it.

    The pipeline is: load the PDF text, split it into sections, extract the
    structured data with the agent orchestrator, enrich the ``candidat``
    entry with the scoring agent's output, then store the result in MongoDB
    when a collection is available.

    NOTE(review): ``parse_cv`` calls ``self._create_fallback_data()``, which
    is not defined in the visible part of this file -- presumably it is
    defined further down or elsewhere; confirm before relying on the
    fallback paths.
    """

    def __init__(self, models: Dict[str, Any]):
        """Build the agents and try to open the MongoDB collection.

        Args:
            models: Mapping of model handles; the ``"llm"`` entry (possibly
                ``None``) is handed to the ``CVAgentOrchestrator``.

        MongoDB access is best-effort: when the connection cannot be set up
        (for example because an environment variable is missing), ``client``,
        ``db`` and ``candidate_collection`` are all ``None`` and
        ``_save_profile`` becomes a no-op.
        """
        self.models = models
        self.orchestrator = CVAgentOrchestrator(models.get("llm"))
        self.scoring_agent = SimpleScoringAgent()

        # MongoDB initialisation. Start from a consistent "disabled" state so
        # every attribute exists whatever happens below.
        self.client = None
        self.db = None
        self.candidate_collection = None

        client = None
        try:
            client = MongoClient(os.getenv("MONGO_URI"))
            db = client[os.getenv("MONGO_DB_NAME")]
            collection = db[os.getenv("MONGO_CV_COLLECTION")]
        except Exception as exc:
            # Deliberately non-fatal, but no longer silent, and no longer
            # swallowing KeyboardInterrupt / SystemExit.
            logger.warning(
                "MongoDB initialisation failed, CV persistence disabled: %s", exc
            )
            if client is not None:
                # Do not leak a half-configured client.
                client.close()
        else:
            self.client = client
            self.db = db
            self.candidate_collection = collection

    def parse_cv(self, pdf_path: str, user_id: Optional[str] = None) -> Dict[str, Any]:
        """Extract, score and store the CV found at ``pdf_path``.

        Args:
            pdf_path: Path passed to ``load_pdf``.
            user_id: Optional identifier stored alongside the profile.

        Returns:
            The extracted CV data, whose ``"candidat"`` entry always carries
            an ``"analyse_competences"`` key, or the result of
            ``_create_fallback_data()`` when the PDF text is empty or the
            extraction is missing/invalid.
        """
        cv_text = load_pdf(pdf_path)
        if not cv_text or not cv_text.strip():
            return self._create_fallback_data()
        logger.info("CV text loaded: %d characters", len(cv_text))

        sections = self.orchestrator.split_cv_sections(cv_text)
        logger.info("Sections extracted: %s", list(sections.keys()))

        cv_data = self.orchestrator.extract_all_sections(sections)
        logger.info("CV data extracted: %s", cv_data is not None)

        if (
            not cv_data
            or not cv_data.get("candidat")
            or not self._is_valid_extraction(cv_data)
        ):
            logger.warning(
                "Agent extraction failed or incomplete, using fallback extraction"
            )
            return self._create_fallback_data()

        logger.info("Calculating skill levels...")
        scores = self.scoring_agent.calculate_scores(cv_data["candidat"])

        if scores and scores.get("analyse_competences"):
            # Merge every key produced by the scoring agent into the candidate.
            cv_data["candidat"].update(scores)
            competences = scores.get("analyse_competences", [])
            logger.info(
                "Skill levels calculated: %d skills - %s",
                len(competences),
                self._get_levels_summary(competences),
            )
        else:
            logger.warning("No skill levels calculated, adding empty analysis")
            cv_data["candidat"]["analyse_competences"] = []

        self._save_profile(cv_data, user_id)
        return cv_data

    def _save_profile(self, cv_data: Dict[str, Any], user_id: Optional[str] = None):
        """Store the CV in MongoDB, keeping the full structure with its 'candidat' key.

        Does nothing when no collection is available or ``cv_data`` is not a
        dict. Storage errors are logged and never propagated.

        Args:
            cv_data: Extracted CV data; it is shallow-copied so the caller's
                dict does not receive the timestamps or ``user_id``.
            user_id: Optional identifier added to the stored document.
        """
        if self.candidate_collection is None or not isinstance(cv_data, dict):
            return
        try:
            # Keep the complete structure, including the 'candidat' key.
            profile_data = cv_data.copy()
            # One timezone-aware timestamp so created_at == updated_at on a
            # fresh insert (datetime.utcnow() is deprecated and was called
            # twice, yielding two slightly different values).
            now = datetime.now(timezone.utc)
            profile_data["created_at"] = now
            profile_data["updated_at"] = now
            if user_id:
                profile_data["user_id"] = user_id
            self.candidate_collection.insert_one(profile_data)
            logger.info("CV stocké dans MongoDB avec succès")
        except Exception as e:
            # logger.exception keeps the traceback for diagnosis.
            logger.exception("Erreur stockage CV: %s", e)

    def _get_levels_summary(self, competences: List[Dict[str, Any]]) -> str:
        """Return a summary such as ``"2 expert, 1 junior"``.

        Each entry is counted under its ``"level"`` value (``"unknown"`` when
        the key is absent); levels appear in order of first occurrence.
        """
        levels_count = Counter(comp.get("level", "unknown") for comp in competences)
        return ", ".join(f"{count} {level}" for level, count in levels_count.items())

    def _is_valid_extraction(self, cv_data: Dict[str, Any]) -> bool:
        """Tell whether the extraction contains at least one usable element.

        The data is considered valid when the candidate has a non-blank name,
        at least one hard or soft skill, or at least one experience. Missing,
        ``None`` or wrongly-typed sub-structures count as absent instead of
        raising.
        """
        candidat = cv_data.get("candidat") or {}
        if not isinstance(candidat, dict):
            return False

        personal = candidat.get("informations_personnelles") or {}
        name = personal.get("nom") if isinstance(personal, dict) else None
        has_info = isinstance(name, str) and bool(name.strip())

        skills = candidat.get("compétences") or {}
        has_skills = isinstance(skills, dict) and bool(
            skills.get("hard_skills") or skills.get("soft_skills")
        )

        has_experience = bool(candidat.get("expériences"))
        return has_info or has_skills or has_experience