QuentinL52 committed on
Commit
d79ca2a
·
verified ·
1 Parent(s): ce655be

Rename src/services/cv_service.py to src/services/graph_service.py

Browse files
src/services/cv_service.py DELETED
@@ -1,91 +0,0 @@
1
- import json
2
- import logging
3
- import os
4
- from datetime import datetime
5
- from typing import Dict, Any, List
6
- from pymongo import MongoClient
7
- from src.config import load_pdf
8
- from src.agents.cv_agents import CVAgentOrchestrator
9
- from src.agents.scoring_agent import SimpleScoringAgent
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
class CVParsingService:
    """Parse a CV PDF into structured candidate data and persist it.

    Pipeline: load the PDF text, run the agent orchestrator to split and
    extract sections, score skill levels, then store the result in MongoDB.
    Persistence is best-effort: when MongoDB is unavailable the service
    still parses and returns CV data, it just skips storage.
    """

    def __init__(self, models: Dict[str, Any]):
        """
        Args:
            models: Shared model registry; the "llm" entry is handed to the
                CV agent orchestrator.
        """
        self.models = models
        self.orchestrator = CVAgentOrchestrator(models.get("llm"))
        self.scoring_agent = SimpleScoringAgent()

        # MongoDB initialisation. Catch Exception (not a bare `except:`) so
        # SystemExit/KeyboardInterrupt still propagate, and log the cause
        # instead of failing silently.
        try:
            self.client = MongoClient(os.getenv("MONGO_URI"))
            self.db = self.client[os.getenv("MONGO_DB_NAME")]
            self.candidate_collection = self.db[os.getenv("MONGO_CV_COLLECTION")]
        except Exception as e:
            logger.warning("MongoDB initialisation failed, persistence disabled: %s", e)
            self.client = None
            self.candidate_collection = None

    def parse_cv(self, pdf_path: str, user_id: str = None) -> Dict[str, Any]:
        """Parse the CV at *pdf_path* and return the structured data.

        Falls back to :meth:`_create_fallback_data` when the PDF yields no
        text or when the agent extraction is empty or invalid.

        Args:
            pdf_path: Path to the CV PDF file.
            user_id: Optional user identifier stored with the profile.

        Returns:
            The extracted CV data, with a top-level "candidat" key.
        """
        cv_text = load_pdf(pdf_path)
        if not cv_text or not cv_text.strip():
            return self._create_fallback_data()

        logger.info("CV text loaded: %d characters", len(cv_text))
        sections = self.orchestrator.split_cv_sections(cv_text)
        logger.info("Sections extracted: %s", list(sections.keys()))
        cv_data = self.orchestrator.extract_all_sections(sections)
        logger.info("CV data extracted: %s", cv_data is not None)

        if not cv_data or not cv_data.get("candidat") or not self._is_valid_extraction(cv_data):
            logger.warning("Agent extraction failed or incomplete, using fallback extraction")
            return self._create_fallback_data()

        logger.info("Calculating skill levels...")
        scores = self.scoring_agent.calculate_scores(cv_data["candidat"])
        if scores and scores.get("analyse_competences"):
            cv_data["candidat"].update(scores)
            skills_count = len(scores.get("analyse_competences", []))
            levels_summary = self._get_levels_summary(scores.get("analyse_competences", []))
            logger.info("Skill levels calculated: %d skills - %s", skills_count, levels_summary)
        else:
            logger.warning("No skill levels calculated, adding empty analysis")
            cv_data["candidat"]["analyse_competences"] = []
        self._save_profile(cv_data, user_id)

        return cv_data

    def _create_fallback_data(self) -> Dict[str, Any]:
        """Return a minimal empty CV structure used when extraction fails.

        NOTE(review): the original file called this method without ever
        defining it, so every fallback path raised AttributeError. This
        minimal structure mirrors the keys checked by _is_valid_extraction.
        """
        return {
            "candidat": {
                "informations_personnelles": {},
                "compétences": {"hard_skills": [], "soft_skills": []},
                "expériences": [],
                "analyse_competences": [],
            }
        }

    def _save_profile(self, cv_data: Dict[str, Any], user_id: str = None):
        """Store the full CV structure (including the 'candidat' key) in MongoDB.

        No-op when MongoDB is unavailable or *cv_data* is not a dict; storage
        errors are logged, never raised.
        """
        if self.candidate_collection is None or not isinstance(cv_data, dict):
            return

        try:
            # Keep the complete structure, including the 'candidat' key.
            profile_data = cv_data.copy()
            profile_data["created_at"] = datetime.utcnow()
            profile_data["updated_at"] = datetime.utcnow()

            if user_id:
                profile_data["user_id"] = user_id

            self.candidate_collection.insert_one(profile_data)
            logger.info("CV stocké dans MongoDB avec succès")
        except Exception as e:
            logger.error("Erreur stockage CV: %s", e)

    def _get_levels_summary(self, competences: List[Dict[str, Any]]) -> str:
        """Return a "count level" summary string, e.g. "2 expert, 1 junior"."""
        levels_count = {}
        for comp in competences:
            level = comp.get("level", "unknown")
            levels_count[level] = levels_count.get(level, 0) + 1
        return ", ".join([f"{count} {level}" for level, count in levels_count.items()])

    def _is_valid_extraction(self, cv_data: Dict[str, Any]) -> bool:
        """True when the extraction produced at least a name, skills, or experience."""
        candidat = cv_data.get("candidat", {})
        has_info = bool(candidat.get("informations_personnelles", {}).get("nom", "").strip())
        has_skills = bool(candidat.get("compétences", {}).get("hard_skills", []) or
                          candidat.get("compétences", {}).get("soft_skills", []))
        has_experience = bool(candidat.get("expériences", []))
        return has_info or has_skills or has_experience
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/services/graph_service.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import json
4
+ from typing import TypedDict, Annotated, Sequence, Dict, Any, List
5
+
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_core.runnables import Runnable
8
+ from langchain_core.messages import BaseMessage, AIMessage, HumanMessage
9
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
+ from langgraph.graph import StateGraph, END
11
+ from langgraph.prebuilt import ToolNode
12
+
13
+ from tools.analysis_tools import trigger_interview_analysis
14
+
15
class AgentState(TypedDict):
    """State threaded through the interview graph.

    The ``Annotated`` reducer on ``messages`` tells LangGraph to concatenate
    message lists, so each node returns only its new messages and they are
    appended to the running conversation history.
    """

    messages: Annotated[Sequence[BaseMessage], lambda x, y: x + y]
    # Identifiers forwarded to the final analysis tool.
    user_id: str
    job_offer_id: str
    # JSON-serialized job offer (populated when the graph is invoked).
    job_description: str
20
+
21
class GraphInterviewProcessor:
    """LangGraph-based interview agent.

    Compiles a small state graph: an LLM agent node drives the conversation,
    a tool node executes intermediate tool calls, and a dedicated final node
    triggers the interview analysis when the agent decides the interview is
    over. All static data (prompt template, CV, job offer) is prepared once
    at construction time.
    """

    def __init__(self, payload: Dict[str, Any]):
        """Prepare prompt material from *payload* and compile the graph.

        Args:
            payload: Must contain "user_id", "job_offer_id", "job_offer" and
                a "cv_document" dict with a "candidat" key.

        Raises:
            KeyError: If a required payload key is missing.
            ValueError: If no candidate data is found in the payload.
        """
        logging.info("Initialisation de GraphInterviewProcessor...")

        self.user_id = payload["user_id"]
        self.job_offer_id = payload["job_offer_id"]
        self.job_offer = payload["job_offer"]
        self.cv_data = payload.get("cv_document", {}).get('candidat', {})

        if not self.cv_data:
            raise ValueError("Données du candidat non trouvées dans le payload.")

        # Static prompt material, computed once per processor instance.
        self.system_prompt_template = self._load_prompt_template('prompts/rag_prompt_old.txt')
        self.formatted_cv_str = self._format_cv_for_prompt()
        self.skills_summary = self._extract_skills_summary()
        self.reconversion_info = self._extract_reconversion_info()

        self.agent_runnable = self._create_agent_runnable()
        self.graph = self._build_graph()
        logging.info("GraphInterviewProcessor initialisé avec succès.")

    def _load_prompt_template(self, file_path: str) -> str:
        """Read the system prompt template; fall back to a generic prompt."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError:
            logging.error("Fichier prompt introuvable: %s", file_path)
            return "Vous êtes un assistant RH."

    def _format_cv_for_prompt(self) -> str:
        """Serialize the candidate CV as pretty-printed JSON for the prompt."""
        return json.dumps(self.cv_data, indent=2, ensure_ascii=False)

    def _extract_skills_summary(self) -> str:
        """Build a one-line "skill: level" summary from the CV analysis."""
        competences = self.cv_data.get('analyse_competences', [])
        if not competences:
            return "Aucune analyse de compétences disponible."
        summary = [f"{comp.get('skill', '')}: {comp.get('level', 'débutant')}" for comp in competences]
        return "Niveaux de compétences du candidat: " + " | ".join(summary)

    def _extract_reconversion_info(self) -> str:
        """Describe the candidate's career-change status for the prompt."""
        reconversion = self.cv_data.get('reconversion', {})
        if reconversion.get('is_reconversion'):
            return f"CANDIDAT EN RECONVERSION: {reconversion.get('analysis', '')}"
        return "Le candidat n'est pas identifié comme étant en reconversion."

    def _create_agent_runnable(self) -> Runnable:
        """Create the runnable chain (prompt | tool-bound LLM) acting as the agent."""
        prompt = ChatPromptTemplate.from_messages([
            ("system", "{system_prompt_content}"),
            MessagesPlaceholder(variable_name="messages"),
        ])
        llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini", temperature=0.7)
        tools = [trigger_interview_analysis]
        llm_with_tools = llm.bind_tools(tools)
        return prompt | llm_with_tools

    def _agent_node(self, state: AgentState):
        """Fill the system prompt with static + state data and invoke the agent."""
        job_description_str = json.dumps(self.job_offer, ensure_ascii=False)

        system_prompt_content = self.system_prompt_template.format(
            user_id=state['user_id'],
            job_offer_id=state['job_offer_id'],
            entreprise=self.job_offer.get('entreprise', 'notre entreprise'),
            poste=self.job_offer.get('poste', 'ce poste'),
            mission=self.job_offer.get('mission', 'Non spécifiée'),
            profil_recherche=self.job_offer.get('profil_recherche', 'Non spécifié'),
            competences=self.job_offer.get('competences', 'Non spécifiées'),
            pole=self.job_offer.get('pole', 'Non spécifié'),
            cv=self.formatted_cv_str,
            skills_analysis=self.skills_summary,
            reconversion_analysis=self.reconversion_info,
            job_description=job_description_str
        )

        response = self.agent_runnable.invoke({
            "system_prompt_content": system_prompt_content,
            "messages": state["messages"]
        })

        # The reducer on AgentState.messages appends this to the history.
        return {"messages": [response]}

    def _router(self, state: AgentState) -> str:
        """Route the graph based on the agent's last response.

        The analysis tool ends the interview and gets its own node; any
        other tool call goes to the generic tool node; no tool call means
        the agent's turn is over.
        """
        last_message = state["messages"][-1]
        if hasattr(last_message, 'tool_calls') and last_message.tool_calls:
            if any(tool_call.get('name') == 'trigger_interview_analysis' for tool_call in last_message.tool_calls):
                return "call_final_tool"
            return "call_tool"
        return "end_turn"

    def _final_analysis_node(self, state: AgentState):
        """Invoke the final analysis tool with manually built arguments.

        Arguments come from the graph state (not the LLM's tool-call args)
        to guarantee reliability. Only user/assistant messages are kept in
        the transcript.
        """
        conversation_history = []
        for msg in state["messages"]:
            if isinstance(msg, HumanMessage):
                role = "user"
            elif isinstance(msg, AIMessage):
                role = "assistant"
            else:
                continue
            conversation_history.append({"role": role, "content": msg.content})

        tool_input = {
            "user_id": state['user_id'],
            "job_offer_id": state['job_offer_id'],
            "job_description": state['job_description'],
            "conversation_history": conversation_history
        }

        # Fire-and-forget: the tool's side effect is the analysis itself.
        trigger_interview_analysis.invoke(tool_input)
        return {}

    def _build_graph(self) -> Any:
        """Build and compile the state graph (was annotated `-> any`, i.e. the builtin)."""
        tool_node = ToolNode([trigger_interview_analysis])

        graph = StateGraph(AgentState)
        graph.add_node("agent", self._agent_node)
        graph.add_node("tools", tool_node)
        graph.add_node("final_tool_node", self._final_analysis_node)

        graph.set_entry_point("agent")

        graph.add_conditional_edges(
            "agent",
            self._router,
            {
                "call_tool": "tools",
                "call_final_tool": "final_tool_node",
                "end_turn": END
            }
        )

        graph.add_edge("tools", "agent")
        graph.add_edge("final_tool_node", END)

        return graph.compile()

    def invoke(self, messages: List[Dict[str, Any]]):
        """Run one conversation turn through the graph.

        Args:
            messages: Conversation history as {"role", "content"} dicts.

        Returns:
            A dict with "response" (assistant text) and "status"
            ("interviewing" or "finished").
        """
        langchain_messages = [HumanMessage(content=m["content"]) if m["role"] == "user" else AIMessage(content=m["content"]) for m in messages]

        if not langchain_messages:
            logging.info("Historique de conversation vide. Ajout d'un message de démarrage interne.")
            langchain_messages.append(HumanMessage(content="Bonjour, je suis prêt à commencer l'entretien."))

        initial_state = {
            "user_id": self.user_id,
            "job_offer_id": self.job_offer_id,
            "messages": langchain_messages,
            "job_description": json.dumps(self.job_offer, ensure_ascii=False),
        }

        final_state = self.graph.invoke(initial_state)

        if not final_state or not final_state.get('messages'):
            logging.error("L'état final est vide ou ne contient pas de messages.")
            return {"response": "Erreur: Impossible de générer une réponse.", "status": "finished"}
        last_message = final_state['messages'][-1]
        # A trailing tool call means the analysis tool fired, i.e. the interview ended.
        status = "finished" if hasattr(last_message, 'tool_calls') and last_message.tool_calls else "interviewing"
        response_content = last_message.content

        return {
            "response": response_content,
            "status": status
        }