QuentinL52 committed on
Commit
2a885ce
·
verified ·
1 Parent(s): 833f869

Update src/cv_parsing_agents.py

Browse files
Files changed (1) hide show
  1. src/cv_parsing_agents.py +52 -44
src/cv_parsing_agents.py CHANGED
@@ -1,50 +1,58 @@
1
- import os
2
- import json
 
3
 
4
- from src.crew.crew_pool import analyse_cv
5
- from src.config import load_pdf
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
def clean_dict_keys(data):
    """Return a copy of *data* in which every dictionary key is a string.

    The structure is walked recursively: dictionaries get their keys passed
    through ``str``, lists and tuples have each element cleaned, and any other
    value is returned unchanged.

    Args:
        data: Arbitrarily nested combination of dicts, lists, tuples and
            scalar values.

    Returns:
        A new structure of the same shape with string-only dictionary keys.
    """
    if isinstance(data, dict):
        return {str(key): clean_dict_keys(value) for key, value in data.items()}
    if isinstance(data, list):
        return [clean_dict_keys(element) for element in data]
    if isinstance(data, tuple):
        # Dicts nested inside tuples would otherwise keep their original keys.
        cleaned = [clean_dict_keys(element) for element in data]
        # Named tuples take their fields positionally; plain tuples take one iterable.
        return type(data)(*cleaned) if hasattr(data, "_fields") else tuple(cleaned)
    return data
14
 
15
class CvParserAgent:
    """Turn a CV stored as a PDF file into a dictionary of extracted data.

    The result of ``load_pdf(pdf_path)`` is handed to ``analyse_cv``; the JSON
    document found in the ``raw`` attribute of the answer is then decoded.
    """

    def __init__(self, pdf_path: str):
        # Path forwarded as-is to ``load_pdf`` when ``process`` runs.
        self.pdf_path = pdf_path

    @staticmethod
    def _extract_json_payload(raw_string: str) -> str:
        """Return the JSON text contained in *raw_string*, without code fences.

        Handles three shapes of answer: a ```` ```json ```` fenced block, a
        fenced block with another (or no) language tag, and bare JSON text.
        """
        fence = "```"
        json_fence = "```json"
        if json_fence in raw_string:
            payload = raw_string.split(json_fence, 1)[1]
        elif fence in raw_string:
            payload = raw_string.split(fence, 1)[1]
            # Drop an info string such as "JSON" sitting on the fence line.
            info, newline, remainder = payload.partition("\n")
            if newline and info.strip().isalpha():
                payload = remainder
        else:
            return raw_string.strip()
        # Keep only what precedes the closing fence (if any).
        return payload.split(fence, 1)[0].strip()

    def process(self) -> "dict | None":
        """Process the PDF file and extract its content as JSON data.

        Does not connect to any database.

        Returns:
            A dictionary containing the data extracted from the CV, or
            ``None`` when the analysis yields nothing, the answer is not
            valid JSON, or any other error occurs (the error is printed).
        """
        print(f"Début du traitement du CV : {self.pdf_path}")

        try:
            cv_text_content = load_pdf(self.pdf_path)
            crew_output = analyse_cv(cv_text_content)

            if not crew_output or not hasattr(crew_output, 'raw') or not crew_output.raw.strip():
                print("Erreur : L'analyse par le crew n'a pas retourné de résultat.")
                return None

            json_string_cleaned = self._extract_json_payload(crew_output.raw)
            profile_data = json.loads(json_string_cleaned)
            return clean_dict_keys(profile_data)

        except json.JSONDecodeError as e:
            # ``crew_output`` is always bound here: decoding happens after it is set.
            print(f"Erreur de décodage JSON : {e}")
            print(f"Données brutes reçues : {crew_output.raw}")
            return None
        except Exception as e:
            # Best-effort boundary: callers receive None instead of an exception.
            print(f"Une erreur inattendue est survenue dans CvParserAgent : {e}")
            return None
 
 
 
 
 
1
+ import torch
2
+ from transformers import pipeline
3
+ from sentence_transformers import SentenceTransformer, util
4
 
5
class MultiModelInterviewAnalyzer:
    """Analyse an interview transcript with three independent models.

    A transcript is a list of message dicts carrying ``'role'`` and
    ``'content'`` keys; only messages whose role is ``'user'`` are analysed.
    """

    def __init__(self):
        # Text-classification pipeline; uses GPU 0 when CUDA is available, CPU otherwise.
        # NOTE(review): ``return_all_scores`` is deprecated in recent transformers
        # releases in favour of ``top_k`` - confirm output ordering before switching.
        self.sentiment_analyzer = pipeline(
            "text-classification",
            model="astrosbd/french_emotion_camembert",
            return_all_scores=True,
            device=0 if torch.cuda.is_available() else -1,
        )
        self.similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
        # Unlike the sentiment pipeline, no explicit ``device`` is passed here
        # (the GPU selection was disabled), so the pipeline default applies.
        self.intent_classifier = pipeline(
            "zero-shot-classification",
            model="joeddav/xlm-roberta-large-xnli",
        )

    @staticmethod
    def _user_contents(messages):
        """Return the ``content`` of every message whose role is ``'user'``."""
        return [msg['content'] for msg in messages if msg['role'] == 'user']

    def analyze_sentiment(self, messages):
        """Classify each user message with the sentiment pipeline.

        Returns:
            The pipeline output for the user messages, or ``[]`` when the
            transcript contains no user message.
        """
        user_messages = self._user_contents(messages)
        if not user_messages:
            return []
        return self.sentiment_analyzer(user_messages)

    def compute_semantic_similarity(self, messages, job_requirements):
        """Score how close the user's answers are to the job requirements.

        All user messages are joined into one text, embedded together with
        *job_requirements*, and compared by cosine similarity.

        Returns:
            The highest cosine similarity as a float, or ``0.0`` when there is
            no user text or no requirement to compare against.
        """
        user_answers = " ".join(self._user_contents(messages))
        # Embedding an empty text would produce a meaningless score, and an
        # empty requirement list would make ``max()`` fail on an empty tensor.
        if not user_answers.strip() or not job_requirements:
            return 0.0
        embedding_answers = self.similarity_model.encode(user_answers, convert_to_tensor=True)
        embedding_requirements = self.similarity_model.encode(job_requirements, convert_to_tensor=True)
        cosine_score = util.cos_sim(embedding_answers, embedding_requirements)
        return cosine_score.max().item()

    def classify_candidate_intent(self, messages):
        """Run zero-shot intent classification on each user message.

        Returns:
            The classifier output for the user messages, or ``[]`` when the
            transcript contains no user message.
        """
        user_answers = self._user_contents(messages)
        if not user_answers:
            return []
        candidate_labels = [
            "parle de son expérience technique",
            "exprime sa motivation",
            "pose une question",
            "exprime de l’incertitude ou du stress",
        ]
        return self.intent_classifier(user_answers, candidate_labels, multi_label=False)

    def run_full_analysis(self, conversation_history, job_requirements):
        """Run the three analyses and bundle their results in one dictionary.

        Returns:
            A dict with the similarity score rounded to two decimals, the
            sentiment results, the intent results and the transcript itself.
        """
        sentiment_results = self.analyze_sentiment(conversation_history)
        similarity_score = self.compute_semantic_similarity(conversation_history, job_requirements)
        intent_results = self.classify_candidate_intent(conversation_history)
        return {
            "overall_similarity_score": round(similarity_score, 2),
            "sentiment_analysis": sentiment_results,
            "intent_analysis": intent_results,
            "raw_transcript": conversation_history,
        }