Update modules/semantic_analysis.py
Browse files- modules/semantic_analysis.py +52 -19
modules/semantic_analysis.py
CHANGED
|
@@ -85,6 +85,29 @@ POS_TRANSLATIONS = {
|
|
| 85 |
}
|
| 86 |
########################################################################################################################################
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
def count_pos(doc):
    """Count how many tokens carry each part-of-speech tag.

    Args:
        doc: An iterable of tokens, each exposing a ``pos_`` attribute.

    Returns:
        A ``Counter`` keyed by ``pos_`` value. Tokens tagged ``'PUNCT'``
        are left out of the tally.
    """
    tally = Counter()
    for tok in doc:
        tag = tok.pos_
        if tag == 'PUNCT':
            # Punctuation is not counted.
            continue
        tally[tag] += 1
    return tally
|
| 90 |
|
|
@@ -95,31 +118,27 @@ from collections import Counter
|
|
| 95 |
|
| 96 |
# Mantén las definiciones de POS_COLORS y POS_TRANSLATIONS que ya tienes
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
"Lugares": [],
|
| 103 |
-
"Fechas": []
|
| 104 |
-
}
|
| 105 |
-
|
| 106 |
for ent in doc.ents:
|
| 107 |
if ent.label_ == "PERSON":
|
| 108 |
-
entities[
|
| 109 |
elif ent.label_ in ["LOC", "GPE"]:
|
| 110 |
-
entities[
|
| 111 |
elif ent.label_ == "DATE":
|
| 112 |
-
entities[
|
| 113 |
else:
|
| 114 |
-
entities[
|
| 115 |
-
|
| 116 |
return entities
|
| 117 |
|
|
|
|
| 118 |
def visualize_context_graph(doc, lang):
|
| 119 |
G = nx.Graph()
|
| 120 |
-
entities = extract_entities(doc)
|
| 121 |
-
|
| 122 |
-
color_map = {"Personas": "lightblue", "Conceptos": "lightgreen", "Lugares": "lightcoral", "Fechas": "lightyellow"}
|
| 123 |
|
| 124 |
# Add nodes
|
| 125 |
for category, items in entities.items():
|
|
@@ -139,7 +158,8 @@ def visualize_context_graph(doc, lang):
|
|
| 139 |
|
| 140 |
node_colors = [color_map[G.nodes[node]['category']] for node in G.nodes()]
|
| 141 |
|
| 142 |
-
nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=
|
|
|
|
| 143 |
|
| 144 |
# Add a legend
|
| 145 |
legend_elements = [plt.Rectangle((0,0),1,1,fc=color, edgecolor='none', label=category)
|
|
@@ -151,6 +171,7 @@ def visualize_context_graph(doc, lang):
|
|
| 151 |
|
| 152 |
return plt
|
| 153 |
|
|
|
|
| 154 |
def visualize_semantic_relations(doc, lang):
|
| 155 |
G = nx.Graph()
|
| 156 |
word_freq = Counter(token.text.lower() for token in doc if token.pos_ not in ['PUNCT', 'SPACE'])
|
|
@@ -175,8 +196,12 @@ def visualize_semantic_relations(doc, lang):
|
|
| 175 |
edge_labels = nx.get_edge_attributes(G, 'label')
|
| 176 |
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)
|
| 177 |
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
plt.axis('off')
|
| 181 |
|
| 182 |
legend_elements = [plt.Rectangle((0,0),1,1, facecolor=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
|
|
@@ -186,8 +211,16 @@ def visualize_semantic_relations(doc, lang):
|
|
| 186 |
|
| 187 |
return plt
|
| 188 |
|
|
|
|
|
|
|
| 189 |
def perform_semantic_analysis(text, nlp, lang):
    """Parse ``text`` and build the two semantic visualisations.

    Args:
        text: Raw text handed to ``nlp``.
        nlp: Callable that turns ``text`` into a parsed document.
        lang: Language code forwarded to both visualisation helpers.

    Returns:
        A ``(context_graph, relations_graph)`` tuple holding the results of
        ``visualize_context_graph`` and ``visualize_semantic_relations``,
        called in that order on the same parsed document.
    """
    parsed = nlp(text)
    graphs = (
        visualize_context_graph(parsed, lang),
        visualize_semantic_relations(parsed, lang),
    )
    return graphs
|
|
|
|
| 85 |
}
|
| 86 |
########################################################################################################################################
|
| 87 |
|
| 88 |
+
# Entity category labels and their node colours, per language code.
# The position of each label matters: code in this file picks categories by
# index (0 = people, 1 = concepts, 2 = places, 3 = dates).
ENTITY_LABELS = {
    code: dict(zip(names, ("lightblue", "lightgreen", "lightcoral", "lightyellow")))
    for code, names in (
        ('es', ("Personas", "Conceptos", "Lugares", "Fechas")),
        ('en', ("People", "Concepts", "Places", "Dates")),
        ('fr', ("Personnes", "Concepts", "Lieux", "Dates")),
    )
}
|
| 109 |
+
|
| 110 |
+
#########################################################################################################
|
| 111 |
def count_pos(doc):
    """Return a frequency table of part-of-speech tags found in ``doc``.

    Args:
        doc: An iterable of tokens, each exposing a ``pos_`` attribute.

    Returns:
        A ``Counter`` mapping each ``pos_`` value to its number of
        occurrences; the ``'PUNCT'`` tag is excluded.
    """
    tags = [token.pos_ for token in doc]
    return Counter(tag for tag in tags if tag != 'PUNCT')
|
| 113 |
|
|
|
|
| 118 |
|
| 119 |
# Mantén las definiciones de POS_COLORS y POS_TRANSLATIONS que ya tienes
|
| 120 |
|
| 121 |
+
#############################################################################################################################
|
| 122 |
+
def extract_entities(doc, lang):
    """Group the named entities of ``doc`` under localized category labels.

    The category names are the keys of ``ENTITY_LABELS[lang]``, taken in
    their definition order: index 0 receives ``PERSON`` entities, index 2
    receives ``LOC`` and ``GPE`` entities, index 3 receives ``DATE``
    entities, and index 1 receives every other entity label.

    Args:
        doc: Parsed document exposing ``ents``; each entity provides
            ``label_`` and ``text``.
        lang: Key into ``ENTITY_LABELS`` (a missing key raises ``KeyError``).

    Returns:
        A dict mapping every category name for ``lang`` to the list of
        entity texts assigned to it, in document order.
    """
    category_names = list(ENTITY_LABELS[lang])
    grouped = {name: [] for name in category_names}

    # Entity label -> position of the matching category name.
    # Any label not listed here falls into position 1.
    slot_by_label = {"PERSON": 0, "LOC": 2, "GPE": 2, "DATE": 3}

    for entity in doc.ents:
        slot = slot_by_label.get(entity.label_, 1)
        grouped[category_names[slot]].append(entity.text)

    return grouped
|
| 136 |
|
| 137 |
+
#####################################################################################################################
|
| 138 |
def visualize_context_graph(doc, lang):
|
| 139 |
G = nx.Graph()
|
| 140 |
+
entities = extract_entities(doc, lang)
|
| 141 |
+
color_map = ENTITY_LABELS[lang]
|
|
|
|
| 142 |
|
| 143 |
# Add nodes
|
| 144 |
for category, items in entities.items():
|
|
|
|
| 158 |
|
| 159 |
node_colors = [color_map[G.nodes[node]['category']] for node in G.nodes()]
|
| 160 |
|
| 161 |
+
nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=5000,
|
| 162 |
+
font_size=12, font_weight='bold')
|
| 163 |
|
| 164 |
# Add a legend
|
| 165 |
legend_elements = [plt.Rectangle((0,0),1,1,fc=color, edgecolor='none', label=category)
|
|
|
|
| 171 |
|
| 172 |
return plt
|
| 173 |
|
| 174 |
+
############################################################################################################################################
|
| 175 |
def visualize_semantic_relations(doc, lang):
|
| 176 |
G = nx.Graph()
|
| 177 |
word_freq = Counter(token.text.lower() for token in doc if token.pos_ not in ['PUNCT', 'SPACE'])
|
|
|
|
| 196 |
edge_labels = nx.get_edge_attributes(G, 'label')
|
| 197 |
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)
|
| 198 |
|
| 199 |
+
title = {
|
| 200 |
+
'es': "Relaciones Semánticas Relevantes",
|
| 201 |
+
'en': "Relevant Semantic Relations",
|
| 202 |
+
'fr': "Relations Sémantiques Pertinentes"
|
| 203 |
+
}
|
| 204 |
+
plt.title(title[lang], fontsize=20, fontweight='bold')
|
| 205 |
plt.axis('off')
|
| 206 |
|
| 207 |
legend_elements = [plt.Rectangle((0,0),1,1, facecolor=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
|
|
|
|
| 211 |
|
| 212 |
return plt
|
| 213 |
|
| 214 |
+
|
| 215 |
+
############################################################################################################################################
|
| 216 |
def perform_semantic_analysis(text, nlp, lang):
    """Parse ``text``, log its entities, and build both visualisations.

    Args:
        text: Raw text handed to ``nlp``.
        nlp: Callable that turns ``text`` into a parsed document exposing
            ``ents``.
        lang: Language code; shown in the debug header and forwarded to
            both visualisation helpers.

    Returns:
        A ``(context_graph, relations_graph)`` tuple holding the results of
        ``visualize_context_graph`` and ``visualize_semantic_relations``,
        called in that order on the same parsed document.

    NOTE(review): the visible parts of both helpers end with ``return plt``,
    so the two returned values may be the same pyplot module object —
    TODO confirm that each helper draws on its own figure.
    """
    parsed = nlp(text)

    # Debug trace: dump every detected entity with its label to stdout.
    print(f"Entidades encontradas ({lang}):")
    for entity in parsed.ents:
        print(f"{entity.text} - {entity.label_}")

    # Tuple elements are evaluated left to right, so the context graph is
    # built before the relations graph.
    return (
        visualize_context_graph(parsed, lang),
        visualize_semantic_relations(parsed, lang),
    )
|