Update modules/semantic_analysis.py
Browse files- modules/semantic_analysis.py +71 -48
modules/semantic_analysis.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
#
|
| 2 |
import streamlit as st
|
| 3 |
import spacy
|
| 4 |
import networkx as nx
|
|
@@ -84,71 +84,94 @@ POS_TRANSLATIONS = {
|
|
| 84 |
}
|
| 85 |
}
|
| 86 |
########################################################################################################################################
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
for
|
| 97 |
-
if
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
G, word_colors = create_syntax_graph(doc, lang)
|
| 119 |
|
| 120 |
-
plt.figure(figsize=(24, 18))
|
| 121 |
-
pos = nx.spring_layout(G, k=0.9, iterations=50)
|
| 122 |
|
| 123 |
node_colors = [data['color'] for _, data in G.nodes(data=True)]
|
| 124 |
node_sizes = [data['size'] for _, data in G.nodes(data=True)]
|
| 125 |
|
| 126 |
nx.draw(G, pos, with_labels=False, node_color=node_colors, node_size=node_sizes, arrows=True,
|
| 127 |
-
arrowsize=20, width=2, edge_color='gray')
|
| 128 |
|
| 129 |
nx.draw_networkx_labels(G, pos, {node: data['label'] for node, data in G.nodes(data=True)},
|
| 130 |
-
font_size=10, font_weight='bold')
|
| 131 |
|
| 132 |
edge_labels = nx.get_edge_attributes(G, 'label')
|
| 133 |
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)
|
| 134 |
|
| 135 |
-
plt.title("
|
| 136 |
-
fontsize=20, fontweight='bold')
|
| 137 |
plt.axis('off')
|
| 138 |
|
| 139 |
legend_elements = [plt.Rectangle((0,0),1,1, facecolor=color, edgecolor='none',
|
| 140 |
label=f"{POS_TRANSLATIONS[lang][pos]} ({count_pos(doc)[pos]})")
|
| 141 |
for pos, color in POS_COLORS.items() if pos in set(nx.get_node_attributes(G, 'pos').values())]
|
| 142 |
-
plt.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12)
|
| 143 |
|
| 144 |
return plt
|
| 145 |
-
################################################################################################################################
|
| 146 |
-
def visualize_syntax(text, nlp, lang):
    """Parse *text* with *nlp* and return the syntax visualization figure.

    Long inputs are truncated to at most ``max_tokens`` tokens before
    visualization, with a warning printed to stdout.

    Parameters
    ----------
    text : str  -- raw input text.
    nlp  : spaCy Language pipeline used to parse the text.
    lang : str  -- language code forwarded to the graph renderer.

    Returns
    -------
    The matplotlib module returned by ``visualize_syntax_graph``.
    """
    max_tokens = 5000
    doc = nlp(text)
    if len(doc) > max_tokens:
        # BUGFIX: the limit is in tokens, but the old code sliced the raw
        # string by max_tokens *characters* (text[:max_tokens]), truncating
        # far more aggressively than announced. Cut at the character offset
        # of token #max_tokens instead so exactly max_tokens tokens survive.
        doc = nlp(text[:doc[max_tokens].idx])
        print(f"Warning: The input text is too long. Only the first {max_tokens} tokens will be visualized.")
    return visualize_syntax_graph(doc, lang)
|
| 153 |
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#semantic_analysis.py
|
| 2 |
import streamlit as st
|
| 3 |
import spacy
|
| 4 |
import networkx as nx
|
|
|
|
| 84 |
}
|
| 85 |
}
|
| 86 |
########################################################################################################################################
|
| 87 |
+
|
| 88 |
+
def extract_entities(doc):
    """Group the named entities of a parsed document into display buckets.

    Parameters
    ----------
    doc : spaCy ``Doc`` (anything exposing ``.ents`` items with
          ``.label_`` and ``.text``).

    Returns
    -------
    dict with keys ``"Personas"``, ``"Conceptos"``, ``"Lugares"``,
    ``"Fechas"`` mapping to lists of entity surface strings, in document
    order (duplicates preserved).
    """
    entities = {
        "Personas": [],
        "Conceptos": [],
        "Lugares": [],
        "Fechas": []
    }

    for ent in doc.ents:
        # BUGFIX: multilingual/Spanish spaCy models label people "PER", but
        # English models emit "PERSON"; the old check only matched "PER",
        # so English person entities were misfiled under "Conceptos".
        if ent.label_ in ("PER", "PERSON"):
            entities["Personas"].append(ent.text)
        elif ent.label_ in ["LOC", "GPE"]:
            entities["Lugares"].append(ent.text)
        elif ent.label_ == "DATE":
            entities["Fechas"].append(ent.text)
        else:
            # Everything else (ORG, MISC, ...) is shown as a generic concept.
            entities["Conceptos"].append(ent.text)

    return entities
|
| 107 |
+
|
| 108 |
+
def visualize_context_graph(doc, lang):
    """Draw an entity co-occurrence graph for *doc* and return ``plt``.

    Nodes are named entities (colored by category); an edge joins two
    entities whenever they appear in the same sentence.
    """
    graph = nx.Graph()
    grouped = extract_entities(doc)

    # One node per entity, tagged with the bucket it came from.
    for category, names in grouped.items():
        for name in names:
            graph.add_node(name, category=category)

    # Link every pair of known entities sharing a sentence.
    for sentence in doc.sents:
        present = [ent.text for ent in sentence.ents if ent.text in graph.nodes()]
        for idx in range(len(present)):
            for other in present[idx + 1:]:
                graph.add_edge(present[idx], other)

    # Render with a force-directed layout.
    plt.figure(figsize=(20, 15))
    layout = nx.spring_layout(graph, k=0.5, iterations=50)

    color_map = {"Personas": "lightblue", "Conceptos": "lightgreen", "Lugares": "lightcoral", "Fechas": "lightyellow"}
    fill_colors = [color_map[graph.nodes[node]['category']] for node in graph.nodes()]

    nx.draw(graph, layout, node_color=fill_colors, with_labels=True, node_size=3000, font_size=8, font_weight='bold')

    # Category legend: one colored swatch per bucket.
    swatches = [plt.Rectangle((0, 0), 1, 1, fc=shade, edgecolor='none') for shade in color_map.values()]
    plt.legend(swatches, color_map.keys(), loc='upper left', bbox_to_anchor=(1, 1))

    title = "Análisis de Contexto" if lang == 'es' else "Context Analysis" if lang == 'en' else "Analyse de Contexte"
    plt.title(title, fontsize=20)
    plt.axis('off')

    return plt
|
| 141 |
+
|
| 142 |
+
def visualize_semantic_relations(doc, lang):
    """Render the semantic-relation graph built by ``create_syntax_graph``.

    Parameters
    ----------
    doc  : parsed spaCy ``Doc``.
    lang : 'es', 'en' or anything else (falls back to the French title).

    Returns
    -------
    The ``plt`` module with the figure drawn, ready for the caller to show.
    """
    # Reuses the syntax-graph builder; node attributes carry color/size/label.
    G, _word_colors = create_syntax_graph(doc, lang)  # colors unused here

    plt.figure(figsize=(24, 18))
    pos = nx.spring_layout(G, k=0.9, iterations=50)

    node_colors = [data['color'] for _, data in G.nodes(data=True)]
    node_sizes = [data['size'] for _, data in G.nodes(data=True)]

    nx.draw(G, pos, with_labels=False, node_color=node_colors, node_size=node_sizes, arrows=True,
            arrowsize=20, width=2, edge_color='gray')

    nx.draw_networkx_labels(G, pos, {node: data['label'] for node, data in G.nodes(data=True)},
                            font_size=10, font_weight='bold')

    edge_labels = nx.get_edge_attributes(G, 'label')
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)

    plt.title("Análisis de Relaciones Semánticas" if lang == 'es' else "Semantic Relations Analysis" if lang == 'en' else "Analyse des Relations Sémantiques",
              fontsize=20, fontweight='bold')
    plt.axis('off')

    # PERF: hoist count_pos(doc) out of the legend comprehension — it was
    # previously recomputed (a full pass over the doc) once per POS tag.
    pos_counts = count_pos(doc)
    present_tags = set(nx.get_node_attributes(G, 'pos').values())
    legend_elements = [plt.Rectangle((0, 0), 1, 1, facecolor=color, edgecolor='none',
                                     label=f"{POS_TRANSLATIONS[lang][tag]} ({pos_counts[tag]})")
                       for tag, color in POS_COLORS.items() if tag in present_tags]
    plt.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12)

    return plt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
+
def perform_semantic_analysis(text, nlp, lang):
    """Run the full semantic pipeline on *text*.

    Parses the text once with *nlp*, then builds both figures.

    Returns a ``(context_graph, relations_graph)`` pair.
    """
    doc = nlp(text)
    return (
        visualize_context_graph(doc, lang),
        visualize_semantic_relations(doc, lang),
    )
|