Update modules/semantic_analysis.py
modules/semantic_analysis.py
CHANGED
@@ -183,14 +183,16 @@ def visualize_semantic_relations(doc, lang):
     G = nx.Graph()
     word_freq = defaultdict(int)
     lemma_to_word = {}
+    lemma_to_pos = {}
 
-    # Count frequencies of lemmas and map lemmas to their most common word form
+    # Count frequencies of lemmas and map lemmas to their most common word form and POS
     for token in doc:
         if token.pos_ in ['NOUN', 'VERB']:
             lemma = token.lemma_.lower()
             word_freq[lemma] += 1
             if lemma not in lemma_to_word or token.text.lower() == lemma:
                 lemma_to_word[lemma] = token.text
+                lemma_to_pos[lemma] = token.pos_
 
     # Get top 20 most frequent lemmas
     top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
@@ -198,7 +200,7 @@ def visualize_semantic_relations(doc, lang):
     # Add nodes
     for lemma in top_lemmas:
         word = lemma_to_word[lemma]
-        G.add_node(word, pos=
+        G.add_node(word, pos=lemma_to_pos[lemma])
 
     # Add edges
     for token in doc:
@@ -242,7 +244,6 @@ def visualize_semantic_relations(doc, lang):
 
     return fig
 
-
 ############################################################################################################################################
 def perform_semantic_analysis(text, nlp, lang):
     doc = nlp(text)
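For reviewers who want to see the effect of the new `lemma_to_pos` mapping in isolation, here is a minimal, self-contained sketch (not code from this repository) that mirrors the patched loop using hypothetical stand-in tokens instead of a real spaCy `Doc`, then reads the stored `pos` node attribute back the way downstream drawing code could, e.g. to colour nouns and verbs differently. The `Token` namedtuple, the sample tokens, and the colour choices are illustrative assumptions only.

```python
from collections import defaultdict, namedtuple

import networkx as nx

# Hypothetical stand-in for spaCy tokens; the real function iterates a spaCy Doc.
Token = namedtuple("Token", ["text", "lemma_", "pos_"])

doc = [
    Token("cats", "cat", "NOUN"),
    Token("cat", "cat", "NOUN"),
    Token("runs", "run", "VERB"),
    Token("quickly", "quickly", "ADV"),  # skipped: not a NOUN or VERB
]

G = nx.Graph()
word_freq = defaultdict(int)
lemma_to_word = {}
lemma_to_pos = {}

# Same bookkeeping as the patched loop: count lemma frequencies, remember a
# surface form for each lemma, and now also remember its POS tag.
for token in doc:
    if token.pos_ in ['NOUN', 'VERB']:
        lemma = token.lemma_.lower()
        word_freq[lemma] += 1
        if lemma not in lemma_to_word or token.text.lower() == lemma:
            lemma_to_word[lemma] = token.text
            lemma_to_pos[lemma] = token.pos_

top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]

for lemma in top_lemmas:
    G.add_node(lemma_to_word[lemma], pos=lemma_to_pos[lemma])

# The stored "pos" attribute can later drive styling, e.g. colouring nodes by POS.
pos_by_node = nx.get_node_attributes(G, "pos")
node_colors = ["skyblue" if pos_by_node[n] == "NOUN" else "lightgreen" for n in G]
print(pos_by_node)   # {'cat': 'NOUN', 'runs': 'VERB'}
print(node_colors)   # ['skyblue', 'lightgreen']
```

The point of the change is that the POS tag is now captured in `lemma_to_pos` at the same place where `lemma_to_word` is updated, so `G.add_node(word, pos=...)` depends only on the two dictionaries built in the counting loop.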