Update modules/morpho_analysis.py
Browse files- modules/morpho_analysis.py +18 -2
modules/morpho_analysis.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
# /modules/morpho_analysis.py
|
| 2 |
import spacy
|
| 3 |
from collections import Counter
|
|
|
|
|
|
|
| 4 |
|
| 5 |
# Define colors for grammatical categories
|
| 6 |
POS_COLORS = {
|
|
@@ -79,6 +81,7 @@ POS_TRANSLATIONS = {
|
|
| 79 |
}
|
| 80 |
}
|
| 81 |
|
|
|
|
| 82 |
def get_repeated_words_colors(doc):
|
| 83 |
word_counts = Counter(token.text.lower() for token in doc if token.pos_ != 'PUNCT')
|
| 84 |
repeated_words = {word: count for word, count in word_counts.items() if count > 1}
|
|
@@ -89,7 +92,8 @@ def get_repeated_words_colors(doc):
|
|
| 89 |
word_colors[token.text.lower()] = POS_COLORS.get(token.pos_, '#FFFFFF')
|
| 90 |
|
| 91 |
return word_colors
|
| 92 |
-
|
|
|
|
| 93 |
def highlight_repeated_words(doc, word_colors):
|
| 94 |
highlighted_text = []
|
| 95 |
for token in doc:
|
|
@@ -98,4 +102,16 @@ def highlight_repeated_words(doc, word_colors):
|
|
| 98 |
highlighted_text.append(f'<span style="background-color: {color};">{token.text}</span>')
|
| 99 |
else:
|
| 100 |
highlighted_text.append(token.text)
|
| 101 |
-
return ' '.join(highlighted_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /modules/morpho_analysis.py
|
| 2 |
import spacy
|
| 3 |
from collections import Counter
|
| 4 |
+
from spacy import displacy
|
| 5 |
+
import re
|
| 6 |
|
| 7 |
# Define colors for grammatical categories
|
| 8 |
POS_COLORS = {
|
|
|
|
| 81 |
}
|
| 82 |
}
|
| 83 |
|
| 84 |
+
#############################################################################################
|
| 85 |
def get_repeated_words_colors(doc):
|
| 86 |
word_counts = Counter(token.text.lower() for token in doc if token.pos_ != 'PUNCT')
|
| 87 |
repeated_words = {word: count for word, count in word_counts.items() if count > 1}
|
|
|
|
| 92 |
word_colors[token.text.lower()] = POS_COLORS.get(token.pos_, '#FFFFFF')
|
| 93 |
|
| 94 |
return word_colors
|
| 95 |
+
|
| 96 |
+
######################################################################################################
|
| 97 |
def highlight_repeated_words(doc, word_colors):
|
| 98 |
highlighted_text = []
|
| 99 |
for token in doc:
|
|
|
|
| 102 |
highlighted_text.append(f'<span style="background-color: {color};">{token.text}</span>')
|
| 103 |
else:
|
| 104 |
highlighted_text.append(token.text)
|
| 105 |
+
return ' '.join(highlighted_text)
|
| 106 |
+
|
| 107 |
+
#################################################################################################
|
| 108 |
+
def generate_arc_diagram(doc, lang_code):
    """Render one displaCy dependency-arc diagram per sentence of ``doc``.

    Args:
        doc: Parsed spaCy document; its sentences are read from ``doc.sents``.
        lang_code: Language code of the analysed text. Not used by the
            rendering itself; kept so existing callers keep working.

    Returns:
        A list of SVG markup strings, one per sentence, in document order.
        The list is empty when ``doc`` yields no sentences.
    """
    # Compile once per call rather than once per sentence.
    svg_open_tag = re.compile(r'<svg[^>]*>')
    # Group 1 captures everything between "<g " and the transform attribute
    # so those attributes (e.g. class) survive the rewrite below.
    group_translate = re.compile(
        r'(<g [^>]*)transform="translate\((\d+),(\d+)\)"'
    )

    def _shrink_svg_height(match):
        # Only the opening <svg> tag is touched, never the diagram content.
        return match.group(0).replace('height="450"', 'height="300"')

    def _pin_group_offset(match):
        # Keep the horizontal offset, force the vertical offset to 50.
        return f'{match.group(1)}transform="translate({match.group(2)},50)"'

    arc_diagrams = []
    for sent in doc.sents:
        # jupyter=False guarantees a markup string is returned; with
        # auto-detection displaCy displays inline in a notebook and
        # returns None, which would break the string edits below.
        html = displacy.render(
            sent,
            style="dep",
            jupyter=False,
            options={"distance": 100},
        )
        html = html.replace('height="375"', 'height="200"')
        html = svg_open_tag.sub(_shrink_svg_height, html)
        html = group_translate.sub(_pin_group_offset, html)
        arc_diagrams.append(html)
    return arc_diagrams
|