create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
"""Streamlit entry script for AIdeaText: environment setup, model loading and text input."""
import os

# These variables are assigned before the third-party imports below so that
# they are already present in the environment when those libraries load.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

import re

import spacy
import streamlit as st
from spacy import displacy

from modules.syntax_analysis import (
    POS_COLORS,
    POS_TRANSLATIONS,
    get_repeated_words_colors,
    highlight_repeated_words,
    visualize_syntax,
)

# Configure the page to use the full width.
# Kept ahead of every other Streamlit call (including the cached model loader
# below, which may display a spinner) because set_page_config has to be the
# first Streamlit command executed in a run.
st.set_page_config(layout="wide", page_title="AIdeaText")


@st.cache_resource
def load_nlp_model():
    """Load the ``es_core_news_lg`` spaCy pipeline.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loaded pipeline as a resource means the model is
    read from disk once instead of on each rerun.

    Returns:
        The spaCy ``Language`` object returned by ``spacy.load``.
    """
    return spacy.load("es_core_news_lg")


# Module-level name used by the rest of the script to parse the input text.
nlp = load_nlp_model()

st.markdown("### AIdeaText - Advanced Text Analysis Tool")

# First horizontal band: Text Input
sentence_input = st.text_area("Ingresa un texto para analizar (max 5,000 words):", height=150)
|
| 23 |
+
|
| 24 |
+
# Patterns used to post-process the displaCy SVG; compiled once instead of
# being re-specified for every sentence.
_SVG_OPEN_TAG_RE = re.compile(r'<svg[^>]*>')
# Group 1 is everything up to and including the x offset, group 2 the closing
# of the translate(...) value, so only the y offset in between is replaced.
_G_TRANSLATE_RE = re.compile(r'(<g [^>]*transform="translate\(\d+,)\d+(\)")')


def _build_legend_html():
    """Return the HTML legend of grammatical categories.

    One coloured label is emitted for each entry of ``POS_COLORS`` that also
    has a display name in ``POS_TRANSLATIONS``; entries without a translation
    are skipped.
    """
    entries = "".join(
        f"<div style='margin-right: 10px;'><span style='background-color: {color}; padding: 2px 5px;'>{POS_TRANSLATIONS[pos]}</span></div>"
        for pos, color in POS_COLORS.items()
        if pos in POS_TRANSLATIONS
    )
    return f"<div style='display: flex; flex-wrap: wrap;'>{entries}</div>"


def _compact_dep_svg(svg):
    """Shrink the vertical footprint of a displaCy dependency SVG.

    Args:
        svg: SVG markup string as returned by ``displacy.render``.

    Returns:
        The markup with ``height="375"`` rewritten to ``height="200"``,
        ``height="450"`` inside the ``<svg>`` tag rewritten to
        ``height="300"``, and the y offset of any
        ``<g ... transform="translate(x,y)">`` set to 50. Markup that
        contains none of these is returned unchanged.
    """
    # Reduce the height of the SVG
    svg = svg.replace('height="375"', 'height="200"')
    svg = _SVG_OPEN_TAG_RE.sub(
        lambda m: m.group(0).replace('height="450"', 'height="300"'), svg
    )
    # Reduce the top margin of the SVG. Only the y offset is substituted so
    # that any other attributes on the <g> element are kept intact.
    return _G_TRANSLATE_RE.sub(r'\g<1>50\g<2>', svg)


if st.button("Analizar texto"):
    if not sentence_input.strip():
        # Give explicit feedback instead of silently doing nothing when the
        # text area is empty or contains only whitespace.
        st.warning("Por favor, ingresa un texto antes de analizar.")
    else:
        # NOTE(review): the input label advertises a 5,000-word limit, but
        # nothing here enforces it - confirm whether it should be checked.
        doc = nlp(sentence_input)

        # Second horizontal band: Highlighted Repeated Words
        with st.expander("Palabras repetidas", expanded=True):
            word_colors = get_repeated_words_colors(doc)
            highlighted_text = highlight_repeated_words(doc, word_colors)
            st.markdown(highlighted_text, unsafe_allow_html=True)

            # Legend for grammatical categories
            # NOTE(review): "Legenda" looks like a typo for "Leyenda" -
            # left unchanged here; confirm the intended wording.
            st.markdown("##### Legenda: Categorías gramaticales")
            st.markdown(_build_legend_html(), unsafe_allow_html=True)

        # Third horizontal band: Arc Diagram (one diagram per sentence)
        with st.expander("Análisis sintáctico: Diagrama de arco", expanded=True):
            for number, sent in enumerate(doc.sents, start=1):
                st.subheader(f"Sentence {number}")
                svg = displacy.render(sent, style="dep", options={"distance": 100})
                st.write(_compact_dep_svg(svg), unsafe_allow_html=True)

        # Fourth horizontal band: Network graph
        with st.expander("Análisis sintáctico: Diagrama de red", expanded=True):
            st.markdown("#### Análisis sintáctico: Diagrama de red")
            st.write("Esta sección muestra la estructura sintáctica del texto completo usando un diagrama de red.")

            # visualize_syntax receives the raw text, not the parsed doc.
            fig = visualize_syntax(sentence_input)
            st.pyplot(fig)
|