Spaces:

somosnlp-hackathon-2023
/

demo_DiagTrast

Sleeping

Stremie committed on Apr 4, 2023

Commit

3be88b5

1 Parent(s): a460ebb

First try testing model

Files changed (4) hide show

__init__.py ADDED Viewed

File without changes

__pycache__/utils.cpython-38.pyc ADDED Viewed

Binary file (1.55 kB). View file

app.py CHANGED Viewed

@@ -1,8 +1,24 @@
 import streamlit as st
-st.title('Mi primer titulo')
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)
-st.markdown('Texto')

 import streamlit as st
+import utils
+from transformers import pipeline
+from transformers import AutoTokenizer
+from transformers import AutoModelForSequenceClassification
#####################
model_id = 'hackathon-somos-nlp-2023/DiagTrast'


def _load_model(name):
    """Load the tokenizer and the text-classification pipeline for *name*.

    The tokenizer is passed explicitly to the pipeline so that the object
    loaded here is the one actually used for inference.
    """
    tok = AutoTokenizer.from_pretrained(name)
    clf = pipeline("text-classification",
                   model=name,
                   tokenizer=tok)
    return tok, clf


# Streamlit re-executes this script on every widget interaction. When the
# installed Streamlit version offers `st.cache_resource`, reuse the loaded
# model across reruns instead of rebuilding it each time; otherwise fall back
# to loading it directly, as before.
if hasattr(st, "cache_resource"):
    _load_model = st.cache_resource(_load_model)

tokenizer, classifier = _load_model(model_id)
#####################
st.title('Diagnóstico de Trastornos Mentales')
sintomas = st.text_input(label='Introduce síntomas',
                         value='')
# Do not classify the empty default value: a prediction for "no text" is
# meaningless and would be shown before the user has typed anything.
if sintomas.strip():
    predictions = classifier(utils.clean_text(sintomas))
    # st.markdown expects a string, so render each prediction explicitly
    # rather than relying on the implicit str() of a list of dicts.
    st.markdown('\n'.join(
        f"- **{pred['label']}**: {pred['score']:.2%}" for pred in predictions
    ))

utils.py ADDED Viewed

+import re
+import nltk
+from nltk.corpus import stopwords
# Maps acute-accented vowels (lower and upper case) to their plain forms.
_ACCENT_TABLE = str.maketrans("áéíóúÁÉÍÓÚ", "aeiouAEIOU")


def eliminar_acento(s):
    """Return *s* with acute accents removed from vowels.

    Only the ten characters á, é, í, ó, ú and their upper-case forms are
    replaced; every other character (including ñ and ü) is left untouched.

    Args:
        s: Text to normalise.

    Returns:
        A new string with the accented vowels replaced by plain vowels.
    """
    # A single translate pass replaces the ten chained str.replace() calls.
    return s.translate(_ACCENT_TABLE)
# Lazily populated cache of the Spanish stopword set (filled on first use).
_SPANISH_STOPWORDS = None


def _spanish_stopwords():
    """Return the Spanish stopword set, loading it at most once per process."""
    global _SPANISH_STOPWORDS
    if _SPANISH_STOPWORDS is None:
        try:
            words = stopwords.words('spanish')
        except LookupError:
            # The corpus is not installed yet: download it once and retry.
            nltk.download('stopwords')
            words = stopwords.words('spanish')
        _SPANISH_STOPWORDS = frozenset(words)
    return _SPANISH_STOPWORDS


def eliminar_patrones_stopwords(text):
    """Remove Spanish stopwords from *text*.

    The text is split on whitespace, tokens found in NLTK's Spanish stopword
    list are dropped, and the remaining tokens are joined with single spaces.
    Matching is exact and case-sensitive, so callers should lower-case the
    text beforehand if they want case-insensitive filtering.

    Args:
        text: Whitespace-separated text.

    Returns:
        The text without stopwords, tokens separated by a single space.
    """
    lstopwords = _spanish_stopwords()
    return ' '.join(word for word in text.split() if word not in lstopwords)
# Patterns are compiled once at import time instead of on every call.
_COLON_OR_UNDERSCORE = re.compile(r"[:_]")
_O_SLASH_A = re.compile(r"o/a")
_NOT_WORD_OR_SPACE = re.compile(r"[^\w\s]")


def eliminar_espacios_blancos(texto):
    """Strip punctuation and symbols from *texto*.

    Despite its name, this function does not touch whitespace. It applies,
    in order:

    1. removal of colons and underscores (underscores count as word
       characters, so the final step would not remove them);
    2. replacement of the literal sequence ``o/a`` with ``o``;
    3. removal of every character that is neither a word character nor
       whitespace.

    Args:
        texto: Text to clean.

    Returns:
        The cleaned text, with its original whitespace preserved.
    """
    texto = _COLON_OR_UNDERSCORE.sub('', texto)
    texto = _O_SLASH_A.sub('o', texto)
    return _NOT_WORD_OR_SPACE.sub('', texto)
def clean_text(original):
    """Normalise free text before it is passed to the classifier.

    Steps, in order:

    1. dots that sit between word characters are turned into spaces, and
       any remaining dots are removed;
    2. acute accents are stripped (``eliminar_acento``);
    3. punctuation and symbols are removed (``eliminar_espacios_blancos``);
    4. runs of spaces are collapsed and the text is lower-cased;
    5. Spanish stopwords are dropped (``eliminar_patrones_stopwords``).

    Args:
        original: Raw input text.

    Returns:
        The cleaned, lower-cased text with tokens separated by single spaces.
    """
    # Within a token such as "a.b", each dot becomes a space so the parts
    # stay separate words; dots elsewhere are simply deleted.
    texto = re.sub(r'\w+(?:\.+\w+)*',
                   lambda match: match.group(0).replace('.', ' '),
                   original)
    texto = re.sub(r'\.', '', texto)
    texto = eliminar_acento(texto)
    texto = eliminar_espacios_blancos(texto)
    texto = re.sub(r" +", ' ', texto)
    texto = texto.lower()
    # NOTE(review): accents are stripped before stopword filtering, so any
    # accented entries in the stopword list can no longer match here.
    # Confirm this matches how the model's training data was cleaned.
    return eliminar_patrones_stopwords(texto)