Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Sun Jul 17 06:45:41 PM EDT 2022 | |
| author: Ryan Hildebrandt, github.com/ryancahildebrandt | |
| """ | |
| # imports | |
| import nltk | |
| import pandas as pd | |
| import random | |
| import re | |
| import streamlit as st | |
| import string | |
| from nltk.corpus import stopwords | |
| from nltk.stem import PorterStemmer | |
| from nltk.stem import WordNetLemmatizer | |
| from nltk.tokenize import word_tokenize | |
| from spellchecker import SpellChecker | |
# Seed the standard-library random generator with a fixed value (42).
| random.seed(42) | |
# Module-level SpellChecker instance; prep_spell calls its correction() method.
| spell = SpellChecker() | |
def prep_load():
    """Download the NLTK resources used by the preprocessing helpers.

    Requests the tokenizer models, the stopword lists, WordNet and the
    ``omw-1.4`` package through ``nltk.download``.  ``punkt_tab`` is requested
    next to ``punkt`` because NLTK 3.9 and later load the tokenizer behind
    ``word_tokenize`` from that package, while older releases keep using
    ``punkt``.

    Returns:
        None.  The function is called only for its download side effect.
    """
    resources = ("punkt", "punkt_tab", "stopwords", "wordnet", "omw-1.4")
    for resource in resources:
        nltk.download(resource)
def prep_lower(in_text):
    """Return a list holding the lowercase form of every item in ``in_text``."""
    lowered = []
    for text in in_text:
        lowered.append(text.lower())
    return lowered
def prep_punct(in_text):
    """Return a list with ASCII punctuation removed from each string.

    Every character listed in ``string.punctuation`` is deleted from each
    item; all other characters are kept unchanged.
    """
    # One deletion table is enough for the whole input.
    strip_table = str.maketrans("", "", string.punctuation)
    return [text.translate(strip_table) for text in in_text]
def prep_stop(in_text):
    """Return each text with its stopword tokens removed.

    Each item of ``in_text`` is split with ``word_tokenize``; tokens found in
    ``stopwords.words()`` (called without a language argument) are dropped
    and the remaining tokens are re-joined with single spaces.

    Args:
        in_text: Iterable of strings.

    Returns:
        A list with one space-joined string per input item.
    """
    stop_set = None
    filtered = []
    for text in in_text:
        if stop_set is None:
            # Load the stopword list once per call instead of once per token,
            # and keep it in a set for constant-time membership tests.  It is
            # loaded lazily so empty input never touches the corpus.
            stop_set = frozenset(stopwords.words())
        kept = [tok for tok in word_tokenize(text) if tok not in stop_set]
        filtered.append(" ".join(kept))
    return filtered
def prep_lemma(in_text):
    """Return each text with every token passed through WordNetLemmatizer.

    Each item is split with ``word_tokenize``, each token is lemmatized, and
    the results are re-joined with single spaces.
    """
    def lemmatize_text(text):
        lemmatizer = WordNetLemmatizer()
        return " ".join(lemmatizer.lemmatize(tok) for tok in word_tokenize(text))

    return [lemmatize_text(text) for text in in_text]
def prep_stem(in_text):
    """Return each text with every token passed through PorterStemmer.

    Each item is split with ``word_tokenize``, each token is stemmed, and the
    results are re-joined with single spaces.
    """
    def stem_text(text):
        stemmer = PorterStemmer()
        return " ".join(stemmer.stem(tok) for tok in word_tokenize(text))

    return [stem_text(text) for text in in_text]
def prep_spell(in_text):
    """Return each text with its word tokens spell-corrected.

    Each item is split with ``word_tokenize``.  Tokens made up only of
    ``string.punctuation`` characters are kept unchanged; every other token
    is replaced by ``spell.correction(token)``.  The tokens are then
    re-joined with single spaces.

    Args:
        in_text: Iterable of strings.

    Returns:
        A list with one space-joined string per input item.
    """
    def fix_token(token):
        # Punctuation-only tokens (",", "...", "``") are not words.  The
        # earlier substring test against string.punctuation let multi-char
        # ones through to the spell checker.
        if all(ch in string.punctuation for ch in token):
            return token
        # correction() may return None when it has no candidate; joining a
        # None would raise TypeError, so fall back to the original token.
        return spell.correction(token) or token

    return [
        " ".join(fix_token(tok) for tok in word_tokenize(text))
        for text in in_text
    ]
# Character class of the punctuation marks that end a clause:
# . ! \ / | , ? ; : _ - = +
# Written as a raw string so the regex escapes are not read as (invalid)
# Python string escapes.
clause_reg = r"[.!\\/|,?;:_\-=+]"
# Words that start a new clause when they stand between two spaces.
clause_words = ["and","about","but","so","because","since","though","although","unless","however","until"]
# Alternation: one punctuation mark OR one space-delimited clause word.
# The punctuation class needs its own "|" branch; without it punctuation only
# matched when immediately followed by the first clause word.
clause_sep = "|".join([clause_reg] + [f" {re.escape(word)} " for word in clause_words])


def prep_clause(in_text):
    """Split every text into clauses and return them as one flat list.

    Each item of ``in_text`` is split on ``clause_sep`` (case-insensitive),
    i.e. on clause punctuation and on space-delimited clause words.  Every
    piece is stripped of surrounding whitespace, and pieces that are empty
    after stripping are discarded.

    Args:
        in_text: Iterable of strings.

    Returns:
        A flat list of non-empty clause strings in input order; its length
        can differ from the length of ``in_text``.
    """
    clauses = []
    for text in in_text:
        for piece in re.split(clause_sep, text, flags=re.IGNORECASE):
            piece = piece.strip()
            # Filter after stripping so whitespace-only pieces are dropped.
            if piece:
                clauses.append(piece)
    return clauses
def prep_ex(in_text, func):
    """Build a before/after table for one preprocessing function.

    Returns a DataFrame whose "Before" column holds ``in_text`` and whose
    "After" column holds ``func(in_text)``.
    """
    columns = {"Before" : in_text}
    columns["After"] = func(in_text)
    return pd.DataFrame.from_dict(columns)