Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import glob, os, sys; sys.path.append('/src') | |
| #import helper | |
| from src import preprocessing as pre | |
| from src import cleaning as clean | |
def app():
    """Render the 'Analyse Policy Document' page.

    Sets the sidebar title, then draws the page heading and a file-upload
    widget inside a container. When a file has been uploaded, its name is
    shown and the file is passed through ``pre.load_document`` followed by
    ``clean.preprocessing``; otherwise a centred notice is displayed.
    """
    heading_html = "<h1 style='text-align: center; color: black;'>SDSN X GIZ Policy Tracing</h1>"
    missing_html = "<h3 style='text-align: center; color: black;'>no PDF uploaded ...</h3>"

    # Sidebar
    st.sidebar.title('Analyse Policy Document')

    # Main page area
    with st.container():
        st.markdown(heading_html, unsafe_allow_html=True)

        uploaded = st.file_uploader('Upload PDF File', type=['pdf', 'docx', 'txt'])

        # Nothing uploaded yet: show two spacer lines and the notice, then stop.
        if uploaded is None:
            for _ in range(2):
                st.write(' ')
            st.markdown(missing_html, unsafe_allow_html=True)
            return

        st.write("Filename: ", uploaded.name)

        # Load the uploaded document through the project loader.
        documents = pre.load_document(uploaded)

        # Preprocess the loaded document. The three returned values are not
        # consumed in this function yet; the unpacking is kept so the call
        # still expects exactly three results.
        _processed, _frame, _all_text = clean.preprocessing(documents)

        st.write('... ')