streamlit-docker-poc

Sleeping

App Files Community

daniel-de-leon committed on Aug 24, 2023

Commit

87f0205

1 Parent(s): 2b512a1

shap

Browse files

Files changed (3) hide show

Dockerfile +1 -1
app.py +62 -41
requirements.txt +4 -1

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-FROM python:3.8.9
 WORKDIR /app


1	+ FROM python:3.9
2
3	WORKDIR /app
4

app.py CHANGED Viewed

@@ -1,42 +1,63 @@
 import streamlit as st
-import pandas as pd
-import numpy as np
-st.title('Uber pickups in NYC')
-DATE_COLUMN = 'date/time'
-DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
-            'streamlit-demo-data/uber-raw-data-sep14.csv.gz')
-@st.cache_resource
-def load_data(nrows):
-    data = pd.read_csv(DATA_URL, nrows=nrows)
-    lowercase = lambda x: str(x).lower()
-    data.rename(lowercase, axis='columns', inplace=True)
-    data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
-    return data
-data_load_state = st.text('Loading data...')
-data = load_data(10000)
-data_load_state.text("Done! (using st.cache)")
-if st.checkbox('Show raw data'):
-    st.subheader('Raw data')
-    st.write(data)
-st.subheader('Number of pickups by hour')
-hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0,24))[0]
-st.bar_chart(hist_values)
-# Some number in the range 0-23
-hour_to_filter = st.slider('hour', 0, 23, 17)
-filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter]
-st.subheader('Map of all pickups at %s:00' % hour_to_filter)
-st.map(filtered_data)
-uploaded_file = st.file_uploader("Choose a file")
-if uploaded_file is not None:
-    st.write(uploaded_file.name)
-    bytes_data = uploaded_file.getvalue()
-    st.write(len(bytes_data), "bytes")

 import streamlit as st
+import streamlit.components.v1 as components
+from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
+                          pipeline)
+import shap
+from PIL import Image
+st.set_option('deprecation.showPyplotGlobalUse', False)
+output_width = 800
+output_height = 300
+rescale_logits = False
+st.set_page_config(page_title='Text Classification with Shap')
+logo = Image.open('Intel-logo.png')
+st.sidebar.image(logo)
+st.title('Interpreting HF Pipeline Text Classification with Shap')
+form = st.sidebar.form("Model Selection")
+form.header('Model Selection')
+model_name = form.text_input("Enter the name of the text classification LLM (note: model must be fine-tuned on a text classification task)", value = "Hate-speech-CNERG/bert-base-uncased-hatexplain")
+form.form_submit_button("Submit")
+@st.cache_data()
+def load_model(model_name):
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name)
+    return tokenizer, model
+tokenizer, model = load_model(model_name)
+pred = pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=None)
+explainer = shap.Explainer(pred, rescale_to_logits = rescale_logits)
+col1, col2 = st.columns(2)
+text = col1.text_area("Enter text input", value = "Classify me.")
+result = pred(text)
+top_pred = result[0][0]['label']
+col2.write('')
+for label in result[0]:
+    col2.write(f'**{label["label"]}**: {label["score"]: .2f}')
+shap_values = explainer([text])
+force_plot = shap.plots.text(shap_values, display=False)
+bar_plot = shap.plots.bar(shap_values[0, :, top_pred], order=shap.Explanation.argsort.flip, show=False)
+st.markdown("""
+<style>
+.big-font {
+    font-size:35px !important;
+}
+</style>
+""", unsafe_allow_html=True)
+st.markdown(f'<center><p class="big-font">Shap Bar Plot for <i>{top_pred}</i> Prediction</p></center>', unsafe_allow_html=True)
+st.pyplot(bar_plot, clear_figure=True)
+st.markdown('<center><p class="big-font">Shap Interactive Force Plot</p></center>', unsafe_allow_html=True)
+components.html(force_plot, height=output_height, width=output_width, scrolling=True)

requirements.txt CHANGED Viewed

@@ -1,3 +1,6 @@
 streamlit
 numpy
-pandas

 streamlit
+transformers
+shap
+torch
+matplotlib
 numpy