File size: 1,966 Bytes
754f6cf
9749cc5
 
 
 
 
 
 
754f6cf
9749cc5
 
c314050
754f6cf
9749cc5
2429d44
f2f2a91
754f6cf
9749cc5
f2f2a91
9749cc5
754f6cf
9749cc5
754f6cf
9749cc5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c314050
9749cc5
 
 
 
 
c314050
9749cc5
 
 
 
 
c314050
 
9749cc5
754f6cf
9749cc5
754f6cf
c7f7d04
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import streamlit as st

# -------------------------
# Streamlit App — Tipping
# -------------------------

# Fetch the "tips" example dataset through seaborn and derive, per row,
# the tip expressed as a percentage of the total bill.
tips = sns.load_dataset("tips")
tips["tip_percentage"] = tips["tip"].div(tips["total_bill"]).mul(100)

# User question shown as the page title. The leading emoji was stored as
# mis-decoded bytes; it is restored to the intended light-bulb character.
st.title("💡 Do people tip more on certain days of the week?")
st.subheader("Explore average and highest tip percentages by day, time.")

# Short problem statement displayed under the headings.
st.write(
    "Check whether tipping behavior changes depending on the day of the week or time of the day. "
    "Use the filters to explore differences by day and time(Lunch/Dinner) of day."
)

# Sidebar filters
def _ordered_levels(values):
    """Return the distinct non-null entries of *values* in display order.

    For a categorical column the declared category order is used (restricted
    to categories that actually occur), so weekday-like labels are not
    shuffled into alphabetical order. Any other dtype falls back to a plain
    sort of the unique values.
    """
    values = values.dropna()
    if isinstance(values.dtype, pd.CategoricalDtype):
        return values.cat.remove_unused_categories().cat.categories.tolist()
    return sorted(values.unique().tolist())


with st.sidebar:
    st.subheader("Filters")
    # Day filter: every day is pre-selected. ``all_days`` is also reused
    # later as the x-axis order of the box plot.
    # NOTE(review): assumes the dataset declares its day categories in
    # calendar order — confirm against the loaded data.
    all_days = _ordered_levels(tips["day"])
    selected_days = st.multiselect("Days to show", options=all_days, default=all_days)

    # Time filter: "All" is a sentinel meaning "do not filter by time".
    all_times = _ordered_levels(tips["time"])
    selected_time = st.selectbox("Select time of day", options=["All"] + all_times)
    

# Narrow the dataset to the chosen days; additionally restrict by time of
# day unless the "All" sentinel was picked in the sidebar.
day_mask = tips["day"].isin(selected_days)
if selected_time == "All":
    filtered = tips[day_mask]
else:
    filtered = tips[day_mask & (tips["time"] == selected_time)]


# KPI: average tip percentage over the filtered rows.
# With no matching rows the mean is NaN; show "N/A" instead of "nan%".
avg_tip = filtered["tip_percentage"].mean()
avg_tip_text = "N/A" if pd.isna(avg_tip) else f"{avg_tip:.2f}%"
st.metric("For selected days and time , Average Tip %", avg_tip_text)

# Visualization: box plot of tip percentage per day plus a one-line insight
# naming the day with the highest mean tip percentage.
if not filtered.empty:
    # Build an explicit figure/axes pair instead of relying on pyplot's
    # implicit "current figure", then hand that exact figure to Streamlit.
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.boxplot(x="day", y="tip_percentage", data=filtered, order=all_days, ax=ax)
    ax.set_title("Tip Percentage by Day")
    st.pyplot(fig)
    plt.close(fig)  # release the figure once it has been rendered

    # Dynamic insight: compute the per-day means once. ``observed=True``
    # keeps only days present in the filtered rows, and ``dropna`` guards
    # against groups whose mean is undefined.
    mean_by_day = (
        filtered.groupby("day", observed=True)["tip_percentage"].mean().dropna()
    )
    if not mean_by_day.empty:
        best_day = mean_by_day.idxmax()
        best_value = mean_by_day.max()
        st.success(f"💡 Insight: On average, for {selected_time}, {best_day} has the highest tip percentage at {best_value:.2f}%")
else:
    st.info("No data available for the selected filters.")