Spaces:

DavMelchi
/

db_query

Running

App Files Files Community

DavMelchi commited on Aug 3

Commit

9d2b604

1 Parent(s): 583ea65

Adding global trafic analysis

Browse files

Files changed (2) hide show

app.py +4 -0
apps/kpi_analysis/trafic_analysis.py +399 -0

app.py CHANGED Viewed

@@ -176,6 +176,10 @@ if check_password():
                 "apps/kpi_analysis/anomalie.py",
                 title=" 📊 KPIs Anomaly Detection",
             ),
         ],
         "Documentations": [
             st.Page(

                 "apps/kpi_analysis/anomalie.py",
                 title=" 📊 KPIs Anomaly Detection",
             ),
+            st.Page(
+                "apps/kpi_analysis/trafic_analysis.py",
+                title=" 📊 Trafic Analysis",
+            ),
         ],
         "Documentations": [
             st.Page(

apps/kpi_analysis/trafic_analysis.py ADDED Viewed

	@@ -0,0 +1,399 @@

+from datetime import datetime
+import pandas as pd
+import plotly.express as px
+import streamlit as st
+from utils.convert_to_excel import convert_dfs, save_dataframe
+from utils.utils_vars import get_physical_db
+class TraficAnalysis:
+    last_period_df: pd.DataFrame = None
+############### PROCESSING ###############
+def extract_code(name):
+    name = name.replace(" ", "_") if isinstance(name, str) else None
+    return int(name.split("_")[0]) if name and len(name) >= 10 else None
+def preprocess_2g(df: pd.DataFrame) -> pd.DataFrame:
+    df = df[df["BCF name"].str.len() >= 10].copy()
+    df["2g_data_trafic"] = df["TRAFFIC_PS DL"] + df["PS_UL_Load"]
+    df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"}, inplace=True)
+    df["code"] = df["BCF name"].apply(extract_code)
+    df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y")
+    df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
+    df = df.groupby(["date", "ID", "code"], as_index=False)[
+        ["2g_data_trafic", "2g_voice_trafic"]
+    ].sum()
+    return df
+def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
+    df = df[df["WBTS name"].str.len() >= 10].copy()
+    df["code"] = df["WBTS name"].apply(extract_code)
+    df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y")
+    df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
+    df.rename(
+        columns={
+            "Total CS traffic - Erl": "3g_voice_trafic",
+            "Total_Data_Traffic": "3g_data_trafic",
+        },
+        inplace=True,
+    )
+    df = df.groupby(["date", "ID", "code"], as_index=False)[
+        ["3g_voice_trafic", "3g_data_trafic"]
+    ].sum()
+    return df
+def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame:
+    df = df[df["LNBTS name"].str.len() >= 10].copy()
+    df["lte_data_trafic"] = (
+        df["4G/LTE DL Traffic Volume (GBytes)"]
+        + df["4G/LTE UL Traffic Volume (GBytes)"]
+    )
+    df["code"] = df["LNBTS name"].apply(extract_code)
+    df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y")
+    df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
+    df = df.groupby(["date", "ID", "code"], as_index=False)[["lte_data_trafic"]].sum()
+    return df
+############################## ANALYSIS ################
+def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_range):
+    # Load physical database
+    physical_db = get_physical_db()
+    physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
+    physical_db["code"] = (
+        pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
+    )
+    physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]
+    physical_db = physical_db.drop_duplicates(subset="code")
+    df = pd.merge(df_2g, df_3g, on=["date", "ID", "code"], how="outer")
+    df = pd.merge(df, df_lte, on=["date", "ID", "code"], how="outer")
+    # print(df)
+    for col in [
+        "2g_data_trafic",
+        "2g_voice_trafic",
+        "3g_voice_trafic",
+        "3g_data_trafic",
+        "lte_data_trafic",
+    ]:
+        if col not in df:
+            df[col] = 0
+    df.fillna(0, inplace=True)
+    df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
+    df["total_data_trafic"] = (
+        df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
+    )
+    df = pd.merge(df, physical_db, on=["code"], how="left")
+    # Assign period based on date range
+    pre_start, pre_end = pd.to_datetime(pre_range[0]), pd.to_datetime(pre_range[1])
+    post_start, post_end = pd.to_datetime(post_range[0]), pd.to_datetime(post_range[1])
+    last_period_start, last_period_end = pd.to_datetime(
+        last_period_range[0]
+    ), pd.to_datetime(last_period_range[1])
+    last_period = df[
+        (df["date"] >= last_period_start) & (df["date"] <= last_period_end)
+    ]
+    def assign_period(date):
+        if pre_start <= date <= pre_end:
+            return "pre"
+        elif post_start <= date <= post_end:
+            return "post"
+        else:
+            return "other"
+    df["period"] = df["date"].apply(assign_period)
+    comparison = df[df["period"].isin(["pre", "post"])]
+    pivot = (
+        comparison.groupby(["code", "period"])[
+            ["total_voice_trafic", "total_data_trafic"]
+        ]
+        .sum()
+        .unstack()
+    )
+    pivot.columns = [f"{metric}_{period}" for metric, period in pivot.columns]
+    pivot = pivot.reset_index()
+    # Differences
+    pivot["total_voice_trafic_diff"] = (
+        pivot["total_voice_trafic_post"] - pivot["total_voice_trafic_pre"]
+    )
+    pivot["total_data_trafic_diff"] = (
+        pivot["total_data_trafic_post"] - pivot["total_data_trafic_pre"]
+    )
+    for metric in ["total_voice_trafic", "total_data_trafic"]:
+        pivot[f"{metric}_diff_pct"] = (
+            (pivot.get(f"{metric}_post", 0) - pivot.get(f"{metric}_pre", 0))
+            / pivot.get(f"{metric}_pre", 1)
+        ) * 100
+    return df, last_period, pivot.round(2)
+############################## UI #########################
+st.title("📊 Global Trafic Analysis - 2G / 3G / LTE")
+doc_col, image_col = st.columns(2)
+with doc_col:
+    st.write(
+        """
+        The report analyzes 2G / 3G / LTE traffic :
+        - 2G Traffic Report in CSV format (required columns : BCF name, PERIOD_START_TIME, TRAFFIC_PS DL, PS_UL_Load)
+        - 3G Traffic Report in CSV format (required columns : WBTS name, PERIOD_START_TIME, Total CS traffic - Erl, Total_Data_Traffic)
+        - LTE Traffic Report in CSV format (required columns : LNBTS name, PERIOD_START_TIME, 4G/LTE DL Traffic Volume (GBytes), 4G/LTE UL Traffic Volume (GBytes))
+        """
+    )
+# with image_col:
+#     st.image("./assets/trafic_analysis.png", width=250)
+upload_2g_col, upload_3g_col, upload_lte_col = st.columns(3)
+with upload_2g_col:
+    two_g_file = st.file_uploader(
+        "Upload 2G Traffic Report", type=["csv", "xls", "xlsx"]
+    )
+with upload_3g_col:
+    three_g_file = st.file_uploader(
+        "Upload 3G Traffic Report", type=["csv", "xls", "xlsx"]
+    )
+with upload_lte_col:
+    lte_file = st.file_uploader(
+        "Upload LTE Traffic Report", type=["csv", "xls", "xlsx"]
+    )
+pre_range_col, post_range_col = st.columns(2)
+with pre_range_col:
+    pre_range = st.date_input("Pre-period (from - to)", [])
+with post_range_col:
+    post_range = st.date_input("Post-period (from - to)", [])
+last_period_range_col, number_of_top_trafic_sites_col = st.columns(2)
+with last_period_range_col:
+    last_period_range = st.date_input("Last period (from - to)", [])
+with number_of_top_trafic_sites_col:
+    number_of_top_trafic_sites = st.number_input(
+        "Number of top traffic sites", value=25
+    )
+if len(pre_range) != 2 or len(post_range) != 2:
+    st.warning("⚠️ Please select 2 dates for each period (pre and post).")
+    st.stop()
+if not all([two_g_file, three_g_file, lte_file]):
+    st.info("Please upload all 3 reports and select the comparison periods.")
+    st.stop()
+if st.button("🔍 Run Analysis"):
+    df_2g = pd.read_csv(two_g_file, delimiter=";")
+    df_3g = pd.read_csv(three_g_file, delimiter=";")
+    df_lte = pd.read_csv(lte_file, delimiter=";")
+    df_2g_clean = preprocess_2g(df_2g)
+    df_3g_clean = preprocess_3g(df_3g)
+    df_lte_clean = preprocess_lte(df_lte)
+    full_df, last_period, summary_df = merge_and_compare(
+        df_2g_clean, df_3g_clean, df_lte_clean, pre_range, post_range, last_period_range
+    )
+    # 🔍 Display Summary
+    st.success("✅ Analysis completed")
+    st.subheader("📈 Summary Analysis Pre / Post")
+    st.dataframe(summary_df)
+    TraficAnalysis.last_period_df = last_period
+#######################################################################################################""
+#######################################################################################################
+if TraficAnalysis.last_period_df is not None:
+    df = TraficAnalysis.last_period_df
+    # Get top trafics sites based on total data trafic during last period
+    top_sites = (
+        df.groupby(["code", "City"])["total_data_trafic"]
+        .sum()
+        .sort_values(ascending=False)
+    )
+    top_sites = top_sites.head(number_of_top_trafic_sites)
+    st.subheader(f"Top {number_of_top_trafic_sites} sites by data traffic")
+    chart_col, data_col = st.columns(2)
+    with data_col:
+        st.dataframe(top_sites.sort_values(ascending=True))
+    # chart
+    fig = px.bar(
+        top_sites.reset_index(),
+        y=top_sites.reset_index()[["City", "code"]].agg(
+            lambda x: "_".join(map(str, x)), axis=1
+        ),
+        x="total_data_trafic",
+        title=f"Top {number_of_top_trafic_sites} sites by data traffic",
+        orientation="h",
+        text="total_data_trafic",
+        text_auto=True,
+    )
+    # fig.update_layout(height=600)
+    with chart_col:
+        st.plotly_chart(fig)
+    # Top sites by voice trafic during last period
+    top_sites_voice = (
+        df.groupby(["code", "City"])["total_voice_trafic"]
+        .sum()
+        .sort_values(ascending=False)
+    )
+    top_sites_voice = top_sites_voice.head(number_of_top_trafic_sites)
+    st.subheader(f"Top {number_of_top_trafic_sites} sites by voice traffic")
+    chart_col, data_col = st.columns(2)
+    with data_col:
+        st.dataframe(top_sites_voice.sort_values(ascending=True))
+    # chart
+    fig = px.bar(
+        top_sites_voice.reset_index(),
+        y=top_sites_voice.reset_index()[["City", "code"]].agg(
+            lambda x: "_".join(map(str, x)), axis=1
+        ),
+        x="total_voice_trafic",
+        title=f"Top {number_of_top_trafic_sites} sites by voice traffic",
+        orientation="h",
+        text="total_voice_trafic",
+        text_auto=True,
+    )
+    # fig.update_layout(height=600)
+    with chart_col:
+        st.plotly_chart(fig)
+    #####################################################
+    min_size = 5
+    max_size = 40
+    # Map of sum of data trafic during last period
+    # Aggregate total data traffic
+    df_data = (
+        df.groupby(["code", "City", "Latitude", "Longitude"])["total_data_trafic"]
+        .sum()
+        .reset_index()
+    )
+    st.subheader("Map of data trafic during last period")
+    # Define size range
+    # Linear size scaling
+    traffic_data_min = df_data["total_data_trafic"].min()
+    traffic_data_max = df_data["total_data_trafic"].max()
+    df_data["bubble_size"] = df_data["total_data_trafic"].apply(
+        lambda x: min_size
+        + (max_size - min_size)
+        * (x - traffic_data_min)
+        / (traffic_data_max - traffic_data_min)
+    )
+    # Custom blue color scale: start from visible blue
+    custom_blue_red = [
+        [0.0, "#4292c6"],  # light blue
+        [0.2, "#2171b5"],
+        [0.4, "#084594"],  # dark blue
+        [0.6, "#cb181d"],  # Strong red
+        [0.8, "#a50f15"],  # Darker red
+        [1.0, "#67000d"],  # Very dark red
+    ]
+    fig = px.scatter_map(
+        df_data,
+        lat="Latitude",
+        lon="Longitude",
+        color="total_data_trafic",
+        size="bubble_size",
+        color_continuous_scale=custom_blue_red,
+        size_max=max_size,
+        zoom=10,
+        height=600,
+        title="Data traffic distribution",
+        hover_data={"code": True, "total_data_trafic": True},
+        hover_name="code",
+        text=[str(x) for x in df_data["code"]],
+    )
+    fig.update_layout(
+        mapbox_style="open-street-map",
+        coloraxis_colorbar=dict(title="Total Data Traffic (MB)"),
+        coloraxis=dict(cmin=traffic_data_min, cmax=traffic_data_max),
+        font=dict(size=10, color="black"),
+    )
+    st.plotly_chart(fig)
+    ########################################################################################
+    # Map of sum of voice trafic during last period
+    # Aggregate total voice traffic
+    df_voice = (
+        df.groupby(["code", "City", "Latitude", "Longitude"])["total_voice_trafic"]
+        .sum()
+        .reset_index()
+    )
+    st.subheader("Map of voice trafic during last period")
+    # Linear size scaling
+    traffic_voice_min = df_voice["total_voice_trafic"].min()
+    traffic_voice_max = df_voice["total_voice_trafic"].max()
+    df_voice["bubble_size"] = df_voice["total_voice_trafic"].apply(
+        lambda x: min_size
+        + (max_size - min_size)
+        * (x - traffic_voice_min)
+        / (traffic_voice_max - traffic_voice_min)
+    )
+    fig = px.scatter_map(
+        df_voice,
+        lat="Latitude",
+        lon="Longitude",
+        color="total_voice_trafic",
+        size="bubble_size",
+        color_continuous_scale=custom_blue_red,
+        size_max=max_size,
+        zoom=10,
+        height=600,
+        title="Voice traffic distribution",
+        hover_data={"code": True, "total_voice_trafic": True},
+        hover_name="code",
+        text=[str(x) for x in df_voice["code"]],
+    )
+    fig.update_layout(
+        mapbox_style="open-street-map",
+        coloraxis_colorbar=dict(title="Total Voice Traffic (MB)"),
+        coloraxis=dict(cmin=traffic_voice_min, cmax=traffic_voice_max),
+        font=dict(size=10, color="black"),
+    )
+    st.plotly_chart(fig)
+    final_dfs = convert_dfs(
+        [full_df, summary_df], ["Global_Trafic_Analysis", "Pre_Post_analysis"]
+    )
+    # 📥 Bouton de téléchargement
+    st.download_button(
+        on_click="ignore",
+        type="primary",
+        label="Download the Analysis Report",
+        data=final_dfs,
+        file_name=f"Global_Trafic_Analysis_Report_{datetime.now()}.xlsx",
+        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    )