DavMelchi committed on
Commit
56be3a7
·
1 Parent(s): 75a24b6

Add persistent availability issue detection with consecutive days tracking and criticity scoring for multi-RAT sites

Browse files
Files changed (1) hide show
  1. apps/kpi_analysis/trafic_analysis.py +330 -13
apps/kpi_analysis/trafic_analysis.py CHANGED
@@ -1,6 +1,6 @@
1
  import io
2
  import zipfile
3
- from datetime import datetime
4
  from pathlib import Path
5
 
6
  import numpy as np
@@ -673,6 +673,159 @@ def analyze_multirat_availability(
673
  return multi
674
 
675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
676
  def monthly_data_analysis(df: pd.DataFrame) -> pd.DataFrame:
677
  df["date"] = pd.to_datetime(df["date"])
678
 
@@ -771,7 +924,9 @@ if pre_range[0] < post_range[0] and pre_range[1] > post_range[1]:
771
  st.warning(" Pre and post periode are overlapping.")
772
  st.stop()
773
 
774
- if st.button(" Run Analysis"):
 
 
775
 
776
  df_2g = read_uploaded_file(two_g_file)
777
  df_3g = read_uploaded_file(three_g_file)
@@ -794,15 +949,45 @@ if st.button(" Run Analysis"):
794
 
795
  monthly_voice_df, monthly_data_df = monthly_data_analysis(full_df)
796
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
797
  full_df["week"] = full_df["date"].dt.isocalendar().week
798
  full_df["year"] = full_df["date"].dt.isocalendar().year
799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800
  # Display Summary
801
  st.success(" Analysis completed")
802
  st.subheader(" Summary Analysis Pre / Post")
803
  st.dataframe(sum_pre_post_analysis)
804
 
805
- summary_2g_avail, site_2g_avail = analyze_2g_availability(full_df, sla_2g)
806
  if summary_2g_avail is not None:
807
  st.subheader("2G - TCH Availability vs SLA")
808
  st.write(f"SLA target 2G TCH availability: {sla_2g}%")
@@ -816,7 +1001,7 @@ if st.button(" Run Analysis"):
816
  "2G TCH availability KPI not found in input report or no data for selected periods."
817
  )
818
 
819
- summary_3g_avail, site_3g_avail = analyze_3g_availability(full_df, sla_3g)
820
  if summary_3g_avail is not None:
821
  st.subheader("3G - Cell Availability vs SLA")
822
  st.write(f"SLA target 3G Cell availability: {sla_3g}%")
@@ -830,7 +1015,7 @@ if st.button(" Run Analysis"):
830
  "3G Cell Availability KPI not found in input report or no data for selected periods."
831
  )
832
 
833
- summary_lte_avail, site_lte_avail = analyze_lte_availability(full_df, sla_lte)
834
  if summary_lte_avail is not None:
835
  st.subheader("LTE - Cell Availability vs SLA")
836
  st.write(f"SLA target LTE Cell availability: {sla_lte}%")
@@ -845,7 +1030,7 @@ if st.button(" Run Analysis"):
845
  )
846
 
847
  # Multi-RAT availability view
848
- multi_rat_df = analyze_multirat_availability(full_df, sla_2g, sla_3g, sla_lte)
849
  if multi_rat_df is not None:
850
  st.subheader("Multi-RAT Availability by site (post-period)")
851
  st.dataframe(multi_rat_df.round(2))
@@ -924,12 +1109,132 @@ if st.button(" Run Analysis"):
924
  "No LTE sites with low availability and significant traffic in post-period."
925
  )
926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927
  # Temporal availability analysis - daily averages per RAT
928
  if any(
929
- col in full_df.columns
930
  for col in ["2g_tch_avail", "3g_cell_avail", "lte_cell_avail"]
931
  ):
932
- temp_df = full_df.copy()
933
  temp_df["date_only"] = temp_df["date"].dt.date
934
 
935
  agg_dict = {}
@@ -1002,7 +1307,7 @@ if st.button(" Run Analysis"):
1002
  st.subheader("Days with average availability below SLA")
1003
  st.dataframe(degraded_df.round(2))
1004
 
1005
- TraficAnalysis.last_period_df = last_period
1006
 
1007
  #######################################################################################################"""
1008
 
@@ -1207,11 +1512,21 @@ if TraficAnalysis.last_period_df is not None:
1207
  else pd.DataFrame()
1208
  )
1209
 
1210
- export_multi_rat = (
1211
- multi_rat_df
1212
- if "multi_rat_df" in locals() and multi_rat_df is not None
1213
- else pd.DataFrame()
1214
  )
 
 
 
 
 
 
 
 
 
 
 
 
1215
 
1216
  final_dfs = convert_dfs(
1217
  [
@@ -1225,6 +1540,7 @@ if TraficAnalysis.last_period_df is not None:
1225
  export_site_3g,
1226
  export_site_lte,
1227
  export_multi_rat,
 
1228
  ],
1229
  [
1230
  "Global_Trafic_Analysis",
@@ -1237,6 +1553,7 @@ if TraficAnalysis.last_period_df is not None:
1237
  "ThreeG_Availability_By_Site",
1238
  "LTE_Availability_By_Site",
1239
  "MultiRAT_Availability_By_Site",
 
1240
  ],
1241
  )
1242
  # 📥 Bouton de téléchargement
 
1
  import io
2
  import zipfile
3
+ from datetime import datetime, timedelta
4
  from pathlib import Path
5
 
6
  import numpy as np
 
673
  return multi
674
 
675
 
676
def analyze_persistent_availability(
    df: pd.DataFrame,
    multi_rat_df: pd.DataFrame,
    sla_2g: float,
    sla_3g: float,
    sla_lte: float,
    min_consecutive_days: int = 3,
) -> pd.DataFrame:
    """Rank sites whose daily availability stays below SLA on consecutive days.

    For each RAT column present in ``df`` (``2g_tch_avail``, ``3g_cell_avail``,
    ``lte_cell_avail``) the values are averaged per ``code`` and calendar day,
    then compared with the matching SLA. A site has a persistent issue on a
    RAT when its longest run of calendar-consecutive below-SLA days reaches
    ``min_consecutive_days``. A missing day interrupts a run.

    Args:
        df: Input rows; must contain ``code`` and ``date`` columns. ``date``
            is coerced with ``pd.to_datetime``.
        multi_rat_df: Optional per-site frame. When it has a ``code`` column,
            the columns ``City``, ``post_total_voice_trafic``,
            ``post_total_data_trafic`` and ``post_multirat_status`` (those
            that exist) are left-joined onto the result. May be ``None``.
        sla_2g: Threshold applied to ``2g_tch_avail``.
        sla_3g: Threshold applied to ``3g_cell_avail``.
        sla_lte: Threshold applied to ``lte_cell_avail``.
        min_consecutive_days: Minimum run length flagged as persistent.

    Returns:
        One row per site with at least one persistent RAT, sorted by
        descending ``criticity_score`` and re-indexed from 0. An empty frame
        is returned when the input is empty, required columns are missing,
        or no site qualifies.
    """
    if df is None or df.empty:
        return pd.DataFrame()
    if "date" not in df.columns or "code" not in df.columns:
        return pd.DataFrame()

    rat_specs = [
        (rat_col, rat_key, sla)
        for rat_col, rat_key, sla in (
            ("2g_tch_avail", "2g", sla_2g),
            ("3g_cell_avail", "3g", sla_3g),
            ("lte_cell_avail", "lte", sla_lte),
        )
        if rat_col in df.columns
    ]
    if not rat_specs:
        return pd.DataFrame()

    # Copy only the columns used here instead of the whole input frame.
    work_df = df[["code", "date"] + [rat_col for rat_col, _, _ in rat_specs]].copy()
    # Coerce first so string/object date columns do not break the .dt accessor.
    work_df["date_only"] = pd.to_datetime(work_df["date"]).dt.date

    one_day = pd.Timedelta(days=1)

    def _streak_stats(below_flags, days):
        """Return (longest consecutive below-SLA run, total below-SLA days)."""
        longest = 0
        running = 0
        below_total = 0
        previous_day = None
        for is_below, day in zip(below_flags, days):
            if is_below:
                below_total += 1
                contiguous = (
                    running > 0
                    and previous_day is not None
                    and day == previous_day + one_day
                )
                running = running + 1 if contiguous else 1
                longest = max(longest, running)
            else:
                running = 0
            previous_day = day
        return longest, below_total

    site_stats = {}
    for rat_col, rat_key, sla in rat_specs:
        daily = (
            work_df.dropna(subset=[rat_col])
            .groupby(["code", "date_only"])[rat_col]
            .mean()
            .reset_index()
        )
        if daily.empty:
            continue
        # Convert once per RAT rather than once per site.
        daily["date_only"] = pd.to_datetime(daily["date_only"])
        daily = daily.sort_values(["code", "date_only"])
        for code, site_daily in daily.groupby("code"):
            longest, below_total = _streak_stats(
                (site_daily[rat_col] < sla).tolist(),
                site_daily["date_only"].tolist(),
            )
            stats = site_stats.setdefault(code, {})
            stats[f"max_streak_{rat_key}"] = longest
            stats[f"below_days_{rat_key}"] = below_total

    if not site_stats:
        return pd.DataFrame()

    rows = []
    for code, stats in site_stats.items():
        max_2g = stats.get("max_streak_2g", 0)
        max_3g = stats.get("max_streak_3g", 0)
        max_lte = stats.get("max_streak_lte", 0)
        below_2g = stats.get("below_days_2g", 0)
        below_3g = stats.get("below_days_3g", 0)
        below_lte = stats.get("below_days_lte", 0)
        # bool() keeps these plain Python booleans even when the threshold is
        # a NumPy integer; a zero streak is never persistent.
        persistent_2g = bool(max_2g and max_2g >= min_consecutive_days)
        persistent_3g = bool(max_3g and max_3g >= min_consecutive_days)
        persistent_lte = bool(max_lte and max_lte >= min_consecutive_days)
        rows.append(
            {
                "code": code,
                "persistent_issue_2g": persistent_2g,
                "persistent_issue_3g": persistent_3g,
                "persistent_issue_lte": persistent_lte,
                "max_consecutive_days_2g": max_2g,
                "max_consecutive_days_3g": max_3g,
                "max_consecutive_days_lte": max_lte,
                "total_below_days_2g": below_2g,
                "total_below_days_3g": below_3g,
                "total_below_days_lte": below_lte,
                # Sum over RATs: a day below SLA on two RATs counts twice.
                "total_below_days_any": below_2g + below_3g + below_lte,
                "persistent_issue_any": (
                    persistent_2g or persistent_3g or persistent_lte
                ),
                "persistent_rats_count": (
                    int(persistent_2g) + int(persistent_3g) + int(persistent_lte)
                ),
            }
        )

    result = pd.DataFrame(rows)
    result = result[result["persistent_issue_any"].astype(bool)]
    if result.empty:
        return result

    # Enrich only when the join key exists; otherwise the merge would raise.
    if (
        multi_rat_df is not None
        and not multi_rat_df.empty
        and "code" in multi_rat_df.columns
    ):
        cols_to_merge = [
            c
            for c in [
                "code",
                "City",
                "post_total_voice_trafic",
                "post_total_data_trafic",
                "post_multirat_status",
            ]
            if c in multi_rat_df.columns
        ]
        result = pd.merge(
            result,
            multi_rat_df[cols_to_merge].drop_duplicates("code"),
            on="code",
            how="left",
        )

    if "post_total_data_trafic" not in result.columns:
        result["post_total_data_trafic"] = 0.0

    # Weights: 1 per unit of post-period data traffic, 100 per below-SLA day,
    # 1000 per RAT with a persistent issue.
    data_trafic = pd.to_numeric(
        result["post_total_data_trafic"], errors="coerce"
    ).fillna(0)
    result["criticity_score"] = (
        data_trafic * 1.0
        + result["total_below_days_any"].fillna(0) * 100.0
        + result["persistent_rats_count"].fillna(0) * 1000.0
    )

    result = result.sort_values(
        by=["criticity_score", "total_below_days_any"], ascending=[False, False]
    )

    # Same 0..n-1 ranking index whether or not the merge above ran.
    return result.reset_index(drop=True)
827
+
828
+
829
  def monthly_data_analysis(df: pd.DataFrame) -> pd.DataFrame:
830
  df["date"] = pd.to_datetime(df["date"])
831
 
 
924
  st.warning(" Pre and post periode are overlapping.")
925
  st.stop()
926
 
927
+ run_analysis = st.button(" Run Analysis")
928
+
929
+ if run_analysis:
930
 
931
  df_2g = read_uploaded_file(two_g_file)
932
  df_3g = read_uploaded_file(three_g_file)
 
949
 
950
  monthly_voice_df, monthly_data_df = monthly_data_analysis(full_df)
951
 
952
+ st.session_state["full_df"] = full_df
953
+ st.session_state["last_period"] = last_period
954
+ st.session_state["sum_pre_post_analysis"] = sum_pre_post_analysis
955
+ st.session_state["avg_pre_post_analysis"] = avg_pre_post_analysis
956
+ st.session_state["monthly_voice_df"] = monthly_voice_df
957
+ st.session_state["monthly_data_df"] = monthly_data_df
958
+
959
+ if "full_df" in st.session_state:
960
+ full_df = st.session_state["full_df"]
961
+ last_period = st.session_state["last_period"]
962
+ sum_pre_post_analysis = st.session_state["sum_pre_post_analysis"]
963
+ avg_pre_post_analysis = st.session_state["avg_pre_post_analysis"]
964
+ monthly_voice_df = st.session_state["monthly_voice_df"]
965
+ monthly_data_df = st.session_state["monthly_data_df"]
966
+
967
  full_df["week"] = full_df["date"].dt.isocalendar().week
968
  full_df["year"] = full_df["date"].dt.isocalendar().year
969
 
970
+ analysis_df = full_df
971
+ analysis_last_period = last_period
972
+ if "City" in full_df.columns:
973
+ available_cities = full_df["City"].dropna().unique()
974
+ if len(available_cities) > 0:
975
+ selected_cities = st.multiselect(
976
+ "Filter analysis by City (optional)",
977
+ sorted(available_cities),
978
+ )
979
+ if selected_cities:
980
+ analysis_df = full_df[full_df["City"].isin(selected_cities)].copy()
981
+ analysis_last_period = last_period[
982
+ last_period["City"].isin(selected_cities)
983
+ ].copy()
984
+
985
  # Display Summary
986
  st.success(" Analysis completed")
987
  st.subheader(" Summary Analysis Pre / Post")
988
  st.dataframe(sum_pre_post_analysis)
989
 
990
+ summary_2g_avail, site_2g_avail = analyze_2g_availability(analysis_df, sla_2g)
991
  if summary_2g_avail is not None:
992
  st.subheader("2G - TCH Availability vs SLA")
993
  st.write(f"SLA target 2G TCH availability: {sla_2g}%")
 
1001
  "2G TCH availability KPI not found in input report or no data for selected periods."
1002
  )
1003
 
1004
+ summary_3g_avail, site_3g_avail = analyze_3g_availability(analysis_df, sla_3g)
1005
  if summary_3g_avail is not None:
1006
  st.subheader("3G - Cell Availability vs SLA")
1007
  st.write(f"SLA target 3G Cell availability: {sla_3g}%")
 
1015
  "3G Cell Availability KPI not found in input report or no data for selected periods."
1016
  )
1017
 
1018
+ summary_lte_avail, site_lte_avail = analyze_lte_availability(analysis_df, sla_lte)
1019
  if summary_lte_avail is not None:
1020
  st.subheader("LTE - Cell Availability vs SLA")
1021
  st.write(f"SLA target LTE Cell availability: {sla_lte}%")
 
1030
  )
1031
 
1032
  # Multi-RAT availability view
1033
+ multi_rat_df = analyze_multirat_availability(analysis_df, sla_2g, sla_3g, sla_lte)
1034
  if multi_rat_df is not None:
1035
  st.subheader("Multi-RAT Availability by site (post-period)")
1036
  st.dataframe(multi_rat_df.round(2))
 
1109
  "No LTE sites with low availability and significant traffic in post-period."
1110
  )
1111
 
1112
+ st.subheader("Persistent availability issues and critical sites")
1113
+ min_persistent_days = st.number_input(
1114
+ "Minimum consecutive days below SLA to flag persistent issue",
1115
+ min_value=2,
1116
+ max_value=30,
1117
+ value=3,
1118
+ step=1,
1119
+ )
1120
+ persistent_df = analyze_persistent_availability(
1121
+ analysis_df, multi_rat_df, sla_2g, sla_3g, sla_lte, int(min_persistent_days)
1122
+ )
1123
+ if persistent_df is not None and not persistent_df.empty:
1124
+ top_critical_n = st.number_input(
1125
+ "Number of top critical sites to display",
1126
+ min_value=5,
1127
+ max_value=200,
1128
+ value=25,
1129
+ step=5,
1130
+ )
1131
+ st.dataframe(persistent_df.head(top_critical_n).round(2))
1132
+ else:
1133
+ st.info(
1134
+ "No persistent availability issues detected with current parameters."
1135
+ )
1136
+
1137
+ if not analysis_df.empty:
1138
+ st.subheader("Site drill-down: traffic and availability over time")
1139
+ sites_df = (
1140
+ analysis_df[["code", "City"]]
1141
+ .drop_duplicates()
1142
+ .sort_values(by=["City", "code"])
1143
+ )
1144
+ site_options = sites_df.apply(
1145
+ lambda row: (
1146
+ f"{row['City']}_{row['code']}"
1147
+ if pd.notna(row["City"])
1148
+ else str(row["code"])
1149
+ ),
1150
+ axis=1,
1151
+ )
1152
+ site_map = dict(zip(site_options, sites_df["code"]))
1153
+ selected_site_label = st.selectbox(
1154
+ "Select a site for detailed view", options=site_options
1155
+ )
1156
+ selected_code = site_map.get(selected_site_label)
1157
+ site_detail_df = analysis_df[analysis_df["code"] == selected_code].copy()
1158
+ if not site_detail_df.empty:
1159
+ site_detail_df = site_detail_df.sort_values("date")
1160
+ traffic_cols = [
1161
+ col
1162
+ for col in ["total_voice_trafic", "total_data_trafic"]
1163
+ if col in site_detail_df.columns
1164
+ ]
1165
+ if traffic_cols:
1166
+ traffic_long = site_detail_df[["date"] + traffic_cols].melt(
1167
+ id_vars="date",
1168
+ value_vars=traffic_cols,
1169
+ var_name="metric",
1170
+ value_name="value",
1171
+ )
1172
+ fig_traffic = px.line(
1173
+ traffic_long,
1174
+ x="date",
1175
+ y="value",
1176
+ color="metric",
1177
+ )
1178
+ st.plotly_chart(fig_traffic)
1179
+ avail_cols = []
1180
+ rename_map = {}
1181
+ if "2g_tch_avail" in site_detail_df.columns:
1182
+ avail_cols.append("2g_tch_avail")
1183
+ rename_map["2g_tch_avail"] = "2G"
1184
+ if "3g_cell_avail" in site_detail_df.columns:
1185
+ avail_cols.append("3g_cell_avail")
1186
+ rename_map["3g_cell_avail"] = "3G"
1187
+ if "lte_cell_avail" in site_detail_df.columns:
1188
+ avail_cols.append("lte_cell_avail")
1189
+ rename_map["lte_cell_avail"] = "LTE"
1190
+ if avail_cols:
1191
+ avail_df = site_detail_df[["date"] + avail_cols].copy()
1192
+ avail_df = avail_df.rename(columns=rename_map)
1193
+ value_cols = [c for c in avail_df.columns if c != "date"]
1194
+ avail_long = avail_df.melt(
1195
+ id_vars="date",
1196
+ value_vars=value_cols,
1197
+ var_name="RAT",
1198
+ value_name="availability",
1199
+ )
1200
+ fig_avail = px.line(
1201
+ avail_long,
1202
+ x="date",
1203
+ y="availability",
1204
+ color="RAT",
1205
+ )
1206
+ st.plotly_chart(fig_avail)
1207
+ site_detail_df["date_only"] = site_detail_df["date"].dt.date
1208
+ degraded_rows_site = []
1209
+ for rat_col, rat_name, sla_value in [
1210
+ ("2g_tch_avail", "2G", sla_2g),
1211
+ ("3g_cell_avail", "3G", sla_3g),
1212
+ ("lte_cell_avail", "LTE", sla_lte),
1213
+ ]:
1214
+ if rat_col in site_detail_df.columns:
1215
+ daily_site = (
1216
+ site_detail_df.groupby("date_only")[rat_col].mean().dropna()
1217
+ )
1218
+ mask = daily_site < sla_value
1219
+ for d, val in daily_site[mask].items():
1220
+ degraded_rows_site.append(
1221
+ {
1222
+ "RAT": rat_name,
1223
+ "date": d,
1224
+ "avg_availability": val,
1225
+ "SLA": sla_value,
1226
+ }
1227
+ )
1228
+ if degraded_rows_site:
1229
+ degraded_site_df = pd.DataFrame(degraded_rows_site)
1230
+ st.dataframe(degraded_site_df.round(2))
1231
+
1232
  # Temporal availability analysis - daily averages per RAT
1233
  if any(
1234
+ col in analysis_df.columns
1235
  for col in ["2g_tch_avail", "3g_cell_avail", "lte_cell_avail"]
1236
  ):
1237
+ temp_df = analysis_df.copy()
1238
  temp_df["date_only"] = temp_df["date"].dt.date
1239
 
1240
  agg_dict = {}
 
1307
  st.subheader("Days with average availability below SLA")
1308
  st.dataframe(degraded_df.round(2))
1309
 
1310
+ TraficAnalysis.last_period_df = analysis_last_period
1311
 
1312
  #######################################################################################################"""
1313
 
 
1512
  else pd.DataFrame()
1513
  )
1514
 
1515
+ export_multi_rat_base = analyze_multirat_availability(
1516
+ full_df, sla_2g, sla_3g, sla_lte
 
 
1517
  )
1518
+ if export_multi_rat_base is not None:
1519
+ export_multi_rat = export_multi_rat_base
1520
+ else:
1521
+ export_multi_rat = pd.DataFrame()
1522
+
1523
+ export_persistent = pd.DataFrame()
1524
+ if export_multi_rat_base is not None:
1525
+ export_persistent_tmp = analyze_persistent_availability(
1526
+ full_df, export_multi_rat_base, sla_2g, sla_3g, sla_lte
1527
+ )
1528
+ if export_persistent_tmp is not None:
1529
+ export_persistent = export_persistent_tmp
1530
 
1531
  final_dfs = convert_dfs(
1532
  [
 
1540
  export_site_3g,
1541
  export_site_lte,
1542
  export_multi_rat,
1543
+ export_persistent,
1544
  ],
1545
  [
1546
  "Global_Trafic_Analysis",
 
1553
  "ThreeG_Availability_By_Site",
1554
  "LTE_Availability_By_Site",
1555
  "MultiRAT_Availability_By_Site",
1556
+ "Top_Critical_Sites",
1557
  ],
1558
  )
1559
  # 📥 Bouton de téléchargement