DavMelchi committed on
Commit
67209eb
·
1 Parent(s): 440ac25

Add performance profiling toggle with detailed timing metrics for health check evaluation, multi-RAT views, traffic computation, delta calculation, and Excel export with per-sheet write timings and row/column counts

Browse files
panel_app/convert_to_excel_panel.py CHANGED
@@ -1,11 +1,15 @@
1
  import io
 
2
  from typing import Iterable, Sequence
3
 
4
  import pandas as pd
5
 
6
 
7
  def write_dfs_to_excel(
8
- dfs: Sequence[pd.DataFrame], sheet_names: Sequence[str], index: bool = True
 
 
 
9
  ) -> bytes:
10
  """Simple Excel export for Panel.
11
 
@@ -14,10 +18,38 @@ def write_dfs_to_excel(
14
  and avoid Streamlit runtime warnings.
15
  """
16
  bytes_io = io.BytesIO()
 
17
  with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
18
  for df, name in zip(dfs, sheet_names):
19
  # Ensure we always write a valid DataFrame, even if None was passed
20
  safe_df = df if isinstance(df, pd.DataFrame) else pd.DataFrame()
 
21
  safe_df.to_excel(writer, sheet_name=str(name), index=index)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  return bytes_io.getvalue()
 
1
  import io
2
+ import time
3
  from typing import Iterable, Sequence
4
 
5
  import pandas as pd
6
 
7
 
8
  def write_dfs_to_excel(
9
+ dfs: Sequence[pd.DataFrame],
10
+ sheet_names: Sequence[str],
11
+ index: bool = True,
12
+ profile: dict | None = None,
13
  ) -> bytes:
14
  """Simple Excel export for Panel.
15
 
 
18
  and avoid Streamlit runtime warnings.
19
  """
20
  bytes_io = io.BytesIO()
21
+ t0 = time.perf_counter() if profile is not None else 0.0
22
  with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
23
  for df, name in zip(dfs, sheet_names):
24
  # Ensure we always write a valid DataFrame, even if None was passed
25
  safe_df = df if isinstance(df, pd.DataFrame) else pd.DataFrame()
26
+ t_sheet0 = time.perf_counter() if profile is not None else 0.0
27
  safe_df.to_excel(writer, sheet_name=str(name), index=index)
28
+ t_sheet1 = time.perf_counter() if profile is not None else 0.0
29
+
30
+ if profile is not None:
31
+ sheets = profile.get("excel_sheets")
32
+ if not isinstance(sheets, list):
33
+ sheets = []
34
+ profile["excel_sheets"] = sheets
35
+ try:
36
+ rows = int(len(safe_df))
37
+ except Exception: # noqa: BLE001
38
+ rows = 0
39
+ try:
40
+ cols = int(safe_df.shape[1])
41
+ except Exception: # noqa: BLE001
42
+ cols = 0
43
+ sheets.append(
44
+ {
45
+ "name": str(name),
46
+ "rows": rows,
47
+ "cols": cols,
48
+ "seconds": float(t_sheet1 - t_sheet0),
49
+ }
50
+ )
51
+
52
+ if profile is not None:
53
+ profile["excel_total_seconds"] = float(time.perf_counter() - t0)
54
 
55
  return bytes_io.getvalue()
panel_app/kpi_health_check_panel.py CHANGED
@@ -1388,6 +1388,11 @@ export_include_raw_data = pn.widgets.Checkbox(
1388
  value=True,
1389
  )
1390
 
 
 
 
 
 
1391
  alert_pack_button = pn.widgets.FileDownload(
1392
  label="Download Alert Pack",
1393
  filename="KPI_Alert_Pack.xlsx",
@@ -3241,8 +3246,13 @@ def run_health_check(event=None) -> None:
3241
  all_status = []
3242
  all_summary = []
3243
 
 
 
 
 
3244
  for rat, daily in current_daily_by_rat.items():
3245
  d = _filtered_daily(daily)
 
3246
  status_df, summary_df = evaluate_health_check(
3247
  d,
3248
  rat,
@@ -3253,6 +3263,14 @@ def run_health_check(event=None) -> None:
3253
  int(mcd),
3254
  granularity=str(granularity_select.value or "Daily"),
3255
  )
 
 
 
 
 
 
 
 
3256
  if not status_df.empty:
3257
  all_status.append(status_df)
3258
  if not summary_df.empty:
@@ -3266,11 +3284,15 @@ def run_health_check(event=None) -> None:
3266
  )
3267
  site_summary_table.value = current_summary_df
3268
 
 
3269
  current_multirat_raw, current_top_anomalies_raw = compute_multirat_views(
3270
  current_status_df
3271
  )
 
3272
 
 
3273
  traffic_df = _compute_site_traffic_gb(current_daily_by_rat)
 
3274
  if traffic_df is not None and not traffic_df.empty:
3275
  if current_multirat_raw is not None and not current_multirat_raw.empty:
3276
  current_multirat_raw = pd.merge(
@@ -3333,10 +3355,12 @@ def run_health_check(event=None) -> None:
3333
  current_export_bytes = None
3334
  current_alert_pack_bytes = None
3335
 
 
3336
  try:
3337
  current_delta_df = _compute_delta_df()
3338
  except Exception: # noqa: BLE001
3339
  current_delta_df = pd.DataFrame()
 
3340
  delta_table.value = current_delta_df
3341
 
3342
  _invalidate_drilldown_cache(healthcheck_changed=True)
@@ -3344,7 +3368,29 @@ def run_health_check(event=None) -> None:
3344
  _update_site_view()
3345
 
3346
  status_pane.alert_type = "success"
3347
- status_pane.object = "Health check completed."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3348
 
3349
  _refresh_validation_state()
3350
 
@@ -3353,7 +3399,7 @@ def run_health_check(event=None) -> None:
3353
  status_pane.object = f"Error: {exc}"
3354
 
3355
 
3356
- def _build_export_bytes() -> bytes:
3357
  include_raw = bool(export_include_raw_data.value)
3358
  daily_by_rat = (
3359
  current_daily_by_rat
@@ -3405,14 +3451,47 @@ def _build_export_bytes() -> bytes:
3405
  delta_df=(
3406
  current_delta_df if isinstance(current_delta_df, pd.DataFrame) else None
3407
  ),
 
3408
  )
3409
 
3410
 
3411
  def _export_callback() -> io.BytesIO:
3412
  global current_export_bytes
3413
- if current_export_bytes is None:
 
3414
  try:
3415
- current_export_bytes = _build_export_bytes()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3416
  except Exception: # noqa: BLE001
3417
  current_export_bytes = b""
3418
  return io.BytesIO(current_export_bytes or b"")
@@ -3949,6 +4028,7 @@ sidebar = pn.Column(
3949
  ),
3950
  pn.Card(
3951
  export_include_raw_data,
 
3952
  export_button,
3953
  alert_pack_button,
3954
  title="Export",
 
1388
  value=True,
1389
  )
1390
 
1391
+ perf_profiling = pn.widgets.Checkbox(
1392
+ name="Perf profiling",
1393
+ value=False,
1394
+ )
1395
+
1396
  alert_pack_button = pn.widgets.FileDownload(
1397
  label="Download Alert Pack",
1398
  filename="KPI_Alert_Pack.xlsx",
 
3246
  all_status = []
3247
  all_summary = []
3248
 
3249
+ do_profile = bool(perf_profiling.value)
3250
+ t_run0 = time.perf_counter() if do_profile else 0.0
3251
+ rat_timings = []
3252
+
3253
  for rat, daily in current_daily_by_rat.items():
3254
  d = _filtered_daily(daily)
3255
+ t0 = time.perf_counter() if do_profile else 0.0
3256
  status_df, summary_df = evaluate_health_check(
3257
  d,
3258
  rat,
 
3263
  int(mcd),
3264
  granularity=str(granularity_select.value or "Daily"),
3265
  )
3266
+ if do_profile:
3267
+ rat_timings.append(
3268
+ {
3269
+ "rat": str(rat),
3270
+ "seconds": float(time.perf_counter() - t0),
3271
+ "rows": int(len(d)) if isinstance(d, pd.DataFrame) else 0,
3272
+ }
3273
+ )
3274
  if not status_df.empty:
3275
  all_status.append(status_df)
3276
  if not summary_df.empty:
 
3284
  )
3285
  site_summary_table.value = current_summary_df
3286
 
3287
+ t_mr0 = time.perf_counter() if do_profile else 0.0
3288
  current_multirat_raw, current_top_anomalies_raw = compute_multirat_views(
3289
  current_status_df
3290
  )
3291
+ mr_seconds = float(time.perf_counter() - t_mr0) if do_profile else 0.0
3292
 
3293
+ t_tr0 = time.perf_counter() if do_profile else 0.0
3294
  traffic_df = _compute_site_traffic_gb(current_daily_by_rat)
3295
+ tr_seconds = float(time.perf_counter() - t_tr0) if do_profile else 0.0
3296
  if traffic_df is not None and not traffic_df.empty:
3297
  if current_multirat_raw is not None and not current_multirat_raw.empty:
3298
  current_multirat_raw = pd.merge(
 
3355
  current_export_bytes = None
3356
  current_alert_pack_bytes = None
3357
 
3358
+ t_dl0 = time.perf_counter() if do_profile else 0.0
3359
  try:
3360
  current_delta_df = _compute_delta_df()
3361
  except Exception: # noqa: BLE001
3362
  current_delta_df = pd.DataFrame()
3363
+ dl_seconds = float(time.perf_counter() - t_dl0) if do_profile else 0.0
3364
  delta_table.value = current_delta_df
3365
 
3366
  _invalidate_drilldown_cache(healthcheck_changed=True)
 
3368
  _update_site_view()
3369
 
3370
  status_pane.alert_type = "success"
3371
+ msg = "Health check completed."
3372
+ if do_profile:
3373
+ total_s = float(time.perf_counter() - t_run0)
3374
+ rat_timings_sorted = sorted(
3375
+ rat_timings, key=lambda x: float(x.get("seconds", 0.0)), reverse=True
3376
+ )
3377
+ top_lines = [
3378
+ f"- {r['rat']}: {r['seconds']:.2f}s (rows={r['rows']})"
3379
+ for r in rat_timings_sorted[:8]
3380
+ ]
3381
+ msg = (
3382
+ msg
3383
+ + "\n\nPerf (seconds)"
3384
+ + f"\n- total_run: {total_s:.2f}s"
3385
+ + (
3386
+ "\n- evaluate_health_check:"
3387
+ + ("\n" + "\n".join(top_lines) if top_lines else "")
3388
+ )
3389
+ + f"\n- compute_multirat_views: {mr_seconds:.2f}s"
3390
+ + f"\n- compute_site_traffic_gb: {tr_seconds:.2f}s"
3391
+ + f"\n- compute_delta_df: {dl_seconds:.2f}s"
3392
+ )
3393
+ status_pane.object = msg
3394
 
3395
  _refresh_validation_state()
3396
 
 
3399
  status_pane.object = f"Error: {exc}"
3400
 
3401
 
3402
+ def _build_export_bytes(profile: dict | None = None) -> bytes:
3403
  include_raw = bool(export_include_raw_data.value)
3404
  daily_by_rat = (
3405
  current_daily_by_rat
 
3451
  delta_df=(
3452
  current_delta_df if isinstance(current_delta_df, pd.DataFrame) else None
3453
  ),
3454
+ profile=profile,
3455
  )
3456
 
3457
 
3458
  def _export_callback() -> io.BytesIO:
3459
  global current_export_bytes
3460
+ do_profile = bool(perf_profiling.value)
3461
+ if do_profile or current_export_bytes is None:
3462
  try:
3463
+ t0 = time.perf_counter() if do_profile else 0.0
3464
+ profile = {} if do_profile else None
3465
+ current_export_bytes = _build_export_bytes(profile=profile)
3466
+ if do_profile:
3467
+ total_s = float(time.perf_counter() - t0)
3468
+ excel_s = (
3469
+ float(profile.get("excel_total_seconds", 0.0)) if profile else 0.0
3470
+ )
3471
+ prep_s = (
3472
+ float(profile.get("export_prep_seconds", 0.0)) if profile else 0.0
3473
+ )
3474
+ sheets = profile.get("excel_sheets") if profile else None
3475
+ slow = []
3476
+ if isinstance(sheets, list) and sheets:
3477
+ sheets2 = [s for s in sheets if isinstance(s, dict)]
3478
+ sheets2 = sorted(
3479
+ sheets2,
3480
+ key=lambda x: float(x.get("seconds", 0.0)),
3481
+ reverse=True,
3482
+ )
3483
+ slow = [
3484
+ f"- {s.get('name')}: {float(s.get('seconds', 0.0)):.2f}s (rows={s.get('rows')}, cols={s.get('cols')})"
3485
+ for s in sheets2[:8]
3486
+ ]
3487
+ status_pane.alert_type = "primary"
3488
+ status_pane.object = (
3489
+ "Export profiling"
3490
+ + f"\n- total_export: {total_s:.2f}s"
3491
+ + f"\n- export_prep: {prep_s:.2f}s"
3492
+ + f"\n- excel_write: {excel_s:.2f}s"
3493
+ + ("\n- slowest_sheets:\n" + "\n".join(slow) if slow else "")
3494
+ )
3495
  except Exception: # noqa: BLE001
3496
  current_export_bytes = b""
3497
  return io.BytesIO(current_export_bytes or b"")
 
4028
  ),
4029
  pn.Card(
4030
  export_include_raw_data,
4031
+ perf_profiling,
4032
  export_button,
4033
  alert_pack_button,
4034
  title="Export",
process_kpi/kpi_health_check/export.py CHANGED
@@ -16,7 +16,13 @@ def build_export_bytes(
16
  complaint_top_anomalies_df: pd.DataFrame | None = None,
17
  ops_queue_df: pd.DataFrame | None = None,
18
  delta_df: pd.DataFrame | None = None,
 
19
  ) -> bytes:
 
 
 
 
 
20
  dfs = [
21
  datasets_df if isinstance(datasets_df, pd.DataFrame) else pd.DataFrame(),
22
  rules_df if isinstance(rules_df, pd.DataFrame) else pd.DataFrame(),
@@ -86,4 +92,11 @@ def build_export_bytes(
86
  "Delta",
87
  ]
88
  )
89
- return write_dfs_to_excel(dfs, sheet_names, index=False)
 
 
 
 
 
 
 
 
16
  complaint_top_anomalies_df: pd.DataFrame | None = None,
17
  ops_queue_df: pd.DataFrame | None = None,
18
  delta_df: pd.DataFrame | None = None,
19
+ profile: dict | None = None,
20
  ) -> bytes:
21
+ if profile is not None:
22
+ profile["export_prep_seconds"] = 0.0
23
+ profile["excel_total_seconds"] = 0.0
24
+
25
+ t_prep0 = pd.Timestamp.utcnow() if profile is not None else None
26
  dfs = [
27
  datasets_df if isinstance(datasets_df, pd.DataFrame) else pd.DataFrame(),
28
  rules_df if isinstance(rules_df, pd.DataFrame) else pd.DataFrame(),
 
92
  "Delta",
93
  ]
94
  )
95
+
96
+ if profile is not None:
97
+ t_prep1 = pd.Timestamp.utcnow()
98
+ if t_prep0 is not None:
99
+ profile["export_prep_seconds"] = float((t_prep1 - t_prep0).total_seconds())
100
+ profile["sheet_count"] = int(len(sheet_names))
101
+
102
+ return write_dfs_to_excel(dfs, sheet_names, index=False, profile=profile)