Add an export raw-data toggle; optimize health-check evaluation with vectorized operations, improved timestamp handling for baseline/recent period masking, and persistent bad-date detection
Browse files
panel_app/kpi_health_check_panel.py
CHANGED
|
@@ -1383,6 +1383,11 @@ export_button = pn.widgets.FileDownload(
|
|
| 1383 |
button_type="primary",
|
| 1384 |
)
|
| 1385 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1386 |
alert_pack_button = pn.widgets.FileDownload(
|
| 1387 |
label="Download Alert Pack",
|
| 1388 |
filename="KPI_Alert_Pack.xlsx",
|
|
@@ -3349,6 +3354,12 @@ def run_health_check(event=None) -> None:
|
|
| 3349 |
|
| 3350 |
|
| 3351 |
def _build_export_bytes() -> bytes:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3352 |
return build_export_bytes(
|
| 3353 |
datasets_df=(
|
| 3354 |
datasets_table.value
|
|
@@ -3364,9 +3375,7 @@ def _build_export_bytes() -> bytes:
|
|
| 3364 |
status_df=(
|
| 3365 |
current_status_df if isinstance(current_status_df, pd.DataFrame) else None
|
| 3366 |
),
|
| 3367 |
-
daily_by_rat=
|
| 3368 |
-
current_daily_by_rat if isinstance(current_daily_by_rat, dict) else None
|
| 3369 |
-
),
|
| 3370 |
granularity=str(granularity_select.value or "Daily"),
|
| 3371 |
multirat_summary_df=(
|
| 3372 |
current_multirat_df
|
|
@@ -3617,6 +3626,13 @@ def _on_drilldown_params_change(event=None) -> None:
|
|
| 3617 |
_refresh_validation_state()
|
| 3618 |
|
| 3619 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3620 |
def _on_granularity_change(event=None) -> None:
|
| 3621 |
if _applying_profile or _loading_datasets:
|
| 3622 |
return
|
|
@@ -3665,6 +3681,8 @@ recent_days.param.watch(_on_drilldown_params_change, "value")
|
|
| 3665 |
rel_threshold_pct.param.watch(_on_drilldown_params_change, "value")
|
| 3666 |
min_consecutive_days.param.watch(_on_drilldown_params_change, "value")
|
| 3667 |
|
|
|
|
|
|
|
| 3668 |
|
| 3669 |
def _on_rules_table_change(event=None) -> None:
|
| 3670 |
global current_export_bytes, current_alert_pack_bytes
|
|
@@ -3930,6 +3948,7 @@ sidebar = pn.Column(
|
|
| 3930 |
sizing_mode="stretch_width",
|
| 3931 |
),
|
| 3932 |
pn.Card(
|
|
|
|
| 3933 |
export_button,
|
| 3934 |
alert_pack_button,
|
| 3935 |
title="Export",
|
|
|
|
| 1383 |
button_type="primary",
|
| 1384 |
)
|
| 1385 |
|
| 1386 |
+
export_include_raw_data = pn.widgets.Checkbox(
|
| 1387 |
+
name="Include raw KPI data (slow)",
|
| 1388 |
+
value=True,
|
| 1389 |
+
)
|
| 1390 |
+
|
| 1391 |
alert_pack_button = pn.widgets.FileDownload(
|
| 1392 |
label="Download Alert Pack",
|
| 1393 |
filename="KPI_Alert_Pack.xlsx",
|
|
|
|
| 3354 |
|
| 3355 |
|
| 3356 |
def _build_export_bytes() -> bytes:
|
| 3357 |
+
include_raw = bool(export_include_raw_data.value)
|
| 3358 |
+
daily_by_rat = (
|
| 3359 |
+
current_daily_by_rat
|
| 3360 |
+
if (include_raw and isinstance(current_daily_by_rat, dict))
|
| 3361 |
+
else None
|
| 3362 |
+
)
|
| 3363 |
return build_export_bytes(
|
| 3364 |
datasets_df=(
|
| 3365 |
datasets_table.value
|
|
|
|
| 3375 |
status_df=(
|
| 3376 |
current_status_df if isinstance(current_status_df, pd.DataFrame) else None
|
| 3377 |
),
|
| 3378 |
+
daily_by_rat=daily_by_rat,
|
|
|
|
|
|
|
| 3379 |
granularity=str(granularity_select.value or "Daily"),
|
| 3380 |
multirat_summary_df=(
|
| 3381 |
current_multirat_df
|
|
|
|
| 3626 |
_refresh_validation_state()
|
| 3627 |
|
| 3628 |
|
| 3629 |
+
def _on_export_options_change(event=None) -> None:
|
| 3630 |
+
global current_export_bytes
|
| 3631 |
+
if _applying_profile or _loading_datasets:
|
| 3632 |
+
return
|
| 3633 |
+
current_export_bytes = None
|
| 3634 |
+
|
| 3635 |
+
|
| 3636 |
def _on_granularity_change(event=None) -> None:
|
| 3637 |
if _applying_profile or _loading_datasets:
|
| 3638 |
return
|
|
|
|
| 3681 |
rel_threshold_pct.param.watch(_on_drilldown_params_change, "value")
|
| 3682 |
min_consecutive_days.param.watch(_on_drilldown_params_change, "value")
|
| 3683 |
|
| 3684 |
+
export_include_raw_data.param.watch(_on_export_options_change, "value")
|
| 3685 |
+
|
| 3686 |
|
| 3687 |
def _on_rules_table_change(event=None) -> None:
|
| 3688 |
global current_export_bytes, current_alert_pack_bytes
|
|
|
|
| 3948 |
sizing_mode="stretch_width",
|
| 3949 |
),
|
| 3950 |
pn.Card(
|
| 3951 |
+
export_include_raw_data,
|
| 3952 |
export_button,
|
| 3953 |
alert_pack_button,
|
| 3954 |
title="Export",
|
process_kpi/kpi_health_check/engine.py
CHANGED
|
@@ -111,7 +111,7 @@ def evaluate_health_check(
|
|
| 111 |
int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
|
| 112 |
)
|
| 113 |
|
| 114 |
-
end_dt = _to_timestamp(daily[time_col].max())
|
| 115 |
if end_dt is None:
|
| 116 |
return pd.DataFrame(), pd.DataFrame()
|
| 117 |
|
|
@@ -121,6 +121,11 @@ def evaluate_health_check(
|
|
| 121 |
|
| 122 |
rat_rules = rules_df[rules_df["RAT"] == rat].copy()
|
| 123 |
kpis = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
rows = []
|
| 126 |
|
|
@@ -131,9 +136,12 @@ def evaluate_health_check(
|
|
| 131 |
else None
|
| 132 |
)
|
| 133 |
g_site = g_site.sort_values(time_col)
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
for kpi in kpis:
|
| 136 |
-
rule =
|
| 137 |
direction = str(rule.get("direction", "higher_is_better"))
|
| 138 |
policy = str(rule.get("policy", "enforce") or "enforce").strip().lower()
|
| 139 |
sla = rule.get("sla", np.nan)
|
|
@@ -144,8 +152,9 @@ def evaluate_health_check(
|
|
| 144 |
|
| 145 |
sla_eval = None if policy == "notify" else sla_val
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
|
|
|
| 149 |
rows.append(
|
| 150 |
{
|
| 151 |
"RAT": rat,
|
|
@@ -157,29 +166,38 @@ def evaluate_health_check(
|
|
| 157 |
)
|
| 158 |
continue
|
| 159 |
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
|
| 164 |
baseline = (
|
| 165 |
-
|
| 166 |
)
|
| 167 |
-
recent =
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
)
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
max_streak = max_consecutive_periods(bad_dates, step=step)
|
| 185 |
persistent = max_streak >= int(min_periods)
|
|
@@ -193,16 +211,20 @@ def evaluate_health_check(
|
|
| 193 |
)
|
| 194 |
|
| 195 |
is_bad_current = is_bad_recent
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
had_bad_recent = (len(bad_dates) > 0) or bool(is_bad_recent)
|
| 208 |
|
|
|
|
| 111 |
int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
|
| 112 |
)
|
| 113 |
|
| 114 |
+
end_dt = _to_timestamp(pd.to_datetime(daily[time_col], errors="coerce").max())
|
| 115 |
if end_dt is None:
|
| 116 |
return pd.DataFrame(), pd.DataFrame()
|
| 117 |
|
|
|
|
| 121 |
|
| 122 |
rat_rules = rules_df[rules_df["RAT"] == rat].copy()
|
| 123 |
kpis = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
|
| 124 |
+
rules_by_kpi = {
|
| 125 |
+
str(r["KPI"]): r
|
| 126 |
+
for r in rat_rules.to_dict(orient="records")
|
| 127 |
+
if str(r.get("KPI", ""))
|
| 128 |
+
}
|
| 129 |
|
| 130 |
rows = []
|
| 131 |
|
|
|
|
| 136 |
else None
|
| 137 |
)
|
| 138 |
g_site = g_site.sort_values(time_col)
|
| 139 |
+
t_all = pd.to_datetime(g_site[time_col], errors="coerce")
|
| 140 |
+
baseline_mask_all = (t_all >= baseline_start_dt) & (t_all <= baseline_end_dt)
|
| 141 |
+
recent_mask_all = (t_all >= recent_start_dt) & (t_all <= recent_end_dt)
|
| 142 |
|
| 143 |
for kpi in kpis:
|
| 144 |
+
rule = rules_by_kpi.get(str(kpi), {})
|
| 145 |
direction = str(rule.get("direction", "higher_is_better"))
|
| 146 |
policy = str(rule.get("policy", "enforce") or "enforce").strip().lower()
|
| 147 |
sla = rule.get("sla", np.nan)
|
|
|
|
| 152 |
|
| 153 |
sla_eval = None if policy == "notify" else sla_val
|
| 154 |
|
| 155 |
+
vals = pd.to_numeric(g_site[kpi], errors="coerce")
|
| 156 |
+
has_any = bool(vals.notna().any())
|
| 157 |
+
if not has_any:
|
| 158 |
rows.append(
|
| 159 |
{
|
| 160 |
"RAT": rat,
|
|
|
|
| 166 |
)
|
| 167 |
continue
|
| 168 |
|
| 169 |
+
baseline_vals = vals.loc[baseline_mask_all]
|
| 170 |
+
recent_vals = vals.loc[recent_mask_all]
|
| 171 |
+
t_recent = t_all.loc[recent_vals.index]
|
| 172 |
|
| 173 |
baseline = (
|
| 174 |
+
baseline_vals.median(skipna=True) if baseline_mask_all.any() else np.nan
|
| 175 |
)
|
| 176 |
+
recent = (
|
| 177 |
+
recent_vals.median(skipna=True) if recent_mask_all.any() else np.nan
|
| 178 |
+
)
|
| 179 |
+
|
| 180 |
+
bad_dates: list = []
|
| 181 |
+
if recent_mask_all.any() and recent_vals.notna().any():
|
| 182 |
+
thr = float(rel_threshold_pct) / 100.0
|
| 183 |
+
b = float(baseline) if pd.notna(baseline) else None
|
| 184 |
+
bad_series = pd.Series(False, index=recent_vals.index)
|
| 185 |
+
|
| 186 |
+
if b is not None:
|
| 187 |
+
if direction == "higher_is_better":
|
| 188 |
+
bad_series = bad_series | (recent_vals < (b - abs(b) * thr))
|
| 189 |
+
else:
|
| 190 |
+
bad_series = bad_series | (recent_vals > (b + abs(b) * thr))
|
| 191 |
+
|
| 192 |
+
if sla_eval is not None and pd.notna(sla_eval):
|
| 193 |
+
if direction == "higher_is_better":
|
| 194 |
+
bad_series = bad_series | (recent_vals < float(sla_eval))
|
| 195 |
+
else:
|
| 196 |
+
bad_series = bad_series | (recent_vals > float(sla_eval))
|
| 197 |
+
|
| 198 |
+
bad_series = bad_series & recent_vals.notna() & t_recent.notna()
|
| 199 |
+
if bool(bad_series.any()):
|
| 200 |
+
bad_dates = t_recent.loc[bad_series].tolist()
|
| 201 |
|
| 202 |
max_streak = max_consecutive_periods(bad_dates, step=step)
|
| 203 |
persistent = max_streak >= int(min_periods)
|
|
|
|
| 211 |
)
|
| 212 |
|
| 213 |
is_bad_current = is_bad_recent
|
| 214 |
+
try:
|
| 215 |
+
last_mask = recent_mask_all & vals.notna() & t_all.notna()
|
| 216 |
+
if bool(last_mask.any()):
|
| 217 |
+
idx_last = t_all.loc[last_mask].idxmax()
|
| 218 |
+
last_val = vals.loc[idx_last]
|
| 219 |
+
is_bad_current = is_bad(
|
| 220 |
+
float(last_val) if pd.notna(last_val) else None,
|
| 221 |
+
float(baseline) if pd.notna(baseline) else None,
|
| 222 |
+
direction,
|
| 223 |
+
rel_threshold_pct,
|
| 224 |
+
sla_eval,
|
| 225 |
+
)
|
| 226 |
+
except Exception: # noqa: BLE001
|
| 227 |
+
pass
|
| 228 |
|
| 229 |
had_bad_recent = (len(bad_dates) > 0) or bool(is_bad_recent)
|
| 230 |
|