import gradio as gr
import pandas as pd
import numpy as np
import joblib
import json
from huggingface_hub import hf_hub_download
|
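# Model artifacts hosted on the Hugging Face Hub: a calibrated Random Forest
# and a metadata file with the validation decision thresholds.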
REPO_ID = "TarekMasryo/CreditCard-fraud-detection-ML"
MODEL_FILENAME = "model_rf_cal.joblib"
META_FILENAME = "meta.json"
|
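# Feature schema expected by the model: the 28 PCA components (V1..V28) plus
# the engineered amount/time columns used during training.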
PCA_FEATURES = [f"V{i}" for i in range(1, 29)]
ENGINEERED = ["Amount", "_log_amount", "Hour_from_start_mod24",
              "is_night_proxy", "is_business_hours_proxy"]
FEATURES = PCA_FEATURES + ENGINEERED
|
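# Download the model and its metadata from the Hub and load them once at startup.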
model_path = hf_hub_download(REPO_ID, MODEL_FILENAME)
model = joblib.load(model_path)

meta_path = hf_hub_download(REPO_ID, META_FILENAME)
with open(meta_path, "r") as f:
    meta = json.load(f)
|
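# Default decision threshold: the validation P≥90% operating point stored in the metadata.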
DEFAULT_THR = float(meta["thresholds"]["rf_cal"]["p90"])


def ensure_engineered_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Create engineered columns if missing, using the same logic as training."""
    df = df.copy()
|
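    # Log-scaled transaction amount.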
    if "_log_amount" not in df.columns and "Amount" in df.columns:
        df["_log_amount"] = np.log1p(df["Amount"].astype(float))
|
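    # Hour of day derived from the raw 'Time' column (seconds from start).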
    if "Hour_from_start_mod24" not in df.columns and "Time" in df.columns:
        hours = (np.floor(df["Time"].astype(float) / 3600) % 24).astype(int)
        df["Hour_from_start_mod24"] = hours
|
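    # Binary flags for the night window (hours 22-23 and 0-5) and business hours (9-17).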
    if "is_night_proxy" not in df.columns and "Hour_from_start_mod24" in df.columns:
        df["is_night_proxy"] = df["Hour_from_start_mod24"].isin([22, 23, 0, 1, 2, 3, 4, 5]).astype(int)

    if "is_business_hours_proxy" not in df.columns and "Hour_from_start_mod24" in df.columns:
        df["is_business_hours_proxy"] = df["Hour_from_start_mod24"].between(9, 17).astype(int)

    return df
|
|
def predict_csv(file, threshold: float, return_all_rows: bool):
    # gr.File may pass a tempfile wrapper or a plain path depending on the Gradio version.
    path = file.name if hasattr(file, "name") else file
    df = pd.read_csv(path)
    df = ensure_engineered_columns(df)

    missing = [c for c in FEATURES if c not in df.columns]
    if missing:
        # Raise a visible error rather than returning a lone string for the two outputs.
        raise gr.Error(
            f"❌ Missing required columns: {missing}. Provide these or include 'Time' and 'Amount' "
            "so the app can derive the engineered features."
        )
|
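    # Score every row and apply the chosen decision threshold.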
    probs = model.predict_proba(df[FEATURES])[:, 1]
    preds = (probs >= threshold).astype(int)

    out = df.copy()
    out["Fraud_Probability"] = probs
    out["Prediction"] = preds
|
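    # Save the scored rows (all rows, or the first 200 when previewing) for download.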
    out_path = "predictions.csv"
    (out if return_all_rows else out.head(200)).to_csv(out_path, index=False)

    display_df = out if return_all_rows else out.head(50)
    return display_df, out_path
|
|
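# Gradio UI: upload a CSV, adjust the decision threshold, and download the scored predictions.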
with gr.Blocks() as demo:
    gr.Markdown("# 💳 Credit Card Fraud Detection — Calibrated RF (HF Model)")
    gr.Markdown(
        "Upload a CSV with transaction rows. The app loads a calibrated Random Forest model "
        "and applies the **validation P≥90% threshold** by default. "
        "Required columns: V1..V28, Amount, and either engineered features or a raw Time column "
        "(seconds from start) so the app can derive them."
    )
|
    with gr.Row():
        file_in = gr.File(label="Upload CSV", file_types=[".csv"])
    with gr.Row():
        thr = gr.Slider(minimum=0.0, maximum=1.0, value=DEFAULT_THR, step=0.001,
                        label=f"Decision Threshold (default P≥90% = {DEFAULT_THR:.3f})")
        all_rows = gr.Checkbox(label="Return all rows (uncheck to preview first 50)", value=False)
|
    btn = gr.Button("Predict")
|
    out_df = gr.Dataframe(label="Predictions")
    out_file = gr.File(label="Download predictions.csv")
|
    btn.click(fn=predict_csv, inputs=[file_in, thr, all_rows], outputs=[out_df, out_file])
|
|
if __name__ == "__main__":
    demo.launch()