"""Gradio app predicting whether a book should be recommended to everyone.

Downloads a zipped AutoGluon TabularPredictor from the Hugging Face Hub,
extracts it locally, and serves an interactive prediction UI.
"""

import os
import pathlib
import shutil
import zipfile

import gradio as gr
import huggingface_hub
import pandas as pd
from autogluon.tabular import TabularPredictor

# Model configuration
MODEL_REPO_ID = "zacCMU/2024-24679-tabular-autolguon-predictor"
ZIP_FILENAME = "autogluon_predictor_dir.zip"
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR / "predictor_native"


def prepare_predictor():
    """Download and extract the AutoGluon predictor from HuggingFace Hub.

    Returns:
        str: Path to the directory that ``TabularPredictor.load`` can open.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    # Updated to remove deprecated parameter
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
    )
    # Start from a clean extraction directory so stale files never mix in.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))
    # The archive may contain the predictor files directly, or a single
    # wrapping folder; unwrap the latter case so loading works either way.
    contents = list(EXTRACT_DIR.iterdir())
    if len(contents) == 1 and contents[0].is_dir():
        predictor_root = contents[0]
    else:
        predictor_root = EXTRACT_DIR
    return str(predictor_root)


# Load the predictor with version mismatch allowed
PREDICTOR_DIR = prepare_predictor()
predictor = TabularPredictor.load(
    PREDICTOR_DIR,
    require_py_version_match=False,
    require_version_match=False,  # bypass AutoGluon version check
)

# Define feature columns based on the actual model
FEATURE_COLS = [
    'FictionorNonfiction',
    'NumPages',
    'ThicknessInches',
    'ReadUnfinishedorUnread',
]

# Target column
TARGET_COL = 'RecommendtoEveryone'

# Define mappings for categorical features
FICTION_NONFICTION_MAP = {
    "Fiction": 0,
    "Non-Fiction": 1,
}

READ_STATUS_OPTIONS = ["Read", "Unfinished", "Unread"]

# Output labels (raw model classes -> human-readable strings)
OUTCOME_LABELS = {
    'No': 'Not Recommended to Everyone',
    'Yes': 'Recommended to Everyone',
}

# Define reasonable ranges for numerical features
PAGE_RANGE = (1, 2000)
THICKNESS_RANGE = (0.1, 5.0)


def calculate_thickness_from_pages(num_pages):
    """Estimate book thickness based on number of pages.

    Assuming approximately 400 pages per inch (standard paperback).
    """
    return round(num_pages / 400, 2)


def predict_book_recommendation(
    fiction_or_nonfiction, num_pages, thickness_inches, read_status
):
    """Predict whether a book should be recommended to everyone.

    Args:
        fiction_or_nonfiction: "Fiction" or "Non-Fiction".
        num_pages: Total page count (numeric).
        thickness_inches: Physical thickness in inches (numeric).
        read_status: One of READ_STATUS_OPTIONS.

    Returns:
        tuple: (markdown report string, {label: probability} dict or None).
    """
    # Encode categorical features
    fiction_code = FICTION_NONFICTION_MAP[fiction_or_nonfiction]

    # Create input dataframe with exact feature names the model was trained on
    input_data = {
        'FictionorNonfiction': fiction_code,
        'NumPages': int(num_pages),
        'ThicknessInches': float(thickness_inches),
        'ReadUnfinishedorUnread': read_status,
    }
    df_input = pd.DataFrame([input_data])

    # Make prediction
    prediction = predictor.predict(df_input)
    raw_pred = prediction.iloc[0]

    # Get prediction probabilities (best effort: the report still renders
    # without them if predict_proba fails for this model type)
    try:
        proba = predictor.predict_proba(df_input)
        if isinstance(proba, pd.Series):
            proba = proba.to_frame().T
        # Format probabilities for display
        proba_dict = {}
        for cls in proba.columns:
            label = OUTCOME_LABELS.get(cls, cls)
            proba_dict[label] = float(proba.iloc[0][cls])
        # Sort by probability, highest first
        proba_dict = dict(
            sorted(proba_dict.items(), key=lambda x: x[1], reverse=True)
        )
    except Exception as e:
        print(f"Error getting probabilities: {e}")
        proba_dict = None

    # Format prediction label
    pred_label = OUTCOME_LABELS.get(raw_pred, raw_pred)

    # Create detailed output
    output_md = "## 📚 Prediction Result\n\n"
    output_md += f"### **{pred_label}**\n\n"

    if proba_dict:
        confidence = max(proba_dict.values()) * 100
        output_md += f"**Confidence:** {confidence:.1f}%\n\n"
        output_md += "### Probability Distribution:\n"
        for label, prob in proba_dict.items():
            # Render a 20-character text bar proportional to the probability
            bar_length = int(prob * 20)
            bar = '█' * bar_length + '░' * (20 - bar_length)
            output_md += f"- {label}: {bar} {prob*100:.1f}%\n"

    # Add interpretation
    output_md += "\n### 📊 Input Summary:\n"
    output_md += f"- **Genre:** {fiction_or_nonfiction}\n"
    output_md += f"- **Length:** {int(num_pages)} pages ({thickness_inches:.1f} inches thick)\n"
    output_md += f"- **Reading Status:** {read_status}\n"

    return output_md, proba_dict


# Define example inputs for demonstration
EXAMPLES = [
    ["Fiction", 320, 0.8, "Read"],            # Popular fiction, completed
    ["Non-Fiction", 450, 1.1, "Unfinished"],  # Non-fiction, not completed
    ["Fiction", 800, 2.0, "Read"],            # Long fiction, completed
    ["Non-Fiction", 150, 0.4, "Read"],        # Short non-fiction, completed
    ["Fiction", 250, 0.6, "Unread"],          # Medium fiction, unread
    ["Non-Fiction", 600, 1.5, "Unfinished"],  # Long non-fiction, unfinished
]

# Create Gradio interface
with gr.Blocks(
    title="Book Recommendation Predictor",
    theme=gr.themes.Soft()
) as demo:
    # Header
    gr.Markdown("""
    # 📚 Book Recommendation Predictor

    ## About This Application
    This interactive tool uses a **Gradient-Boosted Decision Tree (LightGBM)** model trained with AutoGluon
    to predict whether a book should be recommended to everyone based on its characteristics and your reading experience.

    ### How It Works
    The model analyzes:
    - **Book Type**: Fiction vs Non-Fiction content
    - **Book Length**: Number of pages and physical thickness
    - **Reading Experience**: Whether the book was completed, left unfinished, or unread

    These factors help determine if a book has universal appeal that warrants recommendation to all readers.
    """)

    gr.Markdown("---")

    # Input section
    gr.Markdown("### 📝 Enter Book Information")

    with gr.Row():
        with gr.Column(scale=1):
            fiction_input = gr.Radio(
                choices=["Fiction", "Non-Fiction"],
                value="Fiction",
                label="Book Genre",
                info="Is this book fiction or non-fiction?"
            )
            read_input = gr.Radio(
                choices=READ_STATUS_OPTIONS,
                value="Read",
                label="Reading Status",
                info="Have you completed this book?"
            )
        with gr.Column(scale=1):
            pages_input = gr.Slider(
                minimum=PAGE_RANGE[0],
                maximum=PAGE_RANGE[1],
                step=10,
                value=300,
                label="Number of Pages",
                info="Total page count of the book"
            )
            thickness_input = gr.Slider(
                minimum=THICKNESS_RANGE[0],
                maximum=THICKNESS_RANGE[1],
                step=0.1,
                value=0.75,
                label="Book Thickness (inches)",
                info="Physical thickness of the book"
            )
            # Auto-calculate thickness button
            auto_calc_btn = gr.Button("Auto-calculate thickness from pages", size="sm")

    # Prediction section
    gr.Markdown("---")
    with gr.Row():
        predict_btn = gr.Button("🔮 Make Prediction", variant="primary", scale=1)
        clear_btn = gr.Button("🔄 Clear", variant="secondary", scale=1)

    # Output section
    with gr.Row():
        with gr.Column(scale=2):
            output_text = gr.Markdown(label="Prediction Result")
        with gr.Column(scale=1):
            output_proba = gr.Label(
                label="Recommendation Probability",
                num_top_classes=2
            )

    # Examples section
    gr.Markdown("---")
    gr.Markdown("### 💡 Example Books")
    gr.Examples(
        examples=EXAMPLES,
        inputs=[fiction_input, pages_input, thickness_input, read_input],
        outputs=[output_text, output_proba],
        fn=predict_book_recommendation,
        label="Click any example to try it:",
        examples_per_page=6,
    )

    # Model information
    gr.Markdown("---")
    gr.Markdown("""
    ### 📊 Model Information

    **Model Details:**
    - **Type**: Gradient-Boosted Decision Tree (LightGBM) via AutoGluon
    - **Source**: [zacCMU/2024-24679-tabular-autolguon-predictor](https://huggingface.co/zacCMU/2024-24679-tabular-autolguon-predictor)
    - **Task**: Binary Classification (Recommend to Everyone: Yes/No)
    - **Accuracy**: 55% on test set

    **Key Features:**
    - Genre classification (Fiction vs Non-Fiction)
    - Book length metrics (pages and thickness)
    - Reader completion status

    **Limitations:**
    - Does not consider author, writing style, or publication date
    - May perform poorly for users with limited reading history
    - Performance may vary across niche or underrepresented genres

    ### 🎯 Interpretation Guide
    - **Recommended to Everyone**: Books with broad, universal appeal
    - **Not Recommended to Everyone**: Books that may appeal to specific audiences
    - **Confidence Score**: Higher percentages indicate stronger prediction certainty

    ---
    *Created for CMU 24-679 Course | Powered by AutoGluon & Gradio*
    """)

    # Connect functions
    predict_btn.click(
        fn=predict_book_recommendation,
        inputs=[fiction_input, pages_input, thickness_input, read_input],
        outputs=[output_text, output_proba]
    )

    # Auto-calculate thickness directly from the page slider; no wrapper needed
    auto_calc_btn.click(
        fn=calculate_thickness_from_pages,
        inputs=[pages_input],
        outputs=[thickness_input]
    )

    # Clear function: reset inputs to defaults and blank both outputs
    def clear_inputs():
        return "Fiction", 300, 0.75, "Read", "", None

    clear_btn.click(
        fn=clear_inputs,
        outputs=[fiction_input, pages_input, thickness_input, read_input,
                 output_text, output_proba]
    )


if __name__ == "__main__":
    demo.launch()