import pprint

import numpy as np
import gradio as gr

import src.datasets as datasets
import src.models as models
import src.commons as commons
from src.customlogger import logger

SHARE = False
TOP_K = 5


def setup():
    """Set up the environment by loading the model, tokenizer, and dataset."""
    tokenizer = models.load_tokenizer()
    model = models.load_model()
    proverbs = datasets.load_proverbs()
    index = commons.load_or_create_index(tokenizer, model, proverbs)
    return index, tokenizer, model, proverbs


def sample_inference_results(inference_results: list[dict]) -> list[dict]:
    """Sample one match per input, with probabilities given by a softmax over
    negative distances (the closer the match, the likelier it is sampled)."""
    sampled = []
    for result in inference_results:
        matches = result['matches']
        distances = np.array([m['distance'] for m in matches])
        # Convert distances to probabilities using a softmax over negative
        # distances (closer = higher probability). A large alpha sharpens the
        # distribution, strongly favoring the closest match.
        alpha = 1000
        exp_scores = np.exp(-alpha * distances)
        probs = exp_scores / exp_scores.sum()
        sample = np.random.choice(len(matches), p=probs)
        sampled.append({
            'input': result['input'],
            # Attach each match's sampling probability alongside its distance
            'matches': [match | {'prob': float(probs[i])} for i, match in enumerate(matches)],
            'sample': matches[sample],
            'prob': float(probs[sample]),
        })
    return sampled


def build_response_from_result(result: dict) -> str:
    """Format the ranked matches of a single result as a Markdown list."""
    response = f"**Top {len(result['matches'])} matches:**\n"
    matches = sorted(result['matches'], key=lambda x: x['rank'])
    for match in matches:
        response += (
            f"{match['rank']}. \"{match['proverb']}\" "
            f"*(distance = {match['distance']:.6f}, probability = {match['prob'] * 100:.2f}%)*\n"
        )
    return response


if __name__ == "__main__":
    index, tokenizer, model, proverbs = setup()

    def run(message, history, detailed_response):
        """Chat handler: retrieve the top-k proverbs for the message and reply
        with either a detailed breakdown or just the sampled proverb."""
        # Run inference on the input message
        logger.info(f'Running inference for message "{message}"...')
        results = commons.inference(
            [message], index, tokenizer, model, proverbs, k=TOP_K
        )
        # Sample one match per input from the inference results
        sampled = sample_inference_results(results)
        logger.info(f"Inference result:\n{pprint.pformat(sampled[0])}")
        # Build the final response with the details or just the proverb
        if detailed_response:
            response = build_response_from_result(sampled[0])
        else:
            response = sampled[0]['sample']['proverb']
        return response

    # Create the Gradio interface
    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown("# A virtual Catalan grandparent")
        with gr.Row():
            with gr.Column(scale=1):
                image = gr.Image("https://drive.usercontent.google.com/download?id=1nRCfIFpXwNh5HASfue0r9WPAnUSM18_a")
                gr.Markdown(
                    "This is the demo of \"A virtual Catalan grandparent\", a project built as part of a Natural Language Processing course.\n\n"
                    "You can write anything and you will get a fitting proverb as the response.\n\n"
                    "> The application is based on a pre-trained transformer model, "
                    "[projecte-aina/roberta-base-ca-v2](https://huggingface.co/projecte-aina/roberta-base-ca-v2), "
                    "and uses normalized L2 distance between embeddings to select the most appropriate proverb in the database, "
                    "available [here](https://huggingface.co/datasets/pauhmolins/catalan-proverbs).\n\n"
                )
            with gr.Column(scale=2):
                detailed_response_checkbox = gr.Checkbox(
                    label="Show detailed response", render=False, value=False)
                accordion = gr.Accordion(
                    label="Options", render=False, open=False)
                chat = gr.ChatInterface(
                    fn=run,
                    type="messages",
                    additional_inputs=[detailed_response_checkbox],
                    additional_inputs_accordion=accordion,
                )

    demo.launch(share=SHARE)