import re
from threading import Thread

import torch
from fpdf import FPDF
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFacePipeline
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    pipeline,
)

# --- MODEL SETUP ---
model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"

# Load model and tokenizer once (runs when main.py is imported)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="./offload",
)

# Wrap the Transformers pipeline for LangChain.
# Generation arguments go in pipeline_kwargs so they are forwarded to the
# pipeline at call time (model_kwargs is only used when the model is built
# via from_model_id and would be silently ignored here).
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
llm = HuggingFacePipeline(
    pipeline=pipe,
    pipeline_kwargs={
        "max_new_tokens": 4096,
        "do_sample": True,
        "temperature": 0.2,
        "repetition_penalty": 1.05,
        "eos_token_id": tokenizer.eos_token_id,
    },
)

# Define the prompt template (used by the test_case function)
test_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert QA engineer. STRICTLY follow these rules for your output:
- Generate EXACTLY 10 numbered test cases (1–5 functional, 6–10 edge cases).
- Output ONLY the numbered list.
- DO NOT include explanations, headers, filler text, or markdown.
- Each test MUST be a single, concise sentence.
- Begin your response immediately with '1. '""",
        ),
        (
            "user",
            "Generate test cases for the following code:\n{code}",
        ),
    ]
)


# --- TEST CASE GENERATION FUNCTION ---
def test_case(code):
    # Build and invoke the chain inside the function (avoids the earlier UnboundLocalError)
    test_chain = test_prompt | llm | StrOutputParser()
    test_cases = test_chain.invoke({"code": code})
    print("\n[LOG] 1. LLM Raw Output Length:", len(test_cases))

    # Aggressive cleaning: strip fenced markdown blocks and stray backtick markers
    test_cases = re.sub(r"```.*?```", "", test_cases, flags=re.DOTALL)
    test_cases = re.sub(r"```", "", test_cases)
    test_cases = test_cases.strip()

    # Guardrail: if the LLM returns nothing, force a known output
    if not test_cases:
        test_cases = "Error: Test case generation failed or returned empty content."

    print("\n[LOG] 2. Cleaned Text (for PDF):", test_cases)

    # Pass the clean UTF-8 string directly
    safe_text = test_cases

    pdf = FPDF()
    pdf.add_page()

    # FPDF fix: add and use a Unicode-compatible font (DejaVuSans)
    try:
        # Requires the 'fonts-dejavu' package installed in the Dockerfile
        pdf.add_font("DejaVuSans", style="", fname="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf")
        pdf.set_font("DejaVuSans", size=12)
    except Exception as e:
        print(f"[LOG] FPDF Font Error: {e}. Falling back to Arial.")
        pdf.set_font("Arial", size=12)

    # Title and content cells
    pdf.multi_cell(0, 10, txt="--- Generated Test Cases ---", align='C')
    pdf.multi_cell(0, 10, txt=safe_text)

    # Get the PDF contents as bytes. fpdf2's output() returns a bytearray,
    # while legacy PyFPDF's output(dest='S') returns a latin-1 str, so handle both.
    try:
        raw_output = pdf.output(dest='S')
        if isinstance(raw_output, str):
            # Use 'replace' to safely handle any non-latin-1 characters
            pdf_bytes = raw_output.encode('latin-1', 'replace')
        else:
            pdf_bytes = bytes(raw_output)
        print("\n[LOG] 3. PDF Bytes Length:", len(pdf_bytes))
        # Guardrail: a near-empty file (under ~100 bytes) means generation likely failed
        if len(pdf_bytes) < 100:
            print("[CRITICAL LOG] FPDF generated very small file, likely failed.")
            # Fallback PDF: create a new PDF carrying the error message
            error_pdf = FPDF()
            error_pdf.add_page()
            error_pdf.set_font("Arial", size=12)
            error_pdf.multi_cell(0, 10, txt="ERROR: PDF generation failed to create content. Check logs.")
            fallback = error_pdf.output(dest='S')
            return fallback.encode('latin-1') if isinstance(fallback, str) else bytes(fallback)

        return pdf_bytes

    except Exception as e:
        print(f"[FATAL LOG] PDF output failed with error: {e}")
        # Fatal fallback PDF: create a new PDF carrying the fatal error message
        error_pdf = FPDF()
        error_pdf.add_page()
        error_pdf.set_font("Arial", size=12)
        error_pdf.multi_cell(0, 10, txt=f"FATAL ERROR: PDF generation crashed. Reason: {e}")
        fallback = error_pdf.output(dest='S')
        return fallback.encode('latin-1') if isinstance(fallback, str) else bytes(fallback)


# --- CODE GENERATION MODEL (fine-tuned Qwen2.5-Coder) ---
MODEL_PATH = "jaiwinrc7/Qwen2.5-Coder-0.5B-finetunned-merged"


def load_model_and_tokenizer():
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_PATH,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="cpu",
        trust_remote_code=True,
        torch_dtype=torch.float32,  # float32 is faster than float16 on CPU
    )
    model.eval()
    return tokenizer, model


def build_prompt(lang, task):
    # Keep the prompt simple for speed
    return f"""You are a coding assistant.
Write {lang} code for the following task:

{task}

Code:
"""


def generate_code_stream(lang, user_input, tokenizer, model):
    prompt = build_prompt(lang, user_input)
    inputs = tokenizer(prompt, return_tensors="pt")

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generation_kwargs = dict(
        **inputs,
        max_new_tokens=250,
        do_sample=False,  # greedy decoding; no sampling temperature needed
        use_cache=True,
        streamer=streamer,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Run generation in a background thread so tokens can be consumed as they arrive
    thread = Thread(
        target=model.generate,
        kwargs=generation_kwargs,
    )
    thread.start()

    # Yield tokens as they arrive
    for token in streamer:
        yield token
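

# --- USAGE SKETCH (illustrative only) ---
# A minimal example of how the two entry points above might be exercised.
# The __main__ guard, the sample inputs, and the output file name below are
# hypothetical additions, not part of the original application flow, which is
# expected to import test_case() and generate_code_stream() from a web layer.
if __name__ == "__main__":
    sample_code = "def add(a, b):\n    return a + b"

    # Generate the test-case PDF and write the returned bytes to disk.
    with open("test_cases.pdf", "wb") as f:
        f.write(test_case(sample_code))

    # Stream code generation from the fine-tuned Qwen model, printing tokens live.
    qwen_tokenizer, qwen_model = load_model_and_tokenizer()
    for token in generate_code_stream("Python", "Reverse a string.", qwen_tokenizer, qwen_model):
        print(token, end="", flush=True)
    print()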