# main.py
"""Gradio app combining Parrot-based paraphrasing with hybrid plagiarism detection.

Tabs:
  1. Full-paragraph rephraser (Parrot T5 paraphraser).
  2. Sentence-wise paraphraser (top-N diverse options per sentence).
  3. Plagiarism checker (DuckDuckGo exact-phrase search first, Google
     Custom Search as fallback).
"""

import os
import re
import time
from difflib import SequenceMatcher  # moved up from mid-file: imports belong at the top

import gradio as gr
import nltk
from ddgs import DDGS
from dotenv import load_dotenv
from googleapiclient.discovery import build
from nltk.tokenize import sent_tokenize, word_tokenize
from parrot import Parrot

# -----------------------------
# Setup
# -----------------------------
load_dotenv()
nltk.data.path.append("./nltk_data")  # Local punkt

parrot = None  # Lazy-loaded global model

# Load Google credentials
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GOOGLE_CX = os.getenv("GOOGLE_CX")

if not GOOGLE_API_KEY or not GOOGLE_CX:
    print("⚠️ Warning: GOOGLE_API_KEY or GOOGLE_CX not set. Google fallback may fail.")

# Max word-tokens per chunk fed to the paraphraser (T5 input-length guard).
MAX_TOKENS = 150


# -----------------------------
# Parrot Model Loader
# -----------------------------
def get_parrot():
    """Return the shared Parrot model, loading it on first call (lazy init)."""
    global parrot
    if parrot is None:
        print("⏳ Loading Parrot model...")
        parrot = Parrot(model_tag="prithivida/parrot_paraphraser_on_T5", use_gpu=False)
        print("✅ Parrot model loaded!")
    return parrot


# -----------------------------
# Helper Functions
# -----------------------------
def clean_sentence(sent):
    """Normalize one sentence: trim, drop trailing punctuation, capitalize,
    and terminate with a single period.

    Returns "" for blank (or punctuation-only) input instead of producing
    a bare "." as the previous version did.
    """
    sent = sent.strip()
    sent = re.sub(r"[.!?]+$", "", sent)
    if not sent:
        # Fix: previously an empty/punctuation-only sentence came back as ".".
        return ""
    sent = sent[0].upper() + sent[1:]
    if not sent.endswith("."):
        sent += "."
    return sent


def clean_sentences(sentences):
    """Clean each non-blank sentence and join them into one paragraph string."""
    cleaned = [clean_sentence(s) for s in sentences if s.strip()]
    return " ".join(cleaned)


def split_long_sentence(sentence, max_tokens=MAX_TOKENS):
    """Split a sentence into chunks of at most `max_tokens` word tokens each."""
    words = word_tokenize(sentence)
    return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), max_tokens)]


def with_retry(func, *args, retries=1, delay=3, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying up to `retries` extra times.

    Sleeps `delay` seconds between attempts. Returns the function's result,
    or None if every attempt raised (caller must handle the None fallback).
    """
    for attempt in range(retries + 1):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print(f"⚠️ Attempt {attempt + 1} failed: {e}")
            if attempt < retries:
                print("🔁 Retrying...")
                time.sleep(delay)
    return None


# -----------------------------
# 🔹 Full Paragraph Rephraser
# -----------------------------
def rephrase(text):
    """Paraphrase whole paragraphs, sentence by sentence.

    Long sentences are split into MAX_TOKENS-word chunks; each chunk falls
    back to its original text if paraphrasing fails after retries.
    """
    model = get_parrot()
    sentences = sent_tokenize(text)
    rephrased = []
    for s in sentences:
        chunks = split_long_sentence(s)
        paraphrased_chunks = []
        for c in chunks:
            p = with_retry(
                model.augment,
                input_phrase=c,
                do_diverse=True,
                adequacy_threshold=0.7,
                fluency_threshold=0.7,
            )
            if p:
                # augment returns [(text, score), ...]; take the top candidate's text.
                paraphrased_chunks.append(p[0][0])
            else:
                paraphrased_chunks.append(c)  # fall back to the original chunk
        rephrased.append(" ".join(paraphrased_chunks))
    return clean_sentences(rephrased)


# -----------------------------
# 🔹 Sentence-wise Paraphrases
# -----------------------------
def generate_unique_paraphrases(sentence, N_OPTIONS=3):
    """Return up to `N_OPTIONS` distinct paraphrases of `sentence`.

    Falls back to `[sentence]` if the model produced nothing.
    """
    model = get_parrot()
    paraphrases = with_retry(
        model.augment,
        input_phrase=sentence,
        do_diverse=True,
        adequacy_threshold=0.7,
        fluency_threshold=0.7,
    )
    if not paraphrases:
        return [sentence]
    texts = [p[0] for p in paraphrases]  # drop scores, keep candidate texts
    unique = []
    for t in texts:
        if t not in unique:
            unique.append(t)
            if len(unique) == N_OPTIONS:
                break
    return unique


def rephrase_sentencewise_unique(text, N_OPTIONS=3):
    """Format each input sentence with its top-N paraphrase options."""
    sentences = sent_tokenize(text.strip())
    results = []
    for idx, s in enumerate(sentences, 1):
        paraphrases = generate_unique_paraphrases(s, N_OPTIONS)
        paraphrases = [clean_sentence(p) for p in paraphrases]
        formatted = f"Sentence {idx}: {s}\n"
        for i, opt in enumerate(paraphrases, 1):
            formatted += f"  Option {i}: {opt}\n"
        results.append(formatted)
    return "\n".join(results)


# -----------------------------
# 🔹 Hybrid Plagiarism Detection (DuckDuckGo + Google)
# -----------------------------
def search_duckduckgo(query):
    """Exact-phrase DuckDuckGo search; returns (results, result_count).

    Returns ([], 0) on any search error (best-effort — DDG is the free tier).
    """
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(f'"{query}"', max_results=3))
        return results, len(results)
    except Exception as e:
        print(f"⚠️ DDG error: {e}")
        return [], 0


def similarity(a, b):
    """Return the SequenceMatcher ratio (0.0–1.0) between two strings."""
    return SequenceMatcher(None, a, b).ratio()


def detect_duckduckgo(text, threshold=0.8):
    """Flag sentences whose DDG snippet similarity reaches `threshold`.

    Returns a list of (sentence, similarity, url) tuples.
    """
    sentences = sent_tokenize(text)
    matches = []
    for sent in sentences:
        if len(sent.split()) < 6:  # skip short or generic lines
            continue
        results, _ = search_duckduckgo(sent)
        for res in results:
            snippet = res.get("body", "")
            sim = similarity(sent.lower(), snippet.lower())
            if sim >= threshold:
                matches.append((sent, sim, res.get("href", "Unknown")))
                break  # first good match per sentence is enough
        time.sleep(1)  # throttle to avoid DDG rate limiting
    return matches


def search_google(query):
    """Exact-phrase Google Custom Search; returns (items, estimated_total).

    Returns ([], 0) when credentials are missing or the API call fails.
    """
    if not GOOGLE_API_KEY or not GOOGLE_CX:
        return [], 0
    try:
        service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
        res = service.cse().list(q=f'"{query}"', cx=GOOGLE_CX, num=3).execute()
        items = res.get("items", [])
        total = int(res.get("searchInformation", {}).get("totalResults", 0))
        return items, total
    except Exception as e:
        print(f"⚠️ Google search error: {e}")
        return [], 0


def detect_google(text):
    """Flag sentences with any exact-phrase Google hit.

    Returns a list of (sentence, total_hits, url) tuples.
    """
    sentences = sent_tokenize(text)
    matches = []
    for sent in sentences:
        if len(sent.split()) < 6:  # skip short/generic lines (consistent with DDG path)
            continue
        results, total = search_google(sent)
        # Guard `results`: Google's totalResults is an estimate and can be > 0
        # even when no items are returned — indexing results[0] would crash.
        if total > 0 and results:
            url = results[0].get("link", "Unknown")
            matches.append((sent, total, url))
        time.sleep(0.3)  # throttle API usage
    return matches


def hybrid_detect(text):
    """Run DDG detection first; fall back to Google if DDG found nothing.

    Returns {"highlighted_text": text with matches bolded, "sources": urls}.
    """
    ddg_matches = detect_duckduckgo(text)
    matches = ddg_matches if ddg_matches else detect_google(text)
    highlighted = text
    urls = []
    for sent, _, url in matches:
        highlighted = highlighted.replace(sent, f"**{sent}**")
        urls.append(url)
    return {"highlighted_text": highlighted, "sources": urls}


# -----------------------------
# Warm-up Parrot Model
# -----------------------------
def warmup():
    """Load the model and run one tiny inference so the first request is fast."""
    try:
        print("🔥 Warming up Parrot model...")
        model = get_parrot()
        _ = model.augment(input_phrase="hello world", do_diverse=False)
        print("✅ Warmup complete.")
    except Exception as e:
        print(f"⚠️ Warmup skipped: {e}")


warmup()

# -----------------------------
# 🔹 Gradio UI
# -----------------------------
rephrase_iface = gr.Interface(
    fn=rephrase,
    inputs=gr.Textbox(lines=10, placeholder="Paste your text here..."),
    outputs="text",
    title="🦜 Parrot Rephraser (Long Text)",
    description="Rephrase paragraphs while maintaining meaning.",
)

sentencewise_iface = gr.Interface(
    fn=rephrase_sentencewise_unique,
    inputs=gr.Textbox(lines=10, placeholder="Paste text here..."),
    outputs="text",
    title="🧩 Sentence-wise Paraphraser",
    description="Generates top 3 diverse rephrases per sentence.",
)

plagiarism_iface = gr.Interface(
    fn=hybrid_detect,
    inputs=gr.Textbox(lines=10, placeholder="Paste text to check plagiarism..."),
    outputs=gr.JSON(),
    title="🔍 Hybrid Plagiarism Detector",
    description="Detects copied sentences using DuckDuckGo & Google Custom Search.",
)

demo = gr.TabbedInterface(
    [rephrase_iface, sentencewise_iface, plagiarism_iface],
    ["Full Text Rephraser", "Sentence-wise Paraphrases", "Plagiarism Checker"],
)

# Guard the launch so importing this module (e.g. from tests) doesn't start
# the server; `python main.py` behaves exactly as before.
if __name__ == "__main__":
    demo.launch(server_port=7860, server_name="0.0.0.0", show_error=True)