# ==========================================================
# File: app.py (Versi Stabil - Meguri RAG)
# FastAPI entrypoint untuk sistem RAG berbasis Hugging Face
# ==========================================================
import os
import sys
import tempfile
import asyncio
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
from typing import List, Optional, Dict, Any

# ==========================================================
# 🔧 ENVIRONMENT CONFIGURATION
# ==========================================================
# Point the Hugging Face cache at a writable temp directory.
# NOTE: this must be set BEFORE huggingface_hub is imported below,
# otherwise the library picks up the default (possibly read-only) path.
os.environ["HF_HOME"] = os.path.join(tempfile.gettempdir(), "huggingface")
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

# Hugging Face token (configured in Settings > Secrets)
HF_TOKEN = os.getenv("HF_TOKEN", None)
if not HF_TOKEN:
    print("⚠️ Peringatan: HF_TOKEN tidak ditemukan di environment!")
else:
    print("🔑 HF_TOKEN ditemukan.")

# ==========================================================
# 🧩 MAIN IMPORTS (after HF_HOME is configured)
# ==========================================================
from huggingface_hub import login
from core import rag_services

# ==========================================================
# ⚙️ MODEL & RAG CONFIGURATION
# ==========================================================
APP_VERSION = "v12.3.4"
print(f"===== Application Startup at {os.getenv('START_TIME', 'Now')} =====")
print(f"🧩 App Version: {APP_VERSION}")

# Best-effort Hugging Face login: a failure is logged but does not
# abort startup (public models may still be downloadable).
if HF_TOKEN:
    try:
        login(token=HF_TOKEN, add_to_git_credential=False)
        print("✅ Hugging Face login success")
    except Exception as e:
        print(f"⚠️ Gagal login ke Hugging Face: {e}")

# Model configuration (overridable via environment variables)
HF_MODEL_REPO = os.getenv("HF_MODEL_REPO", "ukung/Nusantara-1.8b-Indo-Chat-GGUF")
HF_GGUF_FILE = os.getenv("HF_GGUF_FILE", "Nusantara-1.8b-Indo-Chat-q4_k_m.gguf")
HF_MODEL_TYPE = os.getenv("HF_MODEL_TYPE", "llama")

print("⚙️ Model Configuration:")
print(f" HF_MODEL_REPO : {HF_MODEL_REPO}")
print(f" HF_GGUF_FILE : {HF_GGUF_FILE}")
# FIX: this print was a single f-string broken across two physical lines
# in the original (a syntax error as written); rejoined into one line.
print(f" HF_MODEL_TYPE : {HF_MODEL_TYPE}")

# ==========================================================
# 🧠 RAG SERVICE INITIALIZATION
# ==========================================================
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


def get_embedding_model():
    """Load the sentence-transformer embedding model.

    Returns:
        A HuggingFaceEmbeddings instance, or None if loading failed
        (e.g. no network access or missing model weights).
    """
    try:
        return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    except Exception as e:
        print(f"⚠️ Gagal memuat embedding model: {e}")
        return None


embedding_model = get_embedding_model()

# FIX: guard against a failed embedding load — the original passed
# embedding_model=None straight into load/create_vector_store, which
# would fail inside FAISS. Without embeddings there is no vector store.
if embedding_model is None:
    vector_store = None
else:
    # Try to load a cached FAISS index first.
    vector_store = rag_services.load_vector_store(embedding_model)
    # If no cached index exists, build one from scratch.
    if vector_store is None:
        print("🧠 Inisialisasi RAG (manual mode)...")
        vector_store = rag_services.create_vector_store(embedding_model)

if vector_store is None:
    print("⚠️ RAG initialization failed: ❌ Tidak ada dokumen untuk RAG.")
else:
    print("✅ RAG siap digunakan!")


# ==========================================================
# 💬 CHAT MODEL CLIENT (LLM)
# ==========================================================
class DummyLLM:
    """Stand-in inference client simulating a local model / API.

    Echoes a truncated copy of the prompt so the pipeline can be
    exercised without loading real model weights.
    """

    def __call__(self, prompt: str) -> str:
        return f"(Dummy Response) {prompt[:200]}..."
llm = DummyLLM()

# ==========================================================
# 🔗 BUILD THE RAG CHAIN
# ==========================================================
# Only build the chain when a vector store is available; the /chat
# endpoint reports an error otherwise.
if vector_store:
    rag_chain = rag_services.get_rag_chain(llm, vector_store)
else:
    rag_chain = None

# ==========================================================
# 🚀 FASTAPI SETUP
# ==========================================================
app = FastAPI(title="Meguri RAG Assistant", version=APP_VERSION)


class ChatRequest(BaseModel):
    """Incoming chat payload.

    Note: pydantic deep-copies mutable defaults per instance, so the
    []/{} defaults below are not shared across requests.
    """

    question: str
    chat_history: Optional[List[Dict[str, Any]]] = []
    user_info: Optional[Dict[str, Any]] = {}


class ChatResponse(BaseModel):
    """Response body for /chat."""

    answer: str


@app.get("/")
async def root():
    """Liveness/info endpoint."""
    return {
        "app": "Meguri RAG Assistant",
        "version": APP_VERSION,
        "status": "running",
    }


@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(req: ChatRequest):
    """Answer a question via the RAG chain.

    Raises:
        HTTPException 500: when the chain is not initialized or the
        invocation fails.
    """
    if not rag_chain:
        raise HTTPException(status_code=500, detail="RAG belum siap — vector store kosong.")
    try:
        # invoke() is synchronous; run it in a worker thread so the
        # event loop stays responsive during generation.
        result = await asyncio.to_thread(
            rag_chain.invoke,
            {"question": req.question, "chat_history": req.chat_history, "user_info": req.user_info},
        )
    except Exception as e:
        # FIX: chain the original exception for a useful traceback
        # (the original raised without "from e").
        raise HTTPException(status_code=500, detail=str(e)) from e
    else:
        # Keep the try body minimal: only the call that can raise.
        return {"answer": result}


# ==========================================================
# 🔥 ENTRY POINT (for manual docker run)
# ==========================================================
if __name__ == "__main__":
    import uvicorn

    uvicorn.run("app:app", host="0.0.0.0", port=7860)