# ==========================================================
# File: app.py (Versi Stabil - Meguri RAG)
# FastAPI entrypoint untuk sistem RAG berbasis Hugging Face
# ==========================================================
import os
import sys
import tempfile
import asyncio
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
from typing import List, Optional, Dict, Any

# ==========================================================
# 🔧 ENVIRONMENT CONFIGURATION
# ==========================================================
# Point the Hugging Face cache at a writable temp directory.
# NOTE: this must be set BEFORE huggingface_hub is imported below,
# otherwise the library picks up the default (possibly read-only) path.
os.environ["HF_HOME"] = os.path.join(tempfile.gettempdir(), "huggingface")
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

# Hugging Face token (configured in Settings > Secrets)
HF_TOKEN = os.getenv("HF_TOKEN", None)
if not HF_TOKEN:
    print("⚠️ Peringatan: HF_TOKEN tidak ditemukan di environment!")
else:
    print("🔑 HF_TOKEN ditemukan.")

# ==========================================================
# 🧩 MAIN IMPORTS (after HF_HOME is configured)
# ==========================================================
from huggingface_hub import login
from core import rag_services

# ==========================================================
# ⚙️ MODEL & RAG CONFIGURATION
# ==========================================================
APP_VERSION = "v12.3.4"
print(f"===== Application Startup at {os.getenv('START_TIME', 'Now')} =====")
print(f"🧩 App Version: {APP_VERSION}")

# Best-effort Hugging Face login: a failure is logged but does not
# abort startup (public models may still be downloadable).
if HF_TOKEN:
    try:
        login(token=HF_TOKEN, add_to_git_credential=False)
        print("✅ Hugging Face login success")
    except Exception as e:
        print(f"⚠️ Gagal login ke Hugging Face: {e}")

# Model configuration (overridable via environment variables)
HF_MODEL_REPO = os.getenv("HF_MODEL_REPO", "ukung/Nusantara-1.8b-Indo-Chat-GGUF")
HF_GGUF_FILE = os.getenv("HF_GGUF_FILE", "Nusantara-1.8b-Indo-Chat-q4_k_m.gguf")
HF_MODEL_TYPE = os.getenv("HF_MODEL_TYPE", "llama")

print("⚙️ Model Configuration:")
print(f" HF_MODEL_REPO : {HF_MODEL_REPO}")
print(f" HF_GGUF_FILE : {HF_GGUF_FILE}")
# FIX: this print was a single f-string broken across two physical lines
# in the original (a syntax error as written); rejoined into one line.
print(f" HF_MODEL_TYPE : {HF_MODEL_TYPE}")

# ==========================================================
# 🧠 RAG SERVICE INITIALIZATION
# ==========================================================
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


def get_embedding_model():
    """Load the sentence-transformer embedding model.

    Returns:
        A HuggingFaceEmbeddings instance, or None if loading failed
        (e.g. no network access or missing model weights).
    """
    try:
        return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    except Exception as e:
        print(f"⚠️ Gagal memuat embedding model: {e}")
        return None


embedding_model = get_embedding_model()

# FIX: guard against a failed embedding load — the original passed
# embedding_model=None straight into load/create_vector_store, which
# would fail inside FAISS. Without embeddings there is no vector store.
if embedding_model is None:
    vector_store = None
else:
    # Try to load a cached FAISS index first.
    vector_store = rag_services.load_vector_store(embedding_model)
    # If no cached index exists, build one from scratch.
    if vector_store is None:
        print("🧠 Inisialisasi RAG (manual mode)...")
        vector_store = rag_services.create_vector_store(embedding_model)

if vector_store is None:
    print("⚠️ RAG initialization failed: ❌ Tidak ada dokumen untuk RAG.")
else:
    print("✅ RAG siap digunakan!")


# ==========================================================
# 💬 CHAT MODEL CLIENT (LLM)
# ==========================================================
class DummyLLM:
    """Stand-in inference client simulating a local model / API.

    Echoes a truncated copy of the prompt so the pipeline can be
    exercised without loading real model weights.
    """

    def __call__(self, prompt: str) -> str:
        return f"(Dummy Response) {prompt[:200]}..."
llm = DummyLLM()

# ==========================================================
# 🔗 BUILD THE RAG CHAIN
# ==========================================================
# Only build the chain when a vector store is available; the /chat
# endpoint reports an error otherwise.
if vector_store:
    rag_chain = rag_services.get_rag_chain(llm, vector_store)
else:
    rag_chain = None

# ==========================================================
# 🚀 FASTAPI SETUP
# ==========================================================
app = FastAPI(title="Meguri RAG Assistant", version=APP_VERSION)


class ChatRequest(BaseModel):
    """Incoming chat payload.

    Note: pydantic deep-copies mutable defaults per instance, so the
    []/{} defaults below are not shared across requests.
    """

    question: str
    chat_history: Optional[List[Dict[str, Any]]] = []
    user_info: Optional[Dict[str, Any]] = {}


class ChatResponse(BaseModel):
    """Response body for /chat."""

    answer: str


@app.get("/")
async def root():
    """Liveness/info endpoint."""
    return {
        "app": "Meguri RAG Assistant",
        "version": APP_VERSION,
        "status": "running",
    }


@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(req: ChatRequest):
    """Answer a question via the RAG chain.

    Raises:
        HTTPException 500: when the chain is not initialized or the
        invocation fails.
    """
    if not rag_chain:
        raise HTTPException(status_code=500, detail="RAG belum siap — vector store kosong.")
    try:
        # invoke() is synchronous; run it in a worker thread so the
        # event loop stays responsive during generation.
        result = await asyncio.to_thread(
            rag_chain.invoke,
            {"question": req.question, "chat_history": req.chat_history, "user_info": req.user_info},
        )
    except Exception as e:
        # FIX: chain the original exception for a useful traceback
        # (the original raised without "from e").
        raise HTTPException(status_code=500, detail=str(e)) from e
    else:
        # Keep the try body minimal: only the call that can raise.
        return {"answer": result}


# ==========================================================
# 🔥 ENTRY POINT (for manual docker run)
# ==========================================================
if __name__ == "__main__":
    import uvicorn

    uvicorn.run("app:app", host="0.0.0.0", port=7860)