import os

from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langdetect import detect
from dotenv import load_dotenv

load_dotenv()  # loads .env into os.environ
# NOTE(review): this is assigned after the Hugging Face imports above; if the
# library reads the variable at import time it may have no effect -- confirm.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/.cache"


class Recommender:
    """Movie recommender: FAISS retrieval plus LLM explanation/translation.

    The methods correspond to the stages named in the original comments:
    language detection (1a), translation to/from English (1b and 4),
    retrieval (2) and explanation (3). Chaining the stages is left to the
    caller.

    Attributes:
        embeddings: HuggingFace sentence-transformer embeddings used to
            load and query the index.
        db: FAISS vector store loaded from ``index_dir``.
        llmExplanation: chat model used by ``explain``.
        llmTranslation: chat model used by ``translate``.
    """

    _OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
    _DEFAULT_MODEL = "mistralai/mistral-7b-instruct:free"

    def __init__(self, index_dir="faiss_index"):
        """Load the FAISS index and create the two LLM clients.

        Args:
            index_dir: Directory containing the saved FAISS index.

        Raises:
            KeyError: If the ``OPENROUTER`` environment variable is not set.
        """
        # Embeddings (English only)
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2")

        # SECURITY: allow_dangerous_deserialization=True lets load_local
        # unpickle the stored docstore, which can execute arbitrary code.
        # Only point index_dir at index files you created or fully trust.
        self.db = FAISS.load_local(
            index_dir,
            self.embeddings,
            allow_dangerous_deserialization=True,
        )

        # OpenRouter LLMs (used for explanations + translation). They are
        # configured identically today; pass a different ``model`` to
        # _make_llm for the translation client to switch it independently.
        self.llmExplanation = self._make_llm()
        self.llmTranslation = self._make_llm()  # switch here

    @staticmethod
    def _make_llm(model=_DEFAULT_MODEL):
        """Build a deterministic (temperature 0) OpenRouter chat client."""
        return ChatOpenAI(
            openai_api_key=os.environ["OPENROUTER"],
            openai_api_base=Recommender._OPENROUTER_API_BASE,
            model=model,
            temperature=0,
            max_tokens=512,
        )

    # Stage 1a: Language detection
    def detect_language(self, text: str) -> str:
        """Return the language code that ``langdetect.detect`` reports.

        Any exception raised by ``detect`` propagates to the caller; no
        fallback is applied here.
        """
        return detect(text)

    # Stage 1b + 4: Translation (to/from English)
    def translate(self, text: str, target_lang: str = "en") -> str:
        """Ask the translation LLM to translate ``text``.

        Args:
            text: Text to translate.
            target_lang: Target language, interpolated verbatim into the
                prompt (defaults to ``"en"``).

        Returns:
            The ``content`` of the LLM response.
        """
        prompt = f"Translate this text into {target_lang}: {text}"
        return self.llmTranslation.invoke(prompt).content

    # Stage 2: Retrieval
    def search(self, query: str, k: int = 10):
        """Return the result of a ``k``-nearest similarity search on the index."""
        return self.db.similarity_search(query, k=k)

    # Stage 3: Explanation (always in English)
    def explain(self, query: str, docs, user_lang="en") -> list:
        """Generate a one-sentence recommendation rationale per document.

        One LLM call is made for every document in ``docs``.

        Args:
            query: The user's request, inserted into each prompt.
            docs: Iterable of documents exposing a ``metadata`` mapping.
                ``title`` is required; ``genres``, ``overview``,
                ``director``, ``cast``, ``release_date`` and
                ``vote_average`` are optional and come back as ``None``
                when absent.
            user_lang: Currently unused; kept for interface compatibility.
                Explanations are not translated in this method.

        Returns:
            A list of dicts, one per document and in input order, holding
            the metadata fields above plus an ``explanation`` entry.

        Raises:
            KeyError: If a document's metadata has no ``title``.
        """
        results = []
        for doc in docs:
            meta = doc.metadata
            title = meta["title"]
            # Read the optional fields once with .get so the prompt and the
            # returned record agree; indexing them directly used to raise
            # KeyError after the LLM call had already been made.
            genres = meta.get("genres")
            overview = meta.get("overview")

            prompt = (
                f"User request: {query}\n"
                f"Candidate movie: {title} "
                f"({genres}).\n"
                f"Overview: {overview}\n\n"
                "Explain in one sentence why this movie could be a good recommendation "
                "for the user’s request. Focus only on positive connections."
            )
            explanation = self.llmExplanation.invoke(prompt).content

            results.append({
                "title": title,
                "genres": genres,
                "overview": overview,
                "director": meta.get("director"),
                "cast": meta.get("cast"),
                "release_date": meta.get("release_date"),
                "vote_average": meta.get("vote_average"),
                "explanation": explanation,  # not translated at this stage
            })
        return results