Spaces:
Sleeping
Sleeping
# Build a FAISS vector index from a single PDF using OpenAI embeddings.
#
# Pipeline: load the PDF -> split it into overlapping text chunks -> embed the
# chunks -> build a FAISS index -> save the index under ``out_dir``.
#
# Configuration: ``OPENAI_API_KEY`` must be available in the process
# environment or in a ``.env`` file picked up by ``load_dotenv()``.
import os

from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS  # use community version

# Load environment variables and fail fast when the API key is missing, so the
# problem is reported before any PDF parsing work is done.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise SystemExit(
        "OPENAI_API_KEY is not set; add it to your environment or .env file."
    )

# Paths
pdf_path = os.path.join("data", "diabetes.pdf")
out_dir = os.path.join("storage", "faiss_index")

if not os.path.isfile(pdf_path):
    raise SystemExit(f"PDF not found: {pdf_path}")

print("📄 Loading PDF...")
loader = PyPDFLoader(pdf_path)
docs = loader.load()

print("✂️ Splitting into chunks...")
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(docs)
# A PDF with no extractable text produces no chunks; stop here rather than
# asking the embedding backend to index an empty list.
if not chunks:
    raise SystemExit(f"No text chunks could be produced from {pdf_path}")

print("🧠 Creating embeddings with OpenAI...")
# Pass the key explicitly so the value validated above is the one that is used.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

print("📦 Building FAISS index...")
vectorstore = FAISS.from_documents(chunks, embeddings)

# Create the output directory only once there is an index to write, so a
# failed run does not leave an empty index directory behind.
os.makedirs(out_dir, exist_ok=True)
vectorstore.save_local(out_dir)
print(f"✅ FAISS index created at {out_dir}")