import PyPDF2
import os
from typing import List, Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
import pickle


class PDFProcessor:
    """Process PDF files and create a searchable vector database.

    Pipeline: extract raw text from a PDF, split it into overlapping
    chunks, embed the chunks with OpenAI embeddings, and index them in a
    FAISS vector store for similarity search.
    """

    def __init__(self, pdf_path: str = "Health Tech Hub Copenhagen.pdf"):
        """Initialize the processor.

        Args:
            pdf_path: Path to the PDF file to process.
        """
        self.pdf_path = pdf_path
        self.vector_store: Optional[FAISS] = None
        self.text_chunks: List[str] = []

    def extract_text_from_pdf(self) -> str:
        """Extract text content from the PDF file.

        Returns:
            All page texts concatenated, one newline between pages.

        Raises:
            FileNotFoundError: If ``self.pdf_path`` does not exist.
            Exception: Re-raised from PyPDF2 on unreadable/corrupt PDFs.
        """
        if not os.path.exists(self.pdf_path):
            raise FileNotFoundError(f"PDF file not found: {self.pdf_path}")
        text = ""
        try:
            with open(self.pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    # extract_text() can return None for image-only pages;
                    # guard so concatenation never raises TypeError.
                    text += (page.extract_text() or "") + "\n"
            print(f"✅ Successfully extracted text from {self.pdf_path}")
            return text
        except Exception as e:
            print(f"❌ Error extracting text from PDF: {e}")
            raise

    def split_text_into_chunks(self, text: str, chunk_size: int = 1000,
                               chunk_overlap: int = 200) -> List[str]:
        """Split text into smaller chunks for better processing.

        Args:
            text: Full document text to split.
            chunk_size: Target maximum characters per chunk.
            chunk_overlap: Characters shared between adjacent chunks, so
                context spanning a boundary is not lost.

        Returns:
            The list of text chunks (also stored on ``self.text_chunks``).
        """
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
        )
        chunks = text_splitter.split_text(text)
        self.text_chunks = chunks
        print(f"✅ Split text into {len(chunks)} chunks")
        return chunks

    def create_vector_store(self, chunks: List[str]) -> FAISS:
        """Create a FAISS vector store from text chunks.

        Requires OpenAI credentials in the environment (used by
        ``OpenAIEmbeddings``).

        Args:
            chunks: Text chunks to embed and index.

        Returns:
            The FAISS store (also stored on ``self.vector_store``).

        Raises:
            Exception: Re-raised on embedding/indexing failure.
        """
        try:
            embeddings = OpenAIEmbeddings()
            vector_store = FAISS.from_texts(chunks, embeddings)
            self.vector_store = vector_store
            print("✅ Vector store created successfully")
            return vector_store
        except Exception as e:
            print(f"❌ Error creating vector store: {e}")
            raise

    def search_similar_content(self, query: str, k: int = 3) -> List[str]:
        """Search for similar content in the PDF.

        Args:
            query: Natural-language query to embed and search with.
            k: Number of most-similar chunks to return.

        Returns:
            Up to ``k`` matching chunk texts; empty list on search error.

        Raises:
            ValueError: If the vector store has not been initialized.
        """
        # Explicit None check: FAISS objects should not be tested for
        # truthiness (an "empty" store must not look uninitialized).
        if self.vector_store is None:
            raise ValueError(
                "Vector store not initialized. Call process_pdf() first."
            )
        try:
            results = self.vector_store.similarity_search(query, k=k)
            return [doc.page_content for doc in results]
        except Exception as e:
            print(f"❌ Error searching content: {e}")
            return []

    def process_pdf(self) -> bool:
        """Complete PDF processing pipeline.

        Extract text, split into chunks, and build the vector store.

        Returns:
            True on success, False if any step failed.
        """
        try:
            print(f"🔄 Processing PDF: {self.pdf_path}")
            # Extract text
            text = self.extract_text_from_pdf()
            # Split into chunks
            chunks = self.split_text_into_chunks(text)
            # Create vector store
            self.create_vector_store(chunks)
            print("✅ PDF processing completed successfully")
            return True
        except Exception as e:
            print(f"❌ PDF processing failed: {e}")
            return False

    def save_vector_store(self, filepath: str = "vector_store.pkl"):
        """Save the vector store to a file (no-op if not initialized).

        NOTE(review): pickling a FAISS store is fragile across library
        versions; FAISS's own save_local/load_local is the recommended
        persistence mechanism — consider migrating.

        Args:
            filepath: Destination path for the pickled store.
        """
        if self.vector_store:
            try:
                with open(filepath, 'wb') as f:
                    pickle.dump(self.vector_store, f)
                print(f"✅ Vector store saved to {filepath}")
            except Exception as e:
                print(f"❌ Error saving vector store: {e}")

    def load_vector_store(self, filepath: str = "vector_store.pkl") -> bool:
        """Load a vector store from a file.

        SECURITY: pickle.load executes arbitrary code from the file —
        only load files this application itself produced; never load
        untrusted input.

        Args:
            filepath: Path to a previously saved pickle file.

        Returns:
            True if loaded successfully, False otherwise.
        """
        try:
            if os.path.exists(filepath):
                with open(filepath, 'rb') as f:
                    self.vector_store = pickle.load(f)
                print(f"✅ Vector store loaded from {filepath}")
                return True
            else:
                print(f"⚠️ Vector store file not found: {filepath}")
                return False
        except Exception as e:
            print(f"❌ Error loading vector store: {e}")
            return False