from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document


def prepare_documents(text: str, chunk_size=1000, chunk_overlap=200):
    """
    Splits long log text into smaller chunks for embedding.
    """
    # Wrap the raw log text in a single Document so the splitter can process it
    docs = [Document(page_content=text)]
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_documents(docs)


def create_vectorstore(documents, model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """
    Embeds chunks and stores them in a FAISS vector DB for retrieval.
    """
    # Load a local sentence-transformers model and index the chunk embeddings in FAISS
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(documents, embeddings)
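
A minimal usage sketch tying the two helpers together. The log file path (`app.log`) and the query string are illustrative assumptions, not part of the original code; `similarity_search` is the standard LangChain vector-store retrieval call.

# Example usage (app.log path and query are hypothetical)
with open("app.log", "r", encoding="utf-8") as f:
    log_text = f.read()

chunks = prepare_documents(log_text, chunk_size=1000, chunk_overlap=200)
vectorstore = create_vectorstore(chunks)

# Retrieve the chunks most relevant to a natural-language query
results = vectorstore.similarity_search("database connection timeout", k=3)
for doc in results:
    print(doc.page_content[:200])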