Spaces:
Runtime error
Runtime error
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| import os | |
| import download_and_extract_zip | |
def gen_splits(chunk_size: int = 7500, chunk_overlap: int = 500) -> list:
    """Download the document archive, load its PDFs, and split them into chunks.

    The archive location is read from the ``URL`` environment variable and
    handed to ``download_and_extract_zip.download_and_extract_zip`` together
    with the current working directory. Every ``.pdf`` file (case-insensitive)
    found directly inside ``Model_TS_Full`` under that directory is loaded
    with ``PyPDFLoader`` and the resulting documents are split with
    ``RecursiveCharacterTextSplitter``.

    Args:
        chunk_size: ``chunk_size`` passed to the text splitter.
        chunk_overlap: ``chunk_overlap`` passed to the text splitter.

    Returns:
        The value returned by ``text_splitter.split_documents`` for all
        loaded documents.
    """
    # os.getenv returns None when URL is unset; the value is forwarded as-is.
    # NOTE(review): confirm how download_and_extract_zip treats a None URL.
    URL = os.getenv('URL')
    destination_folder = os.getcwd()
    download_and_extract_zip.download_and_extract_zip(URL, destination_folder)

    # Presumably the archive unpacks into this folder -- verify against the
    # helper; the directory name is fixed by the original code.
    model_dir = os.path.join(destination_folder, "Model_TS_Full")

    # os.listdir yields entries in arbitrary order; sort so that the document
    # (and therefore chunk) order is reproducible across runs and machines.
    pdf_paths = [
        os.path.join(model_dir, name)
        for name in sorted(os.listdir(model_dir))
        if name.lower().endswith(".pdf")
    ]

    docs = []
    for pdf_path in pdf_paths:
        docs.extend(PyPDFLoader(pdf_path).load())

    # Splitting Documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(docs)