Spaces:
Build error
Build error
| import streamlit as st | |
| import latex2markdown | |
| from langchain.docstore.document import Document | |
| import chromadb | |
| from chromadb.config import Settings | |
| import load_model | |
| from load_model import load_embedding | |
| from load_vectors import load_from_file, load_and_split, create_and_add, load_from_web | |
# Directory of the persisted Chroma database, taken from load_model; passed to chromadb Settings in get_chroma_client().
| persist_directory = load_model.persist_directory | |
def format_document(document: Document):
    """Turn a document into its dictionary representation for display.

    TODO: Implement a nice style.
    """
    as_mapping = document.dict()
    return as_mapping
def format_result_set(result):
    """Render a query result on the Streamlit page.

    The text under ``result["result"]`` is converted from LaTeX to Markdown
    and written out. A checkbox lets the user additionally list every entry
    of ``result["source_documents"]``, each passed through ``format_document``.
    """
    answer_markdown = latex2markdown.LaTeX2Markdown(result["result"]).to_markdown()
    st.write(answer_markdown)

    show_sources = st.checkbox('Show source documents')
    # Looked up before the checkbox test, so a missing key fails either way.
    sources = result["source_documents"]
    if not show_sources:
        return

    st.write('Source Documents:')
    for source in sources:
        st.write(format_document(source))
def get_chroma_client():
    """Build a Chroma client using the "duckdb+parquet" backend.

    The data location is the module-level ``persist_directory``. A new client
    object is constructed on every call.
    """
    settings = Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory=persist_directory,
    )
    return chromadb.Client(settings)
#@st.cache_data
def retrieve_collections():
    """Return a tuple with one summary dict per collection in the Chroma store.

    Each dict has the keys ``'name'``, ``'model_name'`` (read from the
    collection's metadata) and ``'metadata'`` (the full metadata mapping).
    """

    def summarize(entry):
        # One plain dict per collection, so the UI can display and index it.
        return {
            'name': entry.name,
            'model_name': entry.metadata['model_name'],
            "metadata": entry.metadata,
        }

    chroma = get_chroma_client()
    return tuple(map(summarize, chroma.list_collections()))
def _parse_urls(raw):
    """Turn a comma-separated string of URLs into a list of clean URLs.

    Double quotes and surrounding whitespace are removed from every entry,
    and empty entries (for example from a trailing comma) are dropped.
    """
    cleaned = (part.replace('"', "").strip() for part in raw.split(","))
    return [url for url in cleaned if url]


def _split_and_store(docs, chunk_size, collection):
    """Split ``docs`` into chunks, add them to ``collection`` and report success.

    ``chunk_size`` is the raw text taken from the UI; ``int()`` raises
    ``ValueError`` when it is not a whole number. ``collection`` is one of the
    dicts produced by ``retrieve_collections``.
    """
    sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
    create_and_add(
        collection["name"],
        sub_docs,
        collection['model_name'],
        collection['metadata'],
    )
    st.write("Upload succesful")


def load_files():
    """Render the Streamlit UI for managing collections and uploading documents.

    Two modes are offered through a radio button:

    * "Add Documents": pick an existing collection, optionally delete it, and
      upload documents to it either from local files or from a list of URLs.
    * "Start new collection": create a collection with a chosen embedding model.
    """
    client = get_chroma_client()
    option = st.radio(
        "",
        options=["Add Documents", "Start new collection"],
    )

    if option == "Add Documents":
        collections = retrieve_collections()
        selected_collection = st.selectbox(
            'Add to exsisting collection or create a new one',
            collections,
        )

        # The button is always rendered; deletion only runs when something is
        # actually selected (the selection is None when no collections exist).
        delete_clicked = st.button('Delete Collection (⚠️ This is destructive and not reversible)')
        if delete_clicked and selected_collection:
            client.delete_collection(name=selected_collection["name"])
            # NOTE: if caching is re-enabled on retrieve_collections, its
            # cache has to be cleared here.
            # The collection is gone, so do not offer uploads into it.
            selected_collection = None

        if selected_collection:
            st.write("Selected Vectorstore:", selected_collection)
            option = st.radio(
                "",
                options=["Upload Files from Local", "Upload Files from Web"],
            )
            if option == "Upload Files from Local":
                st.write('Source Documents:')
                uploaded_files = st.file_uploader("Choose a PDF file", accept_multiple_files=True)
                chunk_size = st.text_area('chunk Size:', 1000)
                if st.button('Upload'):
                    docs = load_from_file(uploaded_files)
                    _split_and_store(docs, chunk_size, selected_collection)
            else:
                st.write('Urls of Source Documents (Comma separated):')
                raw_urls = st.text_area('Urls:', '')
                chunk_size = st.text_area('chunk Size:', 1000)
                # Split on commas first and clean each entry afterwards;
                # removing the commas before splitting would glue all URLs
                # into a single string.
                urls = _parse_urls(raw_urls)
                if st.button('Upload'):
                    docs = load_from_web(urls)
                    _split_and_store(docs, chunk_size, selected_collection)
    else:
        collection = st.text_area('Name of your new collection:', '')
        model_name = st.text_area('Choose the embedding function:', "hkunlp/instructor-large")
        # Names of three characters or fewer are ignored, as before.
        if st.button('Create') and len(collection) > 3:
            ef = load_embedding(model_name)
            metadata = {"loaded_docs": [], "Subject": "Terms Example", "model_name": ef.model_name}
            client.create_collection(collection, embedding_function=ef, metadata=metadata)
            # NOTE: if caching is re-enabled on retrieve_collections, its
            # cache has to be cleared here.
            st.write("Collection " + collection + " succesfully created.")