Spaces:
Sleeping
Sleeping
| from langchain.document_loaders import PyPDFDirectoryLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.vectorstores import FAISS | |
| from langchain.llms import openai | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.embeddings import HuggingFaceBgeEmbeddings | |
| import streamlit as st | |
| ########################################################################################### | |
def get_pdf_load(directory="./"):
    """Load every PDF found in *directory* as LangChain documents.

    Args:
        directory: Folder handed to ``PyPDFDirectoryLoader``. Defaults to the
            current working directory, which is what this function always
            used before the parameter existed.

    Returns:
        Whatever ``PyPDFDirectoryLoader.load()`` returns for that folder.
    """
    loader = PyPDFDirectoryLoader(directory)
    return loader.load()
| ######################################################################################## | |
def get_text_split(document, chunk_size=1000, chunk_overlap=100):
    """Split loaded documents into overlapping chunks.

    Args:
        document: The documents to split (the value returned by
            ``get_pdf_load``).
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
            Defaults to the previously hard-coded 1000.
        chunk_overlap: ``chunk_overlap`` passed to the splitter. Defaults to
            the previously hard-coded 100.

    Returns:
        The result of ``split_documents`` on *document*.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(document)
| ######################################################################################### | |
def get_vectorstore(texts):
    """Embed *texts*, index them with FAISS, persist the index, and return it.

    Args:
        texts: Document chunks to index (the value returned by
            ``get_text_split``).

    Returns:
        The FAISS store built from *texts*. As a side effect the store is
        also saved under ``vectore_Imstudio/faiss``.
    """
    # NOTE(review): a BGE embeddings wrapper is used with an E5-family model
    # name -- confirm the instruction prefix this wrapper applies to queries
    # is appropriate for this model.
    embedding_model = HuggingFaceBgeEmbeddings(
        model_name='Avditvs/multilingual-e5-small-distill-base-0.1',
        model_kwargs={'device': 'cpu'},
    )
    index = FAISS.from_documents(texts, embedding_model)
    # Persist the index on disk so it can be inspected or reloaded later.
    index.save_local('vectore_Imstudio/faiss')
    return index
| ############################################################################################ | |
def get_chain(
    db,
    *,
    base_url="https://0cc8-46-148-43-9.ngrok-free.app/v1",
    api_key="lm-studio",
    model="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
    temperature=0.1,
    k=2,
):
    """Build the conversational retrieval chain on top of a vector store.

    All keyword arguments default to the values that used to be hard-coded,
    so ``get_chain(db)`` behaves exactly as before.

    Args:
        db: Vector store exposing ``as_retriever`` (the value returned by
            ``get_vectorstore``).
        base_url: Base URL of the chat-completions endpoint given to
            ``ChatOpenAI``. The default is an ngrok tunnel address --
            presumably temporary, so callers will likely need to override it.
        api_key: API key sent to that endpoint.
        model: Model identifier requested from the endpoint.
        temperature: Sampling temperature for the LLM.
        k: Number of chunks the retriever returns per query.

    Returns:
        A ``ConversationalRetrievalChain`` configured to also return the
        source documents it used.
    """
    llm = ChatOpenAI(
        base_url=base_url,
        api_key=api_key,
        temperature=temperature,
        model=model,
    )
    retriever = db.as_retriever(search_kwargs={'k': k})
    return ConversationalRetrievalChain.from_llm(
        llm,
        retriever,
        return_source_documents=True,
    )
| #################################################################################################################### | |
def get_conversation(query_user, qa_chain, chat_history=None):
    """Ask the chain a question and render its answer on the page.

    Args:
        query_user: The question text typed by the user.
        qa_chain: Callable chain accepting a dict with ``question`` and
            ``chat_history`` keys and returning a dict with an ``answer`` key.
        chat_history: Optional earlier turns to give the chain as context.
            When omitted an empty history is used, which matches the previous
            behaviour of this function. The passed sequence is copied, never
            mutated.

    Returns:
        The raw result dict produced by *qa_chain*.
    """
    history = [] if chat_history is None else list(chat_history)
    # Persian instruction prepended to every question; it reads roughly
    # "Explain this question in Persian:".
    instruction = "این سوال را به زبان فارسی تشریح کن:"
    query = instruction + query_user
    result = qa_chain({'question': query, 'chat_history': history})
    st.write('Answer of you question:' + result['answer'] + '\n')
    return result
| #################################################################################################################### | |
@st.cache_resource(show_spinner=False)
def _build_qa_chain():
    """Run the load -> split -> embed -> chain pipeline and cache the result.

    Streamlit re-executes the script on every widget interaction; without
    caching, each click would reload the PDFs and rebuild the FAISS index.
    The status messages written here are replayed by Streamlit on cache hits,
    so the page looks the same on every rerun.

    Note: because the result is cached, PDFs added after the first run are
    not picked up until the cache is cleared or the app restarts.
    """
    document = get_pdf_load()
    st.write("load pdf")
    texts = get_text_split(document)
    st.write("text split")
    db = get_vectorstore(texts)
    st.write("vectore store")
    qa_chain = get_chain(db)
    st.write("compelete build model")
    return qa_chain


def main():
    """Render the chat page and answer the user's question on demand."""
    st.set_page_config(
        page_title="Chat Bot PDFs",
        page_icon=":books:",
    )
    st.header("Chat Bot PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")

    qa_chain = _build_qa_chain()

    if st.button("Answer"):
        if not user_question.strip():
            # Avoid sending a prompt that contains only the fixed prefix.
            st.warning("Please enter a question first.")
        else:
            with st.spinner("Answering"):
                get_conversation(query_user=user_question, qa_chain=qa_chain)

    # TODO: a "CLEAR" button (cache clearing) and a sidebar
    # "Process build model" button were sketched here but never enabled.
# Script entry point: only start the app when this file is executed directly.
if __name__ == "__main__":
    main()