import gradio as gr

from utils.utils import load_models, get_top_docs
from utils.doc_utils import get_docs

# Load models (LLM and sentence embedding model)
sentence_embedding_model, llm = load_models()

# Load documents and create numerical embeddings
docs = get_docs()
doc_embeddings = sentence_embedding_model.encode(docs)
def rag_pipeline(user_query):
    """Retrieve relevant docs, construct a prompt, and generate the LLM response."""
    user_query_embedding = sentence_embedding_model.encode(user_query)

    # Get top matching documents
    top_docs = get_top_docs(user_query_embedding, docs, doc_embeddings, top_n=3)
    retrieved_docs = [doc for doc, _, _ in top_docs]

    # Join the retrieved docs into plain text so the prompt doesn't
    # contain a Python list repr (brackets and quote marks)
    context = "\n\n".join(retrieved_docs)

    # Construct LLM prompt
    prompt = f"""
System Instruction:
"You are an expert assistant who provides clear and concise answers about an individual named Matthew Schulz based on provided context information."

Retrieved Context:
{context}

User Query:
"{user_query}"

Instruction:
"Using the above context, provide a detailed and accurate answer. If the context does not include relevant information, state that you do not have this information and suggest that the user reach out to Matthew directly via his email ([email protected])."
"""

    # Run inference; tokens are streamed internally, then joined into the full reply
    response = llm(f"<s>[INST] {prompt} [/INST]", stream=True)
    return "".join(response)
# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# RAG-powered Assistant")
    inp = gr.Textbox(label="Ask a question")
    out = gr.Textbox(label="Response")
    inp.submit(rag_pipeline, inputs=inp, outputs=out)

if __name__ == "__main__":
    demo.launch()
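
The utils helpers (load_models, get_docs, get_top_docs) are imported but not shown above. Below is a minimal sketch of what they might look like, assuming sentence-transformers for embeddings and ctransformers for the LLM; the "<s>[INST] ... [/INST]" template and the fact that "".join(response) works on the stream both fit ctransformers with a Mistral-style model. Every model name and implementation detail here is an assumption, not the Space's actual code.

# Hypothetical sketch of the unshown utils module (assumed, not the Space's code)
import numpy as np
from sentence_transformers import SentenceTransformer
from ctransformers import AutoModelForCausalLM

def load_models():
    # Assumed model choices; the real Space may use different checkpoints
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    llm = AutoModelForCausalLM.from_pretrained(
        "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",  # assumed: [INST] template fits Mistral
        model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
        model_type="mistral",
    )
    return embedder, llm

def get_docs():
    # Presumably returns a list of short text snippets about Matthew
    return ["<snippet about Matthew>", "<another snippet>"]

def get_top_docs(query_embedding, docs, doc_embeddings, top_n=3):
    # Rank documents by cosine similarity; return (doc, index, score) tuples
    # to match the three-way unpacking in rag_pipeline
    doc_norms = doc_embeddings / np.linalg.norm(doc_embeddings, axis=1, keepdims=True)
    query_norm = query_embedding / np.linalg.norm(query_embedding)
    scores = doc_norms @ query_norm
    top_idx = np.argsort(scores)[::-1][:top_n]
    return [(docs[i], int(i), float(scores[i])) for i in top_idx]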
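
One nuance: the docstring mentions streaming, but rag_pipeline joins all tokens before returning, so the textbox only updates once generation finishes. Gradio treats generator functions as streams and refreshes the output on every yield, so a streaming variant could look like the sketch below. It reuses the names defined above; build_prompt is a hypothetical helper standing in for the prompt-construction code.

def rag_pipeline_streaming(user_query):
    # Same retrieval as rag_pipeline
    user_query_embedding = sentence_embedding_model.encode(user_query)
    top_docs = get_top_docs(user_query_embedding, docs, doc_embeddings, top_n=3)
    context = "\n\n".join(doc for doc, _, _ in top_docs)
    prompt = build_prompt(context, user_query)  # hypothetical helper wrapping the f-string above

    # Yield the accumulated text so Gradio updates the textbox as tokens arrive
    partial = ""
    for token in llm(f"<s>[INST] {prompt} [/INST]", stream=True):
        partial += token
        yield partial

Wiring it up is unchanged: inp.submit(rag_pipeline_streaming, inputs=inp, outputs=out).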