import gradio as gr
from utils.utils import load_models, get_top_docs
from utils.doc_utils import get_docs
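
# Assumed helper contracts (not defined in this file): load_models() returns a
# sentence-embedding encoder and a callable LLM; get_docs() returns a list of
# document strings; get_top_docs() ranks documents by similarity between the
# query embedding and each document embedding and returns (doc, score, index)
# tuples for the top_n matches.
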
# Load models (LLM and sentence embedding model)
sentence_embedding_model, llm = load_models()
# Load documents and create numerical embeddings
docs = get_docs()
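# encode() on a list of strings returns one embedding per document (a 2-D array)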
doc_embeddings = sentence_embedding_model.encode(docs)


def rag_pipeline(user_query):
    """Retrieve relevant docs, construct a prompt, and generate an LLM response."""
    user_query_embedding = sentence_embedding_model.encode(user_query)
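    # Encoding a single string yields a 1-D query vector comparable against doc_embeddings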
    # Get top matching documents
    top_docs = get_top_docs(user_query_embedding, docs, doc_embeddings, top_n=3)
    retrieved_docs = [doc for doc, _, _ in top_docs]
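    # Keep only the document text; the similarity score and index are unused here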
    # Construct the LLM prompt; join the retrieved docs into plain text rather
    # than interpolating the raw Python list
    context = "\n\n".join(retrieved_docs)
    prompt = f"""
System Instruction:
"You are an expert assistant who provides clear and concise answers about an individual named Matthew Schulz based on provided context information."

Retrieved Context:
{context}

User Query:
"{user_query}"

Instruction:
"Using the above context, provide a detailed and accurate answer. If the context does not include relevant information, state that you do not have this information and suggest that the user reach out to Matthew directly via his email ([email protected])."
"""
    # Run inference with streaming; stream=True is assumed to yield text chunks
    # (as with ctransformers-style APIs), which are joined into the full response
    response = llm(f"<s>[INST] {prompt} [/INST]", stream=True)
    return "".join(response)


# Create Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# RAG-powered Assistant")
    inp = gr.Textbox(label="Ask a question")
    out = gr.Textbox(label="Response")
    inp.submit(rag_pipeline, inputs=inp, outputs=out)
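
# Note: inp.submit fires when the user presses Enter in the textbox, routing the
# query through rag_pipeline and writing the joined response to the output box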

if __name__ == "__main__":
    # launch() starts a local Gradio server (http://127.0.0.1:7860 by default)
    demo.launch()