import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

def load_model():
    # Fetch the quantized GGUF file from the Hugging Face Hub (cached locally
    # after the first run) so llama-cpp-python can open it from disk; the
    # original Hub URL fragment is not a valid local model_path on its own.
    model_path = hf_hub_download(
        repo_id="mradermacher/Pentesting-GPT-v1.0-GGUF",
        filename="model-file.q4_k_m.gguf",
    )
    return Llama(model_path=model_path)

# Load the model once at startup instead of reloading it on every message.
llm = load_model()

def respond(message, history):
    # Run a plain completion and return the generated text to the chat UI.
    output = llm(message, max_tokens=100)
    return output["choices"][0]["text"]

gr.ChatInterface(respond).launch()
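Note: this is a minimal sketch. It assumes gradio, llama-cpp-python, and huggingface_hub are installed, and it keeps the repository id and .gguf filename from the original snippet; "model-file.q4_k_m.gguf" looks like a placeholder, so substitute the actual quantization file listed in the mradermacher/Pentesting-GPT-v1.0-GGUF repository before running.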