import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

def load_model():
    # Fetch the quantized GGUF file from the Hugging Face Hub (cached locally
    # after the first run) so llama-cpp-python can open it from disk; the
    # original Hub URL fragment is not a valid local model_path on its own.
    model_path = hf_hub_download(
        repo_id="mradermacher/Pentesting-GPT-v1.0-GGUF",
        filename="model-file.q4_k_m.gguf",
    )
    return Llama(model_path=model_path)

# Load the model once at startup instead of reloading it on every message.
llm = load_model()

def respond(message, history):
    # Run a plain completion and return the generated text to the chat UI.
    output = llm(message, max_tokens=100)
    return output["choices"][0]["text"]

gr.ChatInterface(respond).launch()
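Note: this is a minimal sketch. It assumes gradio, llama-cpp-python, and huggingface_hub are installed, and it keeps the repository id and .gguf filename from the original snippet; "model-file.q4_k_m.gguf" looks like a placeholder, so substitute the actual quantization file listed in the mradermacher/Pentesting-GPT-v1.0-GGUF repository before running.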