import gradio as gr
from huggingface_hub import InferenceClient
import os

# Hugging Face Inference API client; expects HF_TOKEN to be set in the environment.
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.1",
    token=os.getenv("HF_TOKEN"),
)

def responder(
    mensagem,
    historico: list[tuple[str, str]],
    mensagem_do_sistema,
    max_tokens,
    temperatura,
    top_p,
):
    # Build the prompt from the system message and the chat history.
    prompt = f"<s>[SYSTEM] {mensagem_do_sistema}\n"
    for user_msg, bot_msg in historico:
        if user_msg:
            prompt += f"[USER] {user_msg}\n"
        if bot_msg:
            prompt += f"[ASSISTANT] {bot_msg}\n"
    prompt += f"[USER] {mensagem}\n[ASSISTANT]"

    resposta = ""
    try:
        # Stream the generation and yield the growing response so the UI updates live.
        for resposta_parcial in client.text_generation(
            prompt=prompt,
            max_new_tokens=max_tokens,
            stream=True,
            temperature=temperatura,
            top_p=top_p,
        ):
            resposta += resposta_parcial  # accumulate the streamed chunks
            yield resposta
    except Exception as e:
        yield f"[ERRO]: {str(e)}"

demo = gr.ChatInterface(
    responder,
    additional_inputs=[
        gr.Textbox(value="Você é um assistente útil.", label="Mensagem do sistema"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Máximo de novos tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperatura"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (amostragem de núcleo)"),
    ],
)

if __name__ == "__main__":
    demo.launch()