import gradio as gr
import spaces
from transformers import pipeline
import torch

MODEL_ID = "LLM360/K2-Think"

# Load the model once at startup; device_map="auto" spreads the weights
# across whatever accelerators are available.
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)


# Request a ZeroGPU allocation for up to 120 seconds per call.
@spaces.GPU(duration=120)
def respond(message, history):
    # The Chatbot uses the OpenAI-style messages format (a list of
    # {"role", "content"} dicts), which the pipeline accepts directly.
    if history is None:
        history = []
    new_history = history + [{"role": "user", "content": message}]
    outputs = pipe(
        new_history,
        max_new_tokens=32768,
    )
    # In chat mode the pipeline returns the full conversation;
    # the last entry is the newly generated assistant message.
    response = outputs[0]["generated_text"][-1]["content"]
    new_history.append({"role": "assistant", "content": response})
    # Clear the textbox and display the updated conversation.
    return "", new_history


with gr.Blocks(title="K2-Think Chat") as demo:
    gr.Markdown("# K2-Think Chat App")
    chatbot = gr.Chatbot(type="messages", height=500)
    msg = gr.Textbox(placeholder="Type your message here...", scale=7)
    clear_btn = gr.Button("Clear Chat")

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # Returning None resets the Chatbot value, clearing the conversation.
    clear_btn.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()