import gradio as gr
import spaces
from transformers import pipeline
import torch

MODEL_ID = "LLM360/K2-Think"

# Load the model once at startup; device_map="auto" spreads the weights
# across whatever accelerators are available.
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)


# Request a ZeroGPU allocation for up to 120 seconds per call.
@spaces.GPU(duration=120)
def respond(message, history):
    # The Chatbot uses the OpenAI-style messages format (a list of
    # {"role", "content"} dicts), which the pipeline accepts directly.
    if history is None:
        history = []
    new_history = history + [{"role": "user", "content": message}]
    outputs = pipe(
        new_history,
        max_new_tokens=32768,
    )
    # In chat mode the pipeline returns the full conversation;
    # the last entry is the newly generated assistant message.
    response = outputs[0]["generated_text"][-1]["content"]
    new_history.append({"role": "assistant", "content": response})
    # Clear the textbox and display the updated conversation.
    return "", new_history


with gr.Blocks(title="K2-Think Chat") as demo:
    gr.Markdown("# K2-Think Chat App")
    chatbot = gr.Chatbot(type="messages", height=500)
    msg = gr.Textbox(placeholder="Type your message here...", scale=7)
    clear_btn = gr.Button("Clear Chat")

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # Returning None resets the Chatbot value, clearing the conversation.
    clear_btn.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()