Spaces: Running on Zero
| import gradio as gr | |
| import torch | |
| from PIL import Image | |
| from transformers import AutoModel, AutoTokenizer | |
| import spaces | |
# ---------------------------------------------------------------------------
# Model setup: load MiniCPM-V 4.5 once at import time and move it to the GPU.
# ---------------------------------------------------------------------------
torch.manual_seed(100)  # fixed seed so generation is reproducible across runs

MODEL_ID = 'openbmb/MiniCPM-V-4_5'

# Tokenizer first; it is independent of the model weights.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
)

# trust_remote_code: the checkpoint ships its own modeling code.
# sdpa: use PyTorch scaled-dot-product attention; bfloat16 halves memory.
model = (
    AutoModel.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        attn_implementation='sdpa',
        torch_dtype=torch.bfloat16,
    )
    .eval()
    .cuda()
)
@spaces.GPU  # ZeroGPU Spaces require this so the call gets a GPU allocation
def respond(message, history, enable_thinking):
    """Generate a reply from MiniCPM-V for the current user message.

    Args:
        message: Either a plain string or a gr.MultimodalTextbox dict with
            "text" and "files" keys.
        history: Tuple-format chat history; each entry is
            (user_msg, assistant_msg), where user_msg may itself be an
            (image_path, text) tuple when that turn included an image.
        enable_thinking: Forwarded to model.chat() to toggle the model's
            thinking mode.

    Returns:
        The assistant's reply string, or an "Error: ..." string when
        generation fails.
    """
    msgs = []

    # Replay prior turns in the message format model.chat() expects.
    for user_msg, assistant_msg in history:
        if isinstance(user_msg, tuple):
            # Image turn: (file_path, optional_text) from the chatbot display.
            img_path, text = user_msg
            img = Image.open(img_path).convert('RGB')
            user_content = [img, text] if text else [img]
        else:
            # Text-only turn.
            user_content = [user_msg]
        msgs.append({"role": "user", "content": user_content})
        if assistant_msg:
            msgs.append({"role": "assistant", "content": [assistant_msg]})

    # Current turn: MultimodalTextbox delivers a dict, plain input a str.
    current_content = []
    if isinstance(message, dict):
        for file_path in message.get("files") or []:
            current_content.append(Image.open(file_path).convert('RGB'))
        if message.get("text"):
            current_content.append(message["text"])
    else:
        current_content = [message]

    # Guard: nothing to send (empty textbox, no files) — avoid a model error.
    if not current_content:
        return "Please enter a message or upload an image."

    msgs.append({"role": "user", "content": current_content})

    try:
        return model.chat(
            msgs=msgs,
            tokenizer=tokenizer,
            enable_thinking=enable_thinking,
        )
    except Exception as e:
        # Surface generation failures in the chat instead of crashing the UI.
        return f"Error: {str(e)}"
| # Create Gradio interface | |
# ---------------------------------------------------------------------------
# Gradio UI: chat column + settings column, wired to respond().
# NOTE(review): the original button/heading labels contained mis-encoded
# (mojibake) emoji; they are restored here to the most likely intended
# characters — confirm against the upstream demo.
# ---------------------------------------------------------------------------
with gr.Blocks(title="MiniCPM-V Chatbot") as demo:
    gr.Markdown(
        """
        # 🤖 MiniCPM-V Multimodal Chatbot
        Upload images and ask questions about them, or have a text conversation.
        The model supports multi-turn conversations with context memory.
        """
    )

    with gr.Row():
        # Left column: the conversation itself.
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                container=True,
                type="tuples",  # (user, assistant) pairs; respond() expects this format
            )
            with gr.Row():
                msg = gr.MultimodalTextbox(
                    interactive=True,
                    file_types=["image"],
                    placeholder="Type a message or upload an image...",
                    show_label=False,
                    container=False,
                )
            with gr.Row():
                clear = gr.Button("🗑️ Clear", size="sm")
                submit = gr.Button("📤 Send", variant="primary", size="sm")

        # Right column: generation settings and usage hints.
        with gr.Column(scale=1):
            gr.Markdown("### Settings")
            enable_thinking = gr.Checkbox(
                label="Enable Thinking Mode",
                value=False,
                info="Enable the model's thinking process",
            )
            gr.Markdown(
                """
                ### Examples
                - Upload an image and ask "What is in this picture?"
                - Ask "What are the main objects visible?"
                - Follow up with "What should I pay attention to here?"
                """
            )

    def user_submit(message, history, enable_thinking):
        """Append the user's turn, call the model, and return the updated chat.

        Returns (new_textbox_value, new_history) so the textbox is cleared
        after each send.
        """
        # Display form of the user turn: tuple format when an image is attached.
        if isinstance(message, dict) and message.get("files"):
            user_msg = (message["files"][0], message.get("text", ""))
        else:
            user_msg = message.get("text", "") if isinstance(message, dict) else message

        history = history + [(user_msg, None)]
        # respond() rebuilds the model messages itself, so pass the history
        # *without* the turn we just appended for display.
        response = respond(message, history[:-1], enable_thinking)
        history[-1] = (history[-1][0], response)
        return "", history

    # Event wiring: Enter key and Send button share one handler.
    msg.submit(
        user_submit,
        inputs=[msg, chatbot, enable_thinking],
        outputs=[msg, chatbot],
    )
    submit.click(
        user_submit,
        inputs=[msg, chatbot, enable_thinking],
        outputs=[msg, chatbot],
    )
    clear.click(
        lambda: (None, []),
        outputs=[msg, chatbot],
    )
# Entry point: launch the app with a public share link when run as a script.
if __name__ == "__main__":
    demo.launch(share=True)