import torch
import gradio as gr
import numpy as np
import imageio.v3 as iio
from PIL import Image
from diffusers import DiffusionPipeline

# Note: this base checkpoint was trained for 14-frame clips; the
# "img2vid-xt" variant targets 25 frames.
MODEL_ID = "stabilityai/stable-video-diffusion-img2vid"

# CPU/float32 keeps the demo portable; on a GPU, device="cuda" with
# dtype=torch.float16 is dramatically faster.
device = "cpu"
dtype = torch.float32

pipe = DiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=dtype,
    # The fp16 weight variant only exists for half precision.
    variant="fp16" if dtype == torch.float16 else None,
)
pipe.to(device)


def resize_to_multiple_of_8(img, max_side=1024):
    """Downscale so the longest side is at most max_side, then snap both
    dimensions down to multiples of 8, which the pipeline requires."""
    w, h = img.size
    scale = min(max_side / max(w, h), 1.0)
    new_w = int(np.floor(w * scale / 8) * 8)
    new_h = int(np.floor(h * scale / 8) * 8)
    return img.convert("RGB").resize((new_w, new_h), Image.LANCZOS)


def generate_video(image, motion=50, noise=0.1, num_frames=25, fps=8, seed=0):
    if image is None:
        raise gr.Error("Please upload an image.")
    image = resize_to_multiple_of_8(image)

    generator = torch.Generator(device=device)
    if seed:  # seed == 0 leaves the generator unseeded (random)
        # gr.Number yields a float; manual_seed expects an int.
        generator.manual_seed(int(seed))

    # autocast with float32 on CPU is a no-op (and warns), so just run
    # the pipeline under inference_mode instead.
    with torch.inference_mode():
        result = pipe(
            image,
            # Pass the resized dimensions explicitly; otherwise the
            # pipeline resizes to its 1024x576 default and the helper
            # above has no effect.
            height=image.height,
            width=image.width,
            num_frames=num_frames,
            fps=fps,  # micro-conditioning signal, not the output frame rate
            motion_bucket_id=motion,
            noise_aug_strength=noise,
            generator=generator,
        )

    # result.frames is batched: frames[0] holds the list of PIL frames
    # for our single input image.
    frames = [np.array(f.convert("RGB")) for f in result.frames[0]]
    iio.imwrite("out.mp4", np.stack(frames), fps=fps, codec="libx264")
    return "out.mp4"


demo = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Image(label="Input Image", type="pil"),
        gr.Slider(1, 255, value=50, step=1, label="Motion strength"),
        gr.Slider(0.0, 0.3, value=0.1, label="Noise strength"),
        gr.Slider(8, 25, value=25, step=1, label="Frames"),
        gr.Slider(5, 30, value=8, step=1, label="FPS"),
        gr.Number(value=0, label="Seed (0 = random)"),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Stable Video Diffusion (Image → Video)",
    description="Generate short (~3-second) video clips from a single image using Stability AI's open model.",
)

if __name__ == "__main__":
    demo.launch()
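
# A minimal programmatic usage sketch, bypassing the Gradio UI. Assumptions
# (not part of the app above): an "input.jpg" exists next to the script and
# the model weights are already downloaded. Left commented out because
# demo.launch() blocks when the script is run directly:
#
#   from PIL import Image
#   img = Image.open("input.jpg")
#   path = generate_video(img, motion=120, noise=0.05, num_frames=14, fps=7, seed=42)
#   print("video written to", path)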