# svd-i2v / app.py
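"""Gradio demo: turn a single still image into a short video clip with
Stable Video Diffusion (image-to-video)."""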
import torch
import gradio as gr
import numpy as np
import imageio.v3 as iio
from PIL import Image
from diffusers import StableVideoDiffusionPipeline
MODEL_ID = "stabilityai/stable-video-diffusion-img2vid"
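# Note: this "img2vid" checkpoint is the 14-frame base model; the
# "img2vid-xt" variant is the one trained to generate 25 frames.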
# Use fp16 on GPU when available; fall back to fp32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
pipe = StableVideoDiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=dtype,
    variant="fp16" if dtype == torch.float16 else None,
)
pipe.to(device)
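
# SVD's VAE downsamples by a factor of 8, so frame dimensions must be
# divisible by 8; oversized inputs are scaled down to cap memory use.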
def resize_to_multiple_of_8(img, max_side=1024):
    w, h = img.size
    scale = min(max_side / max(w, h), 1.0)  # downscale only, never upscale
    new_w = max(int(np.floor(w * scale / 8) * 8), 8)
    new_h = max(int(np.floor(h * scale / 8) * 8), 8)
    return img.convert("RGB").resize((new_w, new_h), Image.LANCZOS)
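
# motion_bucket_id and fps are conditioning signals SVD was trained with:
# higher motion values produce more movement, and noise_aug_strength adds
# noise to the conditioning image (more noise = more motion, less fidelity).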
def generate_video(image, motion=50, noise=0.1, num_frames=25, fps=8, seed=0):
    if image is None:
        raise gr.Error("Please upload an image.")
    image = resize_to_multiple_of_8(image)
    generator = torch.Generator(device=device)
    if seed:
        generator.manual_seed(int(seed))
    else:
        generator.seed()  # draw a fresh non-deterministic seed
    with torch.inference_mode():
        result = pipe(
            image,
            num_frames=int(num_frames),
            fps=int(fps),
            motion_bucket_id=int(motion),
            noise_aug_strength=noise,
            decode_chunk_size=8,  # decode frames in chunks to limit peak memory
            generator=generator,
        )
    # result.frames is a list of videos (one per batch item),
    # each itself a list of PIL frames.
    frames = [np.asarray(f.convert("RGB")) for f in result.frames[0]]
    # Writing mp4 relies on an ffmpeg-capable imageio backend.
    iio.imwrite("out.mp4", frames, fps=fps, codec="libx264", quality=8)
    return "out.mp4"
demo = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Image(label="Input Image", type="pil"),
        gr.Slider(1, 255, 50, step=1, label="Motion strength (motion_bucket_id)"),
        gr.Slider(0.0, 0.3, 0.1, label="Noise strength (noise_aug_strength)"),
        gr.Slider(8, 25, 25, step=1, label="Frames"),
        gr.Slider(5, 30, 8, step=1, label="FPS"),
        gr.Number(value=0, precision=0, label="Seed (0 = random)"),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Stable Video Diffusion (Image → Video)",
    description="Generate ~3-second video clips from a single image using Stability AI’s open Stable Video Diffusion model.",
)
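
# Note: concurrent requests share the same out.mp4, so on shared hardware
# consider serializing them, e.g. demo.queue(max_size=4) before launching
# (the queue size here is an illustrative value).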
if __name__ == "__main__":
    demo.launch()