Update README.md (#3)
Browse files- Update README.md (9d26f88f5e293f98f8aabd78475088f62a67e60f)
Co-authored-by: Tolga Cangöz <[email protected]>
README.md
CHANGED
|
@@ -3,6 +3,12 @@ license: other
|
|
| 3 |
license_name: skywork-license
|
| 4 |
license_link: LICENSE
|
| 5 |
pipeline_tag: image-to-video
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
---
|
| 7 |
<p align="center">
|
| 8 |
<img src="assets/logo2.png" alt="SkyReels Logo" width="50%">
|
|
@@ -49,7 +55,7 @@ The demos above showcase 30-second videos generated using our SkyReels-V2 Diffus
|
|
| 49 |
- [x] Single-GPU & Multi-GPU Inference Code
|
| 50 |
- [x] <a href="https://huggingface.co/Skywork/SkyCaptioner-V1">SkyCaptioner-V1</a>: A Video Captioning Model
|
| 51 |
- [x] Prompt Enhancer
|
| 52 |
-
- [ ] Diffusers integration
|
| 53 |
- [ ] Checkpoints of the 5B Models Series
|
| 54 |
- [ ] Checkpoints of the Camera Director Models
|
| 55 |
- [ ] Checkpoints of the Step & Guidance Distill Model
|
|
@@ -57,6 +63,65 @@ The demos above showcase 30-second videos generated using our SkyReels-V2 Diffus
|
|
| 57 |
|
| 58 |
## 🚀 Quickstart
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
#### Installation
|
| 61 |
```shell
|
| 62 |
# clone the repository.
|
|
|
|
| 3 |
license_name: skywork-license
|
| 4 |
license_link: LICENSE
|
| 5 |
pipeline_tag: image-to-video
|
| 6 |
+
library_name: diffusers
|
| 7 |
+
tags:
|
| 8 |
+
- video
|
| 9 |
+
- video generation
|
| 10 |
+
language:
|
| 11 |
+
- en
|
| 12 |
---
|
| 13 |
<p align="center">
|
| 14 |
<img src="assets/logo2.png" alt="SkyReels Logo" width="50%">
|
|
|
|
| 55 |
- [x] Single-GPU & Multi-GPU Inference Code
|
| 56 |
- [x] <a href="https://huggingface.co/Skywork/SkyCaptioner-V1">SkyCaptioner-V1</a>: A Video Captioning Model
|
| 57 |
- [x] Prompt Enhancer
|
| 58 |
+
- [x] Diffusers integration
|
| 59 |
- [ ] Checkpoints of the 5B Models Series
|
| 60 |
- [ ] Checkpoints of the Camera Director Models
|
| 61 |
- [ ] Checkpoints of the Step & Guidance Distill Model
|
|
|
|
| 63 |
|
| 64 |
## 🚀 Quickstart
|
| 65 |
|
| 66 |
+
SkyReels-V2 can run directly using 🤗 Diffusers!
|
| 67 |
+
|
| 68 |
+
```py
|
| 69 |
+
# pip install ftfy
|
| 70 |
+
import numpy as np
|
| 71 |
+
import torch
|
| 72 |
+
from diffusers import AutoModel, SkyReelsV2ImageToVideoPipeline, UniPCMultistepScheduler
|
| 73 |
+
from diffusers.utils import export_to_video, load_image
|
| 74 |
+
|
| 75 |
+
model_id = "Skywork/SkyReels-V2-I2V-1.3B-540P-Diffusers"
|
| 76 |
+
vae = AutoModel.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
|
| 77 |
+
|
| 78 |
+
pipeline = SkyReelsV2ImageToVideoPipeline.from_pretrained(
|
| 79 |
+
model_id,
|
| 80 |
+
vae=vae,
|
| 81 |
+
torch_dtype=torch.bfloat16
|
| 82 |
+
)
|
| 83 |
+
flow_shift = 5.0 # 8.0 for T2V, 5.0 for I2V
|
| 84 |
+
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
|
| 85 |
+
pipeline = pipeline.to("cuda")
|
| 86 |
+
|
| 87 |
+
first_frame = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/flf2v_input_first_frame.png")
|
| 88 |
+
|
| 89 |
+
def aspect_ratio_resize(image, pipeline, max_area=720 * 1280):
|
| 90 |
+
aspect_ratio = image.height / image.width
|
| 91 |
+
mod_value = pipeline.vae_scale_factor_spatial * pipeline.transformer.config.patch_size[1]
|
| 92 |
+
height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
|
| 93 |
+
width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
|
| 94 |
+
image = image.resize((width, height))
|
| 95 |
+
return image, height, width
|
| 96 |
+
|
| 97 |
+
def center_crop_resize(image, height, width):
|
| 98 |
+
# Calculate resize ratio to match first frame dimensions
|
| 99 |
+
resize_ratio = max(width / image.width, height / image.height)
|
| 100 |
+
|
| 101 |
+
# Resize the image
|
| 102 |
+
width = round(image.width * resize_ratio)
|
| 103 |
+
height = round(image.height * resize_ratio)
|
| 104 |
+
size = [width, height]
|
| 105 |
+
image = TF.center_crop(image, size)
|
| 106 |
+
|
| 107 |
+
return image, height, width
|
| 108 |
+
|
| 109 |
+
first_frame, height, width = aspect_ratio_resize(first_frame, pipeline)
|
| 110 |
+
|
| 111 |
+
prompt = "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective."
|
| 112 |
+
|
| 113 |
+
output = pipeline(
|
| 114 |
+
image=first_frame,
|
| 115 |
+
guidance_scale=5.0,
|
| 116 |
+
prompt=prompt,
|
| 117 |
+
num_inference_steps=50,
|
| 118 |
+
height=544, # 720 for 720P
|
| 119 |
+
width=960, # 1280 for 720P
|
| 120 |
+
num_frames=97,
|
| 121 |
+
).frames[0]
|
| 122 |
+
export_to_video(output, "video.mp4", fps=24, quality=8)
|
| 123 |
+
```
|
| 124 |
+
|
| 125 |
#### Installation
|
| 126 |
```shell
|
| 127 |
# clone the repository.
|