Update app.py
app.py CHANGED
| 39 |
# --- Boilerplate code from the original script ---
|
| 40 |
def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
|
| 41 |
"""Returns the value at the given index of a sequence or mapping.
|
|
@@ -216,7 +275,7 @@ def generate_video(
|
|
| 216 |
end_image_pil,
|
| 217 |
prompt,
|
| 218 |
negative_prompt="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,",
|
| 219 |
-
duration=
|
| 220 |
progress=gr.Progress(track_tqdm=True)
|
| 221 |
):
|
| 222 |
"""
|
|
@@ -224,7 +283,20 @@ def generate_video(
|
|
| 224 |
This function is called every time the user clicks the 'Generate' button.
|
| 225 |
"""
|
| 226 |
FPS = 16
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
clip = MODELS_AND_NODES["clip"]
|
| 230 |
vae = MODELS_AND_NODES["vae"]
|
|
@@ -243,11 +315,11 @@ def generate_video(
|
|
| 243 |
createvideo = MODELS_AND_NODES["CreateVideo"]
|
| 244 |
savevideo = MODELS_AND_NODES["SaveVideo"]
|
| 245 |
|
| 246 |
-
# Save
|
| 247 |
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as start_file, \
|
| 248 |
tempfile.NamedTemporaryFile(suffix=".png", delete=False) as end_file:
|
| 249 |
-
|
| 250 |
-
|
| 251 |
start_image_path = start_file.name
|
| 252 |
end_image_path = end_file.name
|
| 253 |
|
|
@@ -269,7 +341,7 @@ def generate_video(
|
|
| 269 |
|
| 270 |
progress(0.2, desc="Preparing initial latents...")
|
| 271 |
initial_latents = wanfirstlastframetovideo.EXECUTE_NORMALIZED(
|
| 272 |
-
width=
|
| 273 |
positive=get_value_at_index(positive_conditioning, 0),
|
| 274 |
negative=get_value_at_index(negative_conditioning, 0),
|
| 275 |
vae=get_value_at_index(vae, 0),
|
|
@@ -321,6 +393,8 @@ def generate_video(
|
|
| 321 |
progress(1.0, desc="Done!")
|
| 322 |
return f"output/{save_result['ui']['images'][0]['filename']}"
|
| 323 |
|
|
|
|
|
|
|
| 324 |
css = '''
|
| 325 |
.fillable{max-width: 1100px !important}
|
| 326 |
.dark .progress-text {color: white}
|
|
@@ -328,6 +402,7 @@ css = '''
|
|
| 328 |
with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
|
| 329 |
gr.Markdown("# Wan 2.2 First/Last Frame Video Fast")
|
| 330 |
gr.Markdown("Running the [Wan 2.2 First/Last Frame ComfyUI workflow](https://www.reddit.com/r/StableDiffusion/comments/1me4306/psa_wan_22_does_first_frame_last_frame_out_of_the/) on ZeroGPU")
|
|
|
|
| 331 |
with gr.Row():
|
| 332 |
with gr.Column():
|
| 333 |
with gr.Row():
|
|
@@ -337,13 +412,10 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
|
|
| 337 |
prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images", value="transition")
|
| 338 |
|
| 339 |
with gr.Accordion("Advanced Settings", open=False):
|
| 340 |
-
duration = gr.
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
step=0.1,
|
| 345 |
-
label="Video Duration (seconds)",
|
| 346 |
-
info="Longer videos take longer to generate"
|
| 347 |
)
|
| 348 |
negative_prompt = gr.Textbox(
|
| 349 |
label="Negative Prompt",
|
|
@@ -365,7 +437,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
|
|
| 365 |
gr.Examples(
|
| 366 |
examples=[
|
| 367 |
["poli_tower.png", "tower_takes_off.png", "the man turns"],
|
| 368 |
-
["capybara_zoomed.png", "capybara.
|
| 369 |
],
|
| 370 |
inputs=[start_image, end_image, prompt],
|
| 371 |
outputs=output_video,
|
|
|
|
@@ -36,6 +36,65 @@ hf_hub_download_local(repo_id="Kijai/WanVideo_comfy", filename="Wan22-Lightning/
 hf_hub_download_local(repo_id="Kijai/WanVideo_comfy", filename="Wan22-Lightning/Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors", local_dir="models/loras")
 print("Downloads complete.")
 
+# --- Image Processing Functions ---
+def calculate_video_dimensions(width, height, max_size=832, min_size=480):
+    """
+    Calculate video dimensions based on input image size.
+    Larger dimension becomes max_size, smaller becomes proportional.
+    If square, use min_size x min_size.
+    Results are rounded to nearest multiple of 16.
+    """
+    # Handle square images
+    if width == height:
+        video_width = min_size
+        video_height = min_size
+    else:
+        # Calculate aspect ratio
+        aspect_ratio = width / height
+
+        if width > height:
+            # Landscape orientation
+            video_width = max_size
+            video_height = int(max_size / aspect_ratio)
+        else:
+            # Portrait orientation
+            video_height = max_size
+            video_width = int(max_size * aspect_ratio)
+
+    # Round to nearest multiple of 16
+    video_width = round(video_width / 16) * 16
+    video_height = round(video_height / 16) * 16
+
+    # Ensure minimum size
+    video_width = max(video_width, 16)
+    video_height = max(video_height, 16)
+
+    return video_width, video_height
+
+def resize_and_crop_to_match(target_image, reference_image):
+    """
+    Resize and center crop target_image to match reference_image dimensions.
+    """
+    ref_width, ref_height = reference_image.size
+    target_width, target_height = target_image.size
+
+    # Calculate scaling factor to ensure target covers reference dimensions
+    scale = max(ref_width / target_width, ref_height / target_height)
+
+    # Resize target image
+    new_width = int(target_width * scale)
+    new_height = int(target_height * scale)
+    resized = target_image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+
+    # Center crop to match reference dimensions
+    left = (new_width - ref_width) // 2
+    top = (new_height - ref_height) // 2
+    right = left + ref_width
+    bottom = top + ref_height
+
+    cropped = resized.crop((left, top, right, bottom))
+    return cropped
+
 # --- Boilerplate code from the original script ---
 def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
     """Returns the value at the given index of a sequence or mapping.
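As a quick sanity check of the two helpers above, a standalone sketch (assuming Pillow is available; "start.png" and "end.png" are hypothetical files):

    from PIL import Image

    # The larger side is pinned to 832, the other follows the aspect
    # ratio, and both are rounded to the nearest multiple of 16.
    print(calculate_video_dimensions(1920, 1080))  # (832, 464)
    print(calculate_video_dimensions(1080, 1920))  # (464, 832)
    print(calculate_video_dimensions(1024, 1024))  # (480, 480), square case

    # The end frame is scaled to cover the start frame's canvas, then
    # center-cropped, so both frames end up with identical dimensions.
    start = Image.open("start.png")
    end = Image.open("end.png")
    assert resize_and_crop_to_match(end, start).size == start.size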
@@ -216,7 +275,7 @@ def generate_video(
     end_image_pil,
     prompt,
     negative_prompt="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,",
-    duration=
+    duration=33,
     progress=gr.Progress(track_tqdm=True)
 ):
     """
@@ -224,7 +283,20 @@ def generate_video(
     This function is called every time the user clicks the 'Generate' button.
     """
     FPS = 16
-
+
+    # Process images: resize and crop second image to match first
+    # The first image determines the dimensions
+    processed_start_image = start_image_pil.copy()
+    processed_end_image = resize_and_crop_to_match(end_image_pil, start_image_pil)
+
+    # Calculate video dimensions based on the first image
+    video_width, video_height = calculate_video_dimensions(
+        processed_start_image.width,
+        processed_start_image.height
+    )
+
+    print(f"Input image size: {processed_start_image.width}x{processed_start_image.height}")
+    print(f"Video dimensions: {video_width}x{video_height}")
 
     clip = MODELS_AND_NODES["clip"]
     vae = MODELS_AND_NODES["vae"]
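Note that duration is now a frame count rather than a number of seconds: with the fixed FPS of 16, the default of 33 frames is roughly 2 seconds. A small conversion sketch (frames_to_seconds is a hypothetical helper, not part of the commit):

    FPS = 16  # matches the constant in generate_video

    def frames_to_seconds(frames, fps=FPS):
        return frames / fps

    print(frames_to_seconds(33))  # 2.0625 -> the "Short (2s)" preset
    print(frames_to_seconds(66))  # 4.125  -> the "Mid (4s)" preset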
@@ -243,11 +315,11 @@
     createvideo = MODELS_AND_NODES["CreateVideo"]
     savevideo = MODELS_AND_NODES["SaveVideo"]
 
-    # Save
+    # Save processed images to temporary files
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as start_file, \
          tempfile.NamedTemporaryFile(suffix=".png", delete=False) as end_file:
-
-
+        processed_start_image.save(start_file.name)
+        processed_end_image.save(end_file.name)
         start_image_path = start_file.name
         end_image_path = end_file.name
 
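Because the temporary files are created with delete=False, they outlive the with block, so the downstream nodes can read them by path; nothing in this hunk deletes them afterwards. A minimal sketch of the same pattern with explicit cleanup (the os.unlink call is an assumption, not in the commit):

    import os
    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
        path = f.name  # file persists after the block exits
    # ... write the image to `path` and run the pipeline ...
    os.unlink(path)  # explicit cleanup once the file is no longer needed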
@@ -269,7 +341,7 @@
 
     progress(0.2, desc="Preparing initial latents...")
     initial_latents = wanfirstlastframetovideo.EXECUTE_NORMALIZED(
-        width=
+        width=video_width, height=video_height, length=duration, batch_size=1,
         positive=get_value_at_index(positive_conditioning, 0),
         negative=get_value_at_index(negative_conditioning, 0),
         vae=get_value_at_index(vae, 0),
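For concreteness, with a 1920x1080 start frame and the "Short" preset, the new keyword arguments resolve roughly as follows (a sketch; EXECUTE_NORMALIZED's remaining parameters are unchanged from the surrounding lines):

    video_width, video_height = calculate_video_dimensions(1920, 1080)  # (832, 464)
    duration = 33  # frames, ~2s at 16 fps
    # -> EXECUTE_NORMALIZED(width=832, height=464, length=33, batch_size=1, ...)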
@@ -321,6 +393,8 @@
     progress(1.0, desc="Done!")
     return f"output/{save_result['ui']['images'][0]['filename']}"
 
+
+
 css = '''
 .fillable{max-width: 1100px !important}
 .dark .progress-text {color: white}
@@ -328,6 +402,7 @@ css = '''
 with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
     gr.Markdown("# Wan 2.2 First/Last Frame Video Fast")
     gr.Markdown("Running the [Wan 2.2 First/Last Frame ComfyUI workflow](https://www.reddit.com/r/StableDiffusion/comments/1me4306/psa_wan_22_does_first_frame_last_frame_out_of_the/) on ZeroGPU")
+
     with gr.Row():
         with gr.Column():
             with gr.Row():
@@ -337,13 +412,10 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
             prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images", value="transition")
 
             with gr.Accordion("Advanced Settings", open=False):
-                duration = gr.
-
-
-
-                    step=0.1,
-                    label="Video Duration (seconds)",
-                    info="Longer videos take longer to generate"
+                duration = gr.Radio(
+                    [("Short (2s)", 33), ("Mid (4s)", 66)],
+                    value=33,
+                    label="Video Duration",
                 )
                 negative_prompt = gr.Textbox(
                     label="Negative Prompt",
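gr.Radio accepts (label, value) tuples for its choices, so the UI displays "Short (2s)" while generate_video receives the integer 33. A minimal standalone sketch of the same pattern (a hypothetical demo, assuming a recent Gradio):

    import gradio as gr

    def report(frames):
        return f"Generating {frames} frames (~{frames / 16:.1f}s at 16 fps)"

    with gr.Blocks() as demo:
        choice = gr.Radio([("Short (2s)", 33), ("Mid (4s)", 66)], value=33, label="Video Duration")
        plan = gr.Textbox(label="Plan")
        choice.change(report, inputs=choice, outputs=plan)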
@@ -365,7 +437,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
     gr.Examples(
         examples=[
             ["poli_tower.png", "tower_takes_off.png", "the man turns"],
-            ["capybara_zoomed.png", "capybara.
+            ["capybara_zoomed.png", "capybara.png", "a dramatic dolly zoom"],
         ],
         inputs=[start_image, end_image, prompt],
         outputs=output_video,