multimodalart (HF Staff) committed
Commit 06ce900 · verified · 1 parent: 32ba0e7

Update app.py

Files changed (1)
  1. app.py +86 -14
app.py CHANGED
@@ -36,6 +36,65 @@ hf_hub_download_local(repo_id="Kijai/WanVideo_comfy", filename="Wan22-Lightning/
 hf_hub_download_local(repo_id="Kijai/WanVideo_comfy", filename="Wan22-Lightning/Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors", local_dir="models/loras")
 print("Downloads complete.")
 
+# --- Image Processing Functions ---
+def calculate_video_dimensions(width, height, max_size=832, min_size=480):
+    """
+    Calculate video dimensions based on input image size.
+    Larger dimension becomes max_size, smaller becomes proportional.
+    If square, use min_size x min_size.
+    Results are rounded to nearest multiple of 16.
+    """
+    # Handle square images
+    if width == height:
+        video_width = min_size
+        video_height = min_size
+    else:
+        # Calculate aspect ratio
+        aspect_ratio = width / height
+
+        if width > height:
+            # Landscape orientation
+            video_width = max_size
+            video_height = int(max_size / aspect_ratio)
+        else:
+            # Portrait orientation
+            video_height = max_size
+            video_width = int(max_size * aspect_ratio)
+
+    # Round to nearest multiple of 16
+    video_width = round(video_width / 16) * 16
+    video_height = round(video_height / 16) * 16
+
+    # Ensure minimum size
+    video_width = max(video_width, 16)
+    video_height = max(video_height, 16)
+
+    return video_width, video_height
+
+def resize_and_crop_to_match(target_image, reference_image):
+    """
+    Resize and center crop target_image to match reference_image dimensions.
+    """
+    ref_width, ref_height = reference_image.size
+    target_width, target_height = target_image.size
+
+    # Calculate scaling factor to ensure target covers reference dimensions
+    scale = max(ref_width / target_width, ref_height / target_height)
+
+    # Resize target image
+    new_width = int(target_width * scale)
+    new_height = int(target_height * scale)
+    resized = target_image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+
+    # Center crop to match reference dimensions
+    left = (new_width - ref_width) // 2
+    top = (new_height - ref_height) // 2
+    right = left + ref_width
+    bottom = top + ref_height
+
+    cropped = resized.crop((left, top, right, bottom))
+    return cropped
+
 # --- Boilerplate code from the original script ---
 def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
     """Returns the value at the given index of a sequence or mapping.
@@ -216,7 +275,7 @@ def generate_video(
     end_image_pil,
     prompt,
     negative_prompt="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,",
-    duration=2,
+    duration=33,
     progress=gr.Progress(track_tqdm=True)
 ):
     """
@@ -224,7 +283,20 @@
     This function is called every time the user clicks the 'Generate' button.
     """
     FPS = 16
-    num_frames = max(2, int(duration * FPS))
+
+    # Process images: resize and crop second image to match first
+    # The first image determines the dimensions
+    processed_start_image = start_image_pil.copy()
+    processed_end_image = resize_and_crop_to_match(end_image_pil, start_image_pil)
+
+    # Calculate video dimensions based on the first image
+    video_width, video_height = calculate_video_dimensions(
+        processed_start_image.width,
+        processed_start_image.height
+    )
+
+    print(f"Input image size: {processed_start_image.width}x{processed_start_image.height}")
+    print(f"Video dimensions: {video_width}x{video_height}")
 
     clip = MODELS_AND_NODES["clip"]
     vae = MODELS_AND_NODES["vae"]
@@ -243,11 +315,11 @@ def generate_video(
     createvideo = MODELS_AND_NODES["CreateVideo"]
     savevideo = MODELS_AND_NODES["SaveVideo"]
 
-    # Save uploaded images to temporary files
+    # Save processed images to temporary files
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as start_file, \
          tempfile.NamedTemporaryFile(suffix=".png", delete=False) as end_file:
-        start_image_pil.save(start_file.name)
-        end_image_pil.save(end_file.name)
+        processed_start_image.save(start_file.name)
+        processed_end_image.save(end_file.name)
         start_image_path = start_file.name
        end_image_path = end_file.name
 
@@ -269,7 +341,7 @@
 
     progress(0.2, desc="Preparing initial latents...")
     initial_latents = wanfirstlastframetovideo.EXECUTE_NORMALIZED(
-        width=480, height=480, length=num_frames, batch_size=1,
+        width=video_width, height=video_height, length=duration, batch_size=1,
         positive=get_value_at_index(positive_conditioning, 0),
         negative=get_value_at_index(negative_conditioning, 0),
         vae=get_value_at_index(vae, 0),
@@ -321,6 +393,8 @@ def generate_video(
     progress(1.0, desc="Done!")
     return f"output/{save_result['ui']['images'][0]['filename']}"
 
+
+
 css = '''
 .fillable{max-width: 1100px !important}
 .dark .progress-text {color: white}
@@ -328,6 +402,7 @@ css = '''
 with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
     gr.Markdown("# Wan 2.2 First/Last Frame Video Fast")
     gr.Markdown("Running the [Wan 2.2 First/Last Frame ComfyUI workflow](https://www.reddit.com/r/StableDiffusion/comments/1me4306/psa_wan_22_does_first_frame_last_frame_out_of_the/) on ZeroGPU")
+
     with gr.Row():
         with gr.Column():
             with gr.Row():
@@ -337,13 +412,10 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
             prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images", value="transition")
 
             with gr.Accordion("Advanced Settings", open=False):
-                duration = gr.Slider(
-                    minimum=1.0,
-                    maximum=5.0,
-                    value=2.0,
-                    step=0.1,
-                    label="Video Duration (seconds)",
-                    info="Longer videos take longer to generate"
+                duration = gr.Radio(
+                    [("Short (2s)", 33), ("Mid (4s)", 66)],
+                    value=33,
+                    label="Video Duration",
                 )
                 negative_prompt = gr.Textbox(
                     label="Negative Prompt",
@@ -365,7 +437,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
     gr.Examples(
         examples=[
            ["poli_tower.png", "tower_takes_off.png", "the man turns"],
-            ["capybara_zoomed.png", "capybara.webp", "a dramatic dolly zoom"],
+            ["capybara_zoomed.png", "capybara.png", "a dramatic dolly zoom"],
        ],
        inputs=[start_image, end_image, prompt],
        outputs=output_video,
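
As a rough illustration of the sizing rules added in this commit (not part of the diff itself): it assumes calculate_video_dimensions and resize_and_crop_to_match from app.py are already in scope, for example pasted into a Python session, since importing app.py directly would also trigger the model downloads at the top of the file. The input sizes below are hypothetical.

from PIL import Image

# Assumes the two helpers defined in app.py above are in scope.
# Hypothetical input sizes, chosen only to show the rounding behaviour.
print(calculate_video_dimensions(1280, 720))    # (832, 464): long side capped at 832, 468 rounds down to 464
print(calculate_video_dimensions(720, 1280))    # (464, 832): same rule for portrait images
print(calculate_video_dimensions(1024, 1024))   # (480, 480): square inputs fall back to min_size

# The end frame is scaled just enough to cover the start frame, then
# center-cropped, so both frames end up with identical dimensions.
start = Image.new("RGB", (1280, 720))
end = Image.new("RGB", (640, 640))
print(resize_and_crop_to_match(end, start).size)  # (1280, 720)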
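The duration control now passes a frame count (the length argument of WanFirstLastFrameToVideo) rather than seconds; at the fixed FPS = 16 in generate_video, the two radio values map to roughly the advertised lengths. A quick back-of-the-envelope check, not code from the commit:

FPS = 16  # fixed in generate_video

for label, frames in [("Short (2s)", 33), ("Mid (4s)", 66)]:
    print(f"{label}: {frames} frames -> {frames / FPS:.2f} s")
# Short (2s): 33 frames -> 2.06 s
# Mid (4s): 66 frames -> 4.12 s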