Update app.py
app.py CHANGED
| 39 |
# --- Boilerplate code from the original script ---
|
| 40 |
def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
|
| 41 |
"""Returns the value at the given index of a sequence or mapping.
|
|
@@ -216,7 +275,7 @@ def generate_video(
|
|
| 216 |
end_image_pil,
|
| 217 |
prompt,
|
| 218 |
negative_prompt="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,",
|
| 219 |
-
duration=
|
| 220 |
progress=gr.Progress(track_tqdm=True)
|
| 221 |
):
|
| 222 |
"""
|
|
@@ -224,7 +283,20 @@ def generate_video(
|
|
| 224 |
This function is called every time the user clicks the 'Generate' button.
|
| 225 |
"""
|
| 226 |
FPS = 16
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
clip = MODELS_AND_NODES["clip"]
|
| 230 |
vae = MODELS_AND_NODES["vae"]
|
|
@@ -243,11 +315,11 @@ def generate_video(
|
|
| 243 |
createvideo = MODELS_AND_NODES["CreateVideo"]
|
| 244 |
savevideo = MODELS_AND_NODES["SaveVideo"]
|
| 245 |
|
| 246 |
-
# Save
|
| 247 |
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as start_file, \
|
| 248 |
tempfile.NamedTemporaryFile(suffix=".png", delete=False) as end_file:
|
| 249 |
-
|
| 250 |
-
|
| 251 |
start_image_path = start_file.name
|
| 252 |
end_image_path = end_file.name
|
| 253 |
|
|
@@ -269,7 +341,7 @@ def generate_video(
|
|
| 269 |
|
| 270 |
progress(0.2, desc="Preparing initial latents...")
|
| 271 |
initial_latents = wanfirstlastframetovideo.EXECUTE_NORMALIZED(
|
| 272 |
-
width=
|
| 273 |
positive=get_value_at_index(positive_conditioning, 0),
|
| 274 |
negative=get_value_at_index(negative_conditioning, 0),
|
| 275 |
vae=get_value_at_index(vae, 0),
|
|
@@ -321,6 +393,8 @@ def generate_video(
|
|
| 321 |
progress(1.0, desc="Done!")
|
| 322 |
return f"output/{save_result['ui']['images'][0]['filename']}"
|
| 323 |
|
|
|
|
|
|
|
| 324 |
css = '''
|
| 325 |
.fillable{max-width: 1100px !important}
|
| 326 |
.dark .progress-text {color: white}
|
|
@@ -328,6 +402,7 @@ css = '''
|
|
| 328 |
with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
|
| 329 |
gr.Markdown("# Wan 2.2 First/Last Frame Video Fast")
|
| 330 |
gr.Markdown("Running the [Wan 2.2 First/Last Frame ComfyUI workflow](https://www.reddit.com/r/StableDiffusion/comments/1me4306/psa_wan_22_does_first_frame_last_frame_out_of_the/) on ZeroGPU")
|
|
|
|
| 331 |
with gr.Row():
|
| 332 |
with gr.Column():
|
| 333 |
with gr.Row():
|
|
@@ -337,13 +412,10 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
|
|
| 337 |
prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images", value="transition")
|
| 338 |
|
| 339 |
with gr.Accordion("Advanced Settings", open=False):
|
| 340 |
-
duration = gr.
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
step=0.1,
|
| 345 |
-
label="Video Duration (seconds)",
|
| 346 |
-
info="Longer videos take longer to generate"
|
| 347 |
)
|
| 348 |
negative_prompt = gr.Textbox(
|
| 349 |
label="Negative Prompt",
|
|
@@ -365,7 +437,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
|
|
| 365 |
gr.Examples(
|
| 366 |
examples=[
|
| 367 |
["poli_tower.png", "tower_takes_off.png", "the man turns"],
|
| 368 |
-
["capybara_zoomed.png", "capybara.
|
| 369 |
],
|
| 370 |
inputs=[start_image, end_image, prompt],
|
| 371 |
outputs=output_video,
|
|
|
|
@@ -36,6 +36,65 @@ hf_hub_download_local(repo_id="Kijai/WanVideo_comfy", filename="Wan22-Lightning/
 hf_hub_download_local(repo_id="Kijai/WanVideo_comfy", filename="Wan22-Lightning/Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors", local_dir="models/loras")
 print("Downloads complete.")
 
+# --- Image Processing Functions ---
+def calculate_video_dimensions(width, height, max_size=832, min_size=480):
+    """
+    Calculate video dimensions based on input image size.
+    Larger dimension becomes max_size, smaller becomes proportional.
+    If square, use min_size x min_size.
+    Results are rounded to nearest multiple of 16.
+    """
+    # Handle square images
+    if width == height:
+        video_width = min_size
+        video_height = min_size
+    else:
+        # Calculate aspect ratio
+        aspect_ratio = width / height
+
+        if width > height:
+            # Landscape orientation
+            video_width = max_size
+            video_height = int(max_size / aspect_ratio)
+        else:
+            # Portrait orientation
+            video_height = max_size
+            video_width = int(max_size * aspect_ratio)
+
+    # Round to nearest multiple of 16
+    video_width = round(video_width / 16) * 16
+    video_height = round(video_height / 16) * 16
+
+    # Ensure minimum size
+    video_width = max(video_width, 16)
+    video_height = max(video_height, 16)
+
+    return video_width, video_height
+
+def resize_and_crop_to_match(target_image, reference_image):
+    """
+    Resize and center crop target_image to match reference_image dimensions.
+    """
+    ref_width, ref_height = reference_image.size
+    target_width, target_height = target_image.size
+
+    # Calculate scaling factor to ensure target covers reference dimensions
+    scale = max(ref_width / target_width, ref_height / target_height)
+
+    # Resize target image
+    new_width = int(target_width * scale)
+    new_height = int(target_height * scale)
+    resized = target_image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+
+    # Center crop to match reference dimensions
+    left = (new_width - ref_width) // 2
+    top = (new_height - ref_height) // 2
+    right = left + ref_width
+    bottom = top + ref_height
+
+    cropped = resized.crop((left, top, right, bottom))
+    return cropped
+
 # --- Boilerplate code from the original script ---
 def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
     """Returns the value at the given index of a sequence or mapping.
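As a quick sanity check of the two helpers above, a standalone sketch (assuming Pillow is available; "start.png" and "end.png" are hypothetical files):

    from PIL import Image

    # The larger side is pinned to 832, the other follows the aspect
    # ratio, and both are rounded to the nearest multiple of 16.
    print(calculate_video_dimensions(1920, 1080))  # (832, 464)
    print(calculate_video_dimensions(1080, 1920))  # (464, 832)
    print(calculate_video_dimensions(1024, 1024))  # (480, 480), square case

    # The end frame is scaled to cover the start frame's canvas, then
    # center-cropped, so both frames end up with identical dimensions.
    start = Image.open("start.png")
    end = Image.open("end.png")
    assert resize_and_crop_to_match(end, start).size == start.size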
@@ -216,7 +275,7 @@ def generate_video(
     end_image_pil,
     prompt,
     negative_prompt="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,",
-    duration=
+    duration=33,
     progress=gr.Progress(track_tqdm=True)
 ):
     """
@@ -224,7 +283,20 @@ def generate_video(
     This function is called every time the user clicks the 'Generate' button.
     """
     FPS = 16
-
+
+    # Process images: resize and crop second image to match first
+    # The first image determines the dimensions
+    processed_start_image = start_image_pil.copy()
+    processed_end_image = resize_and_crop_to_match(end_image_pil, start_image_pil)
+
+    # Calculate video dimensions based on the first image
+    video_width, video_height = calculate_video_dimensions(
+        processed_start_image.width,
+        processed_start_image.height
+    )
+
+    print(f"Input image size: {processed_start_image.width}x{processed_start_image.height}")
+    print(f"Video dimensions: {video_width}x{video_height}")
 
     clip = MODELS_AND_NODES["clip"]
     vae = MODELS_AND_NODES["vae"]
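Note that duration is now a frame count rather than a number of seconds: with the fixed FPS of 16, the default of 33 frames is roughly 2 seconds. A small conversion sketch (frames_to_seconds is a hypothetical helper, not part of the commit):

    FPS = 16  # matches the constant in generate_video

    def frames_to_seconds(frames, fps=FPS):
        return frames / fps

    print(frames_to_seconds(33))  # 2.0625 -> the "Short (2s)" preset
    print(frames_to_seconds(66))  # 4.125  -> the "Mid (4s)" preset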
@@ -243,11 +315,11 @@
     createvideo = MODELS_AND_NODES["CreateVideo"]
     savevideo = MODELS_AND_NODES["SaveVideo"]
 
-    # Save
+    # Save processed images to temporary files
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as start_file, \
          tempfile.NamedTemporaryFile(suffix=".png", delete=False) as end_file:
-
-
+        processed_start_image.save(start_file.name)
+        processed_end_image.save(end_file.name)
         start_image_path = start_file.name
         end_image_path = end_file.name
 
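Because the temporary files are created with delete=False, they outlive the with block, so the downstream nodes can read them by path; nothing in this hunk deletes them afterwards. A minimal sketch of the same pattern with explicit cleanup (the os.unlink call is an assumption, not in the commit):

    import os
    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
        path = f.name  # file persists after the block exits
    # ... write the image to `path` and run the pipeline ...
    os.unlink(path)  # explicit cleanup once the file is no longer needed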
@@ -269,7 +341,7 @@
 
     progress(0.2, desc="Preparing initial latents...")
     initial_latents = wanfirstlastframetovideo.EXECUTE_NORMALIZED(
-        width=
+        width=video_width, height=video_height, length=duration, batch_size=1,
         positive=get_value_at_index(positive_conditioning, 0),
         negative=get_value_at_index(negative_conditioning, 0),
         vae=get_value_at_index(vae, 0),
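For concreteness, with a 1920x1080 start frame and the "Short" preset, the new keyword arguments resolve roughly as follows (a sketch; EXECUTE_NORMALIZED's remaining parameters are unchanged from the surrounding lines):

    video_width, video_height = calculate_video_dimensions(1920, 1080)  # (832, 464)
    duration = 33  # frames, ~2s at 16 fps
    # -> EXECUTE_NORMALIZED(width=832, height=464, length=33, batch_size=1, ...)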
@@ -321,6 +393,8 @@
     progress(1.0, desc="Done!")
     return f"output/{save_result['ui']['images'][0]['filename']}"
 
+
+
 css = '''
 .fillable{max-width: 1100px !important}
 .dark .progress-text {color: white}
@@ -328,6 +402,7 @@ css = '''
 with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
     gr.Markdown("# Wan 2.2 First/Last Frame Video Fast")
     gr.Markdown("Running the [Wan 2.2 First/Last Frame ComfyUI workflow](https://www.reddit.com/r/StableDiffusion/comments/1me4306/psa_wan_22_does_first_frame_last_frame_out_of_the/) on ZeroGPU")
+
     with gr.Row():
         with gr.Column():
             with gr.Row():
@@ -337,13 +412,10 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
             prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images", value="transition")
 
             with gr.Accordion("Advanced Settings", open=False):
-                duration = gr.
-
-
-
-                    step=0.1,
-                    label="Video Duration (seconds)",
-                    info="Longer videos take longer to generate"
+                duration = gr.Radio(
+                    [("Short (2s)", 33), ("Mid (4s)", 66)],
+                    value=33,
+                    label="Video Duration",
                 )
                 negative_prompt = gr.Textbox(
                     label="Negative Prompt",
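gr.Radio accepts (label, value) tuples for its choices, so the UI displays "Short (2s)" while generate_video receives the integer 33. A minimal standalone sketch of the same pattern (a hypothetical demo, assuming a recent Gradio):

    import gradio as gr

    def report(frames):
        return f"Generating {frames} frames (~{frames / 16:.1f}s at 16 fps)"

    with gr.Blocks() as demo:
        choice = gr.Radio([("Short (2s)", 33), ("Mid (4s)", 66)], value=33, label="Video Duration")
        plan = gr.Textbox(label="Plan")
        choice.change(report, inputs=choice, outputs=plan)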
@@ -365,7 +437,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
     gr.Examples(
         examples=[
             ["poli_tower.png", "tower_takes_off.png", "the man turns"],
-            ["capybara_zoomed.png", "capybara.
+            ["capybara_zoomed.png", "capybara.png", "a dramatic dolly zoom"],
         ],
         inputs=[start_image, end_image, prompt],
         outputs=output_video,