multimodalart (HF Staff) committed
Commit b118cac · verified · Parent: 715bd60

Add generate tab
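
This commit adds a "Generate" tab beside the End Frame upload: a button that asks the external multimodalart/nano-banana Space to imagine the scene five seconds later, then feeds the generated frame straight into video generation.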

Files changed (1): app.py (+74 −10)
app.py CHANGED
@@ -15,6 +15,7 @@ import numpy as np
 from PIL import Image
 import random
 import gc
+from gradio_client import Client, handle_file # Import for API call
 
 # Import the optimization function from the separate file
 from optimization import optimize_pipeline_
@@ -66,9 +67,8 @@ for i in range(3):
     torch.cuda.synchronize()
     torch.cuda.empty_cache()
 
-# Calling the imported optimization function with a placeholder image for compilation tracing
 optimize_pipeline_(pipe,
-    image=Image.new('RGB', (MAX_DIMENSION, MIN_DIMENSION)), # Use representative dims
+    image=Image.new('RGB', (MAX_DIMENSION, MIN_DIMENSION)),
     prompt='prompt',
     height=MIN_DIMENSION,
     width=MAX_DIMENSION,
@@ -78,6 +78,43 @@ print("All models loaded and optimized. Gradio app is ready.")
 
 
 # --- 2. Image Processing and Application Logic ---
+def generate_end_frame(start_img, gen_prompt, progress=gr.Progress(track_tqdm=True)):
+    """Calls an external Gradio API to generate an image."""
+    if start_img is None:
+        raise gr.Error("Please provide a Start Frame first.")
+
+    hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        raise gr.Error("HF_TOKEN not found in environment variables. Please set it in your Space secrets.")
+
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
+        start_img.save(tmpfile.name)
+        tmp_path = tmpfile.name
+
+    progress(0.1, desc="Connecting to image generation API...")
+    client = Client("multimodalart/nano-banana")
+
+    progress(0.5, desc=f"Generating with prompt: '{gen_prompt}'...")
+    try:
+        result = client.predict(
+            prompt=gen_prompt,
+            images=[
+                {"image": handle_file(tmp_path)}
+            ],
+            manual_token=hf_token,
+            api_name="/unified_image_generator"
+        )
+    finally:
+        os.remove(tmp_path)
+
+    progress(1.0, desc="Done!")
+    print(result)
+    return result
+
+def switch_to_upload_tab():
+    """Returns a gr.Tabs update to switch to the first tab."""
+    return gr.Tabs(selected="upload_tab")
+
 
 def process_image_for_video(image: Image.Image) -> Image.Image:
     """
@@ -199,23 +236,37 @@ def generate_video(
     return video_path, current_seed
 
 
-# --- 3. Gradio User Interface --- (No changes needed here)
+# --- 3. Gradio User Interface ---
 
 css = '''
 .fillable{max-width: 1100px !important}
 .dark .progress-text {color: white}
+#general_items{margin-top: 2em}
+#group_all{overflow:visible}
+#group_all .styler{overflow:visible}
+#group_tabs .tabitem{padding: 0}
+.tab-wrapper{margin-top: -33px;z-index: 999;position: absolute;width: 100%;background-color: var(--block-background-fill);padding: 0;}
+#component-9-button{width: 50%;justify-content: center}
+#component-11-button{width: 50%;justify-content: center}
+#or_item{text-align: center; padding-top: 1em; padding-bottom: 1em; font-size: 1.1em;margin-left: .5em;margin-right: .5em;width: calc(100% - 1em)}
+#fivesec{margin-top: 5em;margin-left: .5em;margin-right: .5em;width: calc(100% - 1em)}
 '''
 with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
     gr.Markdown("# Wan 2.2 First/Last Frame Video Fast")
     gr.Markdown("Based on the [Wan 2.2 First/Last Frame workflow](https://www.reddit.com/r/StableDiffusion/comments/1me4306/psa_wan_22_does_first_frame_last_frame_out_of_the/), applied to 🧨 Diffusers + [lightx2v/Wan2.2-Lightning](https://huggingface.co/lightx2v/Wan2.2-Lightning) 8-step LoRA")
 
-    with gr.Row():
+    with gr.Row(elem_id="general_items"):
         with gr.Column():
-            with gr.Group():
+            with gr.Group(elem_id="group_all"):
                 with gr.Row():
                     start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
-                    end_image = gr.Image(type="pil", label="End Frame", sources=["upload", "clipboard"])
-
+                    # Capture the Tabs component in a variable and assign IDs to tabs
+                    with gr.Tabs(elem_id="group_tabs") as tabs:
+                        with gr.TabItem("Upload", id="upload_tab"):
+                            end_image = gr.Image(type="pil", label="End Frame", sources=["upload", "clipboard"])
+                        with gr.TabItem("Generate", id="generate_tab"):
+                            generate_5seconds = gr.Button("Generate scene 5 seconds in the future", elem_id="fivesec")
+                            gr.Markdown("Generate a custom end-frame with an edit model like [Nano Banana](https://huggingface.co/spaces/multimodalart/nano-banana) or [Qwen Image Edit](https://huggingface.co/spaces/multimodalart/Qwen-Image-Edit-Fast)", elem_id="or_item")
                 prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images")
 
         with gr.Accordion("Advanced Settings", open=False):
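
The End Frame column is now a two-tab group ("Upload" / "Generate"). Programmatic tab switching works because the `gr.Tabs` container is captured in a variable and each `gr.TabItem` carries an explicit `id`; an event handler can then return `gr.Tabs(selected=...)` as an update, which is exactly what `switch_to_upload_tab` does. A toy sketch of the pattern in isolation (hypothetical ids, not from this repo):

```python
import gradio as gr

with gr.Blocks() as demo:
    with gr.Tabs() as tabs:                      # capture the container
        with gr.TabItem("First", id="first"):    # ids make tabs addressable
            gr.Markdown("Tab one")
        with gr.TabItem("Second", id="second"):
            btn = gr.Button("Jump to first tab")
    # Returning gr.Tabs(selected=<id>) from a handler switches the visible tab
    btn.click(fn=lambda: gr.Tabs(selected="first"), inputs=None, outputs=[tabs])

demo.launch()
```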
@@ -233,7 +284,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
         with gr.Column():
             output_video = gr.Video(label="Generated Video", autoplay=True)
 
-    # Define the inputs list for the click event
+    # Main video generation button
     ui_inputs = [
         start_image,
         end_image,
@@ -246,7 +297,6 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
         seed_input,
         randomize_seed_checkbox
     ]
-    # The seed_input is both an input and an output to reflect the randomly generated seed
     ui_outputs = [output_video, seed_input]
 
     generate_button.click(
@@ -255,6 +305,20 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
         outputs=ui_outputs
     )
 
+    generate_5seconds.click(
+        fn=switch_to_upload_tab,
+        inputs=None,
+        outputs=[tabs]
+    ).then(
+        fn=lambda img: generate_end_frame(img, "this image is a still frame from a movie. generate a new frame with what happens on this scene 5 seconds in the future"),
+        inputs=[start_image],
+        outputs=[end_image]
+    ).then(
+        fn=generate_video,
+        inputs=ui_inputs,
+        outputs=ui_outputs
+    )
+
     gr.Examples(
         examples=[
             ["poli_tower.png", "tower_takes_off.png", "the man turns around"],
@@ -268,4 +332,4 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
     )
 
 if __name__ == "__main__":
-    app.launch(share=True, show_error=True)
+    app.launch(share=True)
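
One behavioral note on the last hunk: assuming Gradio's default of `show_error=False`, removing `show_error=True` means unhandled exceptions are no longer surfaced as error modals in the UI; explicit `gr.Error` messages (like the ones raised in `generate_end_frame`) are still shown.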