Update app.py

app.py CHANGED
@@ -73,16 +73,12 @@ aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')


-default_prompt_i2v = "
-default_negative_prompt = "
+default_prompt_i2v = "Bring this image to life, applying film-like motion and smooth animation"
+default_negative_prompt = "vivid tones, overexposed, static, blurred details, subtitles, style, artwork, painting, frame, still, overall gray tone, worst quality, low quality, JPEG compression, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, malformed limbs, fused fingers, static frame, cluttered background, three legs, many people in the background, walking backwards"

def resize_image(image: Image.Image) -> Image.Image:
-    """
-    Resizes an image to fit within the model's constraints, preserving aspect ratio as much as possible.
-    """
    width, height = image.size

-    # Handle square case
    if width == height:
        return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
@@ -94,22 +90,20 @@ def resize_image(image: Image.Image) -> Image.Image:
    image_to_resize = image

    if aspect_ratio > MAX_ASPECT_RATIO:
-        # Very wide image -> crop width to fit 832x480 aspect ratio
        target_w, target_h = MAX_DIM, MIN_DIM
        crop_width = int(round(height * MAX_ASPECT_RATIO))
        left = (width - crop_width) // 2
        image_to_resize = image.crop((left, 0, left + crop_width, height))
    elif aspect_ratio < MIN_ASPECT_RATIO:
-        # Very tall image -> crop height to fit 480x832 aspect ratio
        target_w, target_h = MIN_DIM, MAX_DIM
        crop_height = int(round(width / MIN_ASPECT_RATIO))
        top = (height - crop_height) // 2
        image_to_resize = image.crop((0, top, width, top + crop_height))
    else:
-        if width > height:
+        if width > height:
            target_w = MAX_DIM
            target_h = int(round(target_w / aspect_ratio))
-        else:
+        else:
            target_h = MAX_DIM
            target_w = int(round(target_h * aspect_ratio))
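Aside: the crop-then-resize above clamps extreme aspect ratios into the model's 832x480 envelope before scaling. A minimal standalone sketch of that clamp; the constant values are assumptions based on the 832x480 / 480x832 targets named in the removed comments, not read from app.py:

# Standalone sketch of the aspect-ratio clamp used by resize_image.
MAX_DIM, MIN_DIM = 832, 480                # assumed from the removed comments
MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM       # ~1.733 (832x480)
MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM       # ~0.577 (480x832)

def clamp_crop(width: int, height: int) -> tuple[int, int, int, int]:
    """Return the centered crop box (left, top, right, bottom) that
    brings width/height into [MIN_ASPECT_RATIO, MAX_ASPECT_RATIO]."""
    aspect = width / height
    if aspect > MAX_ASPECT_RATIO:          # too wide: trim the sides
        crop_w = round(height * MAX_ASPECT_RATIO)
        left = (width - crop_w) // 2
        return (left, 0, left + crop_w, height)
    if aspect < MIN_ASPECT_RATIO:          # too tall: trim top and bottom
        crop_h = round(width / MIN_ASPECT_RATIO)
        top = (height - crop_h) // 2
        return (0, top, width, top + crop_h)
    return (0, 0, width, height)           # already within range

# A 4000x1000 panorama is center-cropped to 1733x1000 before resizing.
print(clamp_crop(4000, 1000))  # (1133, 0, 2866, 1000)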
@@ -163,48 +157,8 @@ def generate_video(
    randomize_seed = False,
    progress=gr.Progress(track_tqdm=True),
):
-    """
-    Generate a video from an input image using the Wan 2.2 14B I2V model with Lightning LoRA.
-
-    This function takes an input image and generates a video animation based on the provided
-    prompt and parameters. It uses an FP8 quantized Wan 2.2 14B Image-to-Video model with Lightning LoRA
-    for fast generation in 4-8 steps.
-
-    Args:
-        input_image (PIL.Image): The input image to animate. Will be resized to target dimensions.
-        prompt (str): Text prompt describing the desired animation or motion.
-        steps (int, optional): Number of inference steps. More steps = higher quality but slower.
-            Defaults to 4. Range: 1-30.
-        negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
-            Defaults to default_negative_prompt (contains unwanted visual artifacts).
-        duration_seconds (float, optional): Duration of the generated video in seconds.
-            Defaults to 2. Clamped between MIN_FRAMES_MODEL/FIXED_FPS and MAX_FRAMES_MODEL/FIXED_FPS.
-        guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
-            Defaults to 1.0. Range: 0.0-20.0.
-        guidance_scale_2 (float, optional): Controls adherence to the prompt. Higher values = more adherence.
-            Defaults to 1.0. Range: 0.0-20.0.
-        seed (int, optional): Random seed for reproducible results. Defaults to 42.
-            Range: 0 to MAX_SEED (2147483647).
-        randomize_seed (bool, optional): Whether to use a random seed instead of the provided seed.
-            Defaults to False.
-        progress (gr.Progress, optional): Gradio progress tracker. Defaults to gr.Progress(track_tqdm=True).
-
-    Returns:
-        tuple: A tuple containing:
-            - video_path (str): Path to the generated video file (.mp4)
-            - current_seed (int): The seed used for generation (useful when randomize_seed=True)
-
-    Raises:
-        gr.Error: If input_image is None (no image uploaded).
-
-    Note:
-        - Frame count is calculated as duration_seconds * FIXED_FPS (24)
-        - Output dimensions are adjusted to be multiples of MOD_VALUE (32)
-        - The function uses GPU acceleration via the @spaces.GPU decorator
-        - Generation time varies based on steps and duration (see get_duration function)
-    """
    if input_image is None:
-        raise gr.Error("
+        raise gr.Error("Please upload an image.")

    num_frames = get_num_frames(duration_seconds)
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
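For reference, the removed docstring states that the frame count is duration_seconds * FIXED_FPS (24), clamped between MIN_FRAMES_MODEL/FIXED_FPS and MAX_FRAMES_MODEL/FIXED_FPS. A sketch of what get_num_frames plausibly does under those notes; the clamp bounds below are illustrative placeholders, not values read from app.py:

# Sketch of the frame-count clamp described in the removed docstring.
FIXED_FPS = 24           # from the docstring note
MIN_FRAMES_MODEL = 8     # assumed placeholder
MAX_FRAMES_MODEL = 121   # assumed placeholder

def get_num_frames(duration_seconds: float) -> int:
    frames = round(duration_seconds * FIXED_FPS)
    return int(min(max(frames, MIN_FRAMES_MODEL), MAX_FRAMES_MODEL))

print(get_num_frames(3.5))    # 84 frames, the UI's default duration
print(get_num_frames(100.0))  # clamped to MAX_FRAMES_MODEL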
@@ -230,26 +184,35 @@ def generate_video(

    return video_path, current_seed

-
-
-    gr.Markdown("
+# Polished Korean UI
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎬 Convert an Image to Video")
+    gr.Markdown("**Wan 2.2 Lightning** - fast video generation in 4-8 steps")
+
    with gr.Row():
-        with gr.Column():
-            input_image_component = gr.Image(type="pil", label="
-            prompt_input = gr.Textbox(label="
-            duration_seconds_input = gr.Slider(
+        with gr.Column(scale=1):
+            input_image_component = gr.Image(type="pil", label="Input Image")
+            prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v, lines=2)
+            duration_seconds_input = gr.Slider(
+                minimum=MIN_DURATION,
+                maximum=MAX_DURATION,
+                step=0.1,
+                value=3.5,
+                label="Video Length (seconds)"
+            )

-            with gr.Accordion("
-                negative_prompt_input = gr.Textbox(label="
-
-
-
-
-
-
-            generate_button = gr.Button("
-
-
+            with gr.Accordion("Advanced Settings", open=False):
+                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=2)
+                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Generation Steps")
+                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 1")
+                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2")
+                seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
+                randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
+
+            generate_button = gr.Button("🎥 Generate Video", variant="primary", size="lg")
+
+        with gr.Column(scale=1):
+            video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)

    ui_inputs = [
        input_image_component, prompt_input, steps_slider,
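The click wiring for generate_button sits outside the hunks shown on this page. A plausible sketch of it, assuming ui_inputs is passed as-is and the outputs mirror the Examples wiring in the next hunk:

# Hypothetical wiring; the actual call is not part of this diff.
generate_button.click(
    fn=generate_video,
    inputs=ui_inputs,                     # image, prompt, steps, ...
    outputs=[video_output, seed_input],   # video path + seed actually used
)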
@@ -262,21 +225,24 @@ with gr.Blocks() as demo:
        examples=[
            [
                "wan_i2v_input.JPG",
-                "POV
+                "POV selfie video: a white cat in sunglasses sits on a surfboard with a relaxed smile, a tropical beach behind it (clear water, green hills, a blue sky with clouds). The surfboard tips, the cat tumbles into the sea, and the camera plunges underwater amid bubbles and shafts of sunlight. The cat's face is visible underwater for a moment, then it pops back above the surface and keeps filming the selfie, all in a cheerful summer-vacation mood.",
                4,
            ],
            [
                "wan22_input_2.jpg",
-                "
+                "A sleek lunar rover glides from left to right, kicking up moon dust. Astronauts in white spacesuits climb aboard with the bouncing gait unique to the Moon. In the far background a VTOL craft descends vertically and lands quietly on the surface. Throughout the scene a surreal aurora dances across the star-filled sky, and curtains of green, blue, and purple light bathe the lunar landscape in a mysterious, magical glow.",
                4,
            ],
            [
                "kill_bill.jpeg",
-                "
+                "Uma Thurman's character Beatrix Kiddo holds her razor-sharp katana steady under cinematic lighting. Suddenly the gleaming steel begins to soften and distort, losing structural integrity like heated metal. The blade's perfect edge slowly unravels and droops, molten steel flowing downward in silvery streams. The transformation starts subtly at first, then accelerates as the metal grows ever more fluid. The camera stays fixed on her face as her sharp gaze gradually narrows, not in deadly focus but in confusion and alarm as she watches her weapon melt before her eyes. Her breathing quickens slightly as she witnesses the impossible transformation. The melting intensifies, the katana's perfect form growing ever more abstract and dripping away like liquid mercury. Molten droplets fall to the floor with soft metallic impacts. Her expression shifts from calm readiness to bewilderment and concern as the legendary instrument of vengeance literally liquefies in her hands, leaving her defenseless.",
                6,
            ],
        ],
-        inputs=[input_image_component, prompt_input, steps_slider],
+        inputs=[input_image_component, prompt_input, steps_slider],
+        outputs=[video_output, seed_input],
+        fn=generate_video,
+        cache_examples="lazy"
    )

if __name__ == "__main__":