MultiPerson

Running on Zero

App Files Community

C4G-HKUST committed on 14 days ago

Commit

12835e4

1 Parent(s): b87c83f

feat: time out check

Browse files

Files changed (2) hide show

app.py +12 -15
wan/utils/infer_utils.py +6 -10

app.py CHANGED Viewed

@@ -485,9 +485,8 @@ def run_graio_demo(args):
                 # Fast模式：如果trim_to_6s为True，强制限制为6秒对应的帧数
                 if trim_to_6s:
-                    # 计算6秒对应的帧数（4n+1格式）
-                    max_frames_6s = int(math.ceil(6.0 * fps))
-                    max_frames_6s = ((max_frames_6s - 1) // 4) * 4 + 1
                     current_frame_num = min(calculated_frame_num, max_frames_6s)
                     logging.warning(f"Fast mode: Audio duration exceeds 6 seconds. Trimming to 6 seconds ({max_frames_6s} frames). Original: {calculated_frame_num} frames")
                 else:
@@ -618,8 +617,8 @@ def run_graio_demo(args):
     # 参考: https://huggingface.co/spaces/KlingTeam/LivePortrait/blob/main/app.py
     # @spaces.GPU 装饰器会自动处理 GPU 初始化，不需要手动初始化
-    # 快速生成模式：100秒，固定10步去噪
-    @spaces.GPU(duration=100)
     def gpu_wrapped_generate_video_fast(*args, **kwargs):
         # 固定使用10步去噪，通过关键字参数传递
         kwargs['fixed_steps'] = 8
@@ -663,9 +662,8 @@ def run_graio_demo(args):
                 fps = getattr(cfg, 'fps', 24)
                 try:
                     calculated_frame_num = calculate_frame_num_from_audio(audio_paths, fps, mode=audio_mode_selector)
-                    # 计算6秒对应的帧数
-                    max_frames_6s = int(math.ceil(6.0 * fps))
-                    max_frames_6s = ((max_frames_6s - 1) // 4) * 4 + 1
                     if calculated_frame_num > max_frames_6s:
                         # 超过6秒，设置trim_to_6s标记
@@ -837,7 +835,7 @@ def run_graio_demo(args):
                 with gr.Row():
                     run_i2v_button_fast = gr.Button(
-                        "Generate Video (Fast - 100s, 8 steps)",
                         variant="secondary",
                         scale=1
                     )
@@ -848,10 +846,10 @@ def run_graio_demo(args):
                     )
                 gr.Markdown("""
                 **Generation Modes:**
-                - **Fast Mode (up to 100s GPU budget)**: Fixed 8 denoising steps for quick generation.
                 - **Quality Mode (up to 720s GPU budget)**: Custom denoising steps (adjustable via "Diffusion steps" slider, default: 25 steps).
-                *Note: The GPU duration (100s/720s) represents the maximum budget allocated, not the actual generation time. Multi-person videos generally require longer duration and more Usage Quota for better quality.*
                 """)
             with gr.Column(scale=2):
@@ -913,9 +911,8 @@ def run_graio_demo(args):
                 fps = getattr(cfg, 'fps', 24)
                 try:
                     calculated_frame_num = calculate_frame_num_from_audio(audio_paths, fps, mode=audio_mode_selector)
-                    # 计算6秒对应的帧数
-                    max_frames_6s = int(math.ceil(6.0 * fps))
-                    max_frames_6s = ((max_frames_6s - 1) // 4) * 4 + 1
                     if calculated_frame_num > max_frames_6s:
                         # 超过6秒，立即显示警告
@@ -936,7 +933,7 @@ def run_graio_demo(args):
             result = gpu_wrapped_generate_video_quality(*args)
             return result
-        # 快速生成按钮：100秒，固定10步
         run_i2v_button_fast.click(
             fn=handle_fast_generation,
             inputs=[img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3, sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector],

                 # Fast模式：如果trim_to_6s为True，强制限制为6秒对应的帧数
                 if trim_to_6s:
+                    # 6秒固定为145帧（4n+1格式）
+                    max_frames_6s = 145
                     current_frame_num = min(calculated_frame_num, max_frames_6s)
                     logging.warning(f"Fast mode: Audio duration exceeds 6 seconds. Trimming to 6 seconds ({max_frames_6s} frames). Original: {calculated_frame_num} frames")
                 else:
     # 参考: https://huggingface.co/spaces/KlingTeam/LivePortrait/blob/main/app.py
     # @spaces.GPU 装饰器会自动处理 GPU 初始化，不需要手动初始化
+    # 快速生成模式：200秒，固定10步去噪
+    @spaces.GPU(duration=200)
     def gpu_wrapped_generate_video_fast(*args, **kwargs):
         # 固定使用10步去噪，通过关键字参数传递
         kwargs['fixed_steps'] = 8
                 fps = getattr(cfg, 'fps', 24)
                 try:
                     calculated_frame_num = calculate_frame_num_from_audio(audio_paths, fps, mode=audio_mode_selector)
+                    # 6秒固定为145帧
+                    max_frames_6s = 145
                     if calculated_frame_num > max_frames_6s:
                         # 超过6秒，设置trim_to_6s标记
                 with gr.Row():
                     run_i2v_button_fast = gr.Button(
+                        "Generate Video (Fast - 200s, 8 steps)",
                         variant="secondary",
                         scale=1
                     )
                     )
                 gr.Markdown("""
                 **Generation Modes:**
+                - **Fast Mode (up to 200s GPU budget)**: Fixed 8 denoising steps for quick generation.
                 - **Quality Mode (up to 720s GPU budget)**: Custom denoising steps (adjustable via "Diffusion steps" slider, default: 25 steps).
+                *Note: The GPU duration (200s/720s) represents the maximum budget allocated, not the actual generation time. Multi-person videos generally require longer duration and more Usage Quota for better quality.*
                 """)
             with gr.Column(scale=2):
                 fps = getattr(cfg, 'fps', 24)
                 try:
                     calculated_frame_num = calculate_frame_num_from_audio(audio_paths, fps, mode=audio_mode_selector)
+                    # 6秒固定为145帧
+                    max_frames_6s = 145
                     if calculated_frame_num > max_frames_6s:
                         # 超过6秒，立即显示警告
             result = gpu_wrapped_generate_video_quality(*args)
             return result
+        # 快速生成按钮：200秒，固定10步
         run_i2v_button_fast.click(
             fn=handle_fast_generation,
             inputs=[img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3, sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector],

wan/utils/infer_utils.py CHANGED Viewed

@@ -205,10 +205,8 @@ def process_audio_features(
             # Fast mode: trim to 6 seconds if trim_to_6s is True
             if trim_to_6s:
-                import math
-                # Calculate 6 seconds in frames
-                max_frames_6s = int(math.ceil(6.0 * fps))
-                max_frames_6s = ((max_frames_6s - 1) // 4) * 4 + 1
                 if total_length > max_frames_6s:
                     print(f"Fast mode: Trimming audio from {total_length} frames to {max_frames_6s} frames (6 seconds)")
                     # Truncate each audio proportionally
@@ -299,9 +297,8 @@ def process_audio_features(
                         # Fast mode: if trim_to_6s, limit to 6 seconds
                         target_frames = F
                         if trim_to_6s:
-                            import math
-                            max_frames_6s = int(math.ceil(6.0 * fps))
-                            max_frames_6s = ((max_frames_6s - 1) // 4) * 4 + 1
                             target_frames = min(F, max_frames_6s)
                             if F > max_frames_6s:
                                 print(f"Fast mode: Trimming audio {i} from {F} frames to {max_frames_6s} frames (6 seconds)")
@@ -349,9 +346,8 @@ def process_audio_features(
                 # Fast mode: if trim_to_6s, limit to 6 seconds
                 target_frames = F
                 if trim_to_6s:
-                    import math
-                    max_frames_6s = int(math.ceil(6.0 * fps))
-                    max_frames_6s = ((max_frames_6s - 1) // 4) * 4 + 1
                     target_frames = min(F, max_frames_6s)
                     if F > max_frames_6s:
                         print(f"Fast mode: Trimming single audio from {F} frames to {max_frames_6s} frames (6 seconds)")

             # Fast mode: trim to 6 seconds if trim_to_6s is True
             if trim_to_6s:
+                # 6秒固定为145帧
+                max_frames_6s = 145
                 if total_length > max_frames_6s:
                     print(f"Fast mode: Trimming audio from {total_length} frames to {max_frames_6s} frames (6 seconds)")
                     # Truncate each audio proportionally
                         # Fast mode: if trim_to_6s, limit to 6 seconds
                         target_frames = F
                         if trim_to_6s:
+                            # 6秒固定为145帧
+                            max_frames_6s = 145
                             target_frames = min(F, max_frames_6s)
                             if F > max_frames_6s:
                                 print(f"Fast mode: Trimming audio {i} from {F} frames to {max_frames_6s} frames (6 seconds)")
                 # Fast mode: if trim_to_6s, limit to 6 seconds
                 target_frames = F
                 if trim_to_6s:
+                    # 6秒固定为145帧
+                    max_frames_6s = 145
                     target_frames = min(F, max_frames_6s)
                     if F > max_frames_6s:
                         print(f"Fast mode: Trimming single audio from {F} frames to {max_frames_6s} frames (6 seconds)")