C4G-HKUST committed on
Commit
d77df53
·
1 Parent(s): fc0b74d

feat: timeout check

Browse files
Files changed (1) hide show
  1. app.py +133 -16
app.py CHANGED
@@ -611,14 +611,75 @@ def run_graio_demo(args):
611
  logging.info(f"No audio files provided, video saved to: {output_file}")
612
 
613
  logging.info("Finished.")
614
- return output_file
 
 
 
 
615
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
  # 使用 @spaces.GPU 装饰器包装 generate_video 函数(参考 LivePortrait)
617
  # 参考: https://huggingface.co/spaces/KlingTeam/LivePortrait/blob/main/app.py
618
  # @spaces.GPU 装饰器会自动处理 GPU 初始化,不需要手动初始化
619
 
620
- # 快速生成模式:121秒,固定10步去噪
621
- @spaces.GPU(duration=121)
622
  def gpu_wrapped_generate_video_fast(*args, **kwargs):
623
  # 固定使用10步去噪,通过关键字参数传递
624
  kwargs['fixed_steps'] = 8
@@ -680,8 +741,8 @@ def run_graio_demo(args):
680
 
681
  return gpu_wrapped_generate_video_worker(*args, **kwargs)
682
 
683
- # 高质量生成模式:720秒,用户选择去噪步数
684
- @spaces.GPU(duration=720)
685
  def gpu_wrapped_generate_video_quality(*args, **kwargs):
686
  return gpu_wrapped_generate_video_worker(*args, **kwargs)
687
 
@@ -735,7 +796,11 @@ def run_graio_demo(args):
735
  logging.warning(f"Failed to move models to GPU: {e}")
736
 
737
  result = generate_video(*args, **kwargs)
738
- return result
 
 
 
 
739
 
740
 
741
 
@@ -835,21 +900,21 @@ def run_graio_demo(args):
835
 
836
  with gr.Row():
837
  run_i2v_button_fast = gr.Button(
838
- "Generate Video (Fast - 121s, 8 steps)",
839
  variant="secondary",
840
  scale=1
841
  )
842
  run_i2v_button_quality = gr.Button(
843
- "Generate Video (Quality - 720s, Custom steps)",
844
  variant="primary",
845
  scale=1
846
  )
847
  gr.Markdown("""
848
  **Generation Modes:**
849
- - **Fast Mode (up to 121s GPU budget)**: Fixed 8 denoising steps for quick generation.
850
- - **Quality Mode (up to 720s GPU budget)**: Custom denoising steps (adjustable via "Diffusion steps" slider, default: 25 steps).
851
 
852
- *Note: The GPU duration (121s/720s) represents the maximum budget allocated, not the actual generation time. Multi-person videos generally require longer duration and more Usage Quota for better quality.*
853
  """)
854
 
855
  with gr.Column(scale=2):
@@ -859,7 +924,7 @@ def run_graio_demo(args):
859
  gr.Markdown("""
860
  ### Example Cases
861
 
862
- *Note: Generation time (tested on NVIDIA H121 GPU with 40 denoising steps) may vary depending on GPU specifications and system load.*
863
  """)
864
 
865
  # 创建一个函数来处理 examples 选择
@@ -927,13 +992,65 @@ def run_graio_demo(args):
927
  img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
928
  sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector
929
  )
 
 
 
930
  return result
931
 
932
- def handle_quality_generation(*args):
933
- result = gpu_wrapped_generate_video_quality(*args)
934
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
935
 
936
- # 快速生成按钮:121秒,固定10步
937
  run_i2v_button_fast.click(
938
  fn=handle_fast_generation,
939
  inputs=[img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3, sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector],
 
611
  logging.info(f"No audio files provided, video saved to: {output_file}")
612
 
613
  logging.info("Finished.")
614
+
615
+ # 计算视频时长信息(用于quality模式的提示)
616
+ fps = getattr(cfg, 'fps', 24)
617
+ video_duration_seconds = current_frame_num / fps if current_frame_num and fps else 0
618
+ return output_file, video_duration_seconds, actual_steps
619
 
620
+ # 计算动态duration的函数
621
+ def get_duration(video_seconds, steps):
622
+ """
623
+ 计算quality模式所需的GPU duration
624
+ duration = 视频秒数 * 步数 * 2秒
625
+ """
626
+ return int(video_seconds * steps * 2)
627
+
628
+ # 为quality模式创建动态duration计算函数
629
+ def calculate_quality_duration(*args, **kwargs):
630
+ """
631
+ 从函数参数中提取视频时长和步数,计算动态duration
632
+ 参数顺序: img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
633
+ sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector
634
+ """
635
+ if len(args) >= 11:
636
+ img2vid_audio_1 = args[3]
637
+ img2vid_audio_2 = args[4]
638
+ img2vid_audio_3 = args[5]
639
+ sd_steps = args[6]
640
+ person_num_selector = args[9]
641
+ audio_mode_selector = args[10]
642
+
643
+ # 根据人数收集音频路径
644
+ audio_paths = []
645
+ if person_num_selector == "1 Person":
646
+ if img2vid_audio_1:
647
+ audio_paths.append(img2vid_audio_1)
648
+ elif person_num_selector == "2 Persons":
649
+ if img2vid_audio_1:
650
+ audio_paths.append(img2vid_audio_1)
651
+ if img2vid_audio_2:
652
+ audio_paths.append(img2vid_audio_2)
653
+ elif person_num_selector == "3 Persons":
654
+ if img2vid_audio_1:
655
+ audio_paths.append(img2vid_audio_1)
656
+ if img2vid_audio_2:
657
+ audio_paths.append(img2vid_audio_2)
658
+ if img2vid_audio_3:
659
+ audio_paths.append(img2vid_audio_3)
660
+
661
+ # 计算预期的视频时长
662
+ fps = getattr(cfg, 'fps', 24)
663
+ expected_video_seconds = 8.0 # 默认值
664
+ if audio_paths and len(audio_paths) > 0:
665
+ try:
666
+ calculated_frame_num = calculate_frame_num_from_audio(audio_paths, fps, mode=audio_mode_selector)
667
+ expected_video_seconds = calculated_frame_num / fps
668
+ except Exception as e:
669
+ logging.warning(f"Failed to calculate expected video duration for GPU allocation: {e}")
670
+
671
+ # 计算并返回duration
672
+ return get_duration(expected_video_seconds, sd_steps)
673
+ else:
674
+ # 如果参数不足,返回默认值
675
+ return 720
676
+
677
  # 使用 @spaces.GPU 装饰器包装 generate_video 函数(参考 LivePortrait)
678
  # 参考: https://huggingface.co/spaces/KlingTeam/LivePortrait/blob/main/app.py
679
  # @spaces.GPU 装饰器会自动处理 GPU 初始化,不需要手动初始化
680
 
681
+ # 快速生成模式:220秒,固定8步去噪
682
+ @spaces.GPU(duration=220)
683
  def gpu_wrapped_generate_video_fast(*args, **kwargs):
684
  # 固定使用10步去噪,通过关键字参数传递
685
  kwargs['fixed_steps'] = 8
 
741
 
742
  return gpu_wrapped_generate_video_worker(*args, **kwargs)
743
 
744
+ # 高质量生成模式:动态duration,根据视频时长和步数计算
745
+ @spaces.GPU(duration=calculate_quality_duration)
746
  def gpu_wrapped_generate_video_quality(*args, **kwargs):
747
  return gpu_wrapped_generate_video_worker(*args, **kwargs)
748
 
 
796
  logging.warning(f"Failed to move models to GPU: {e}")
797
 
798
  result = generate_video(*args, **kwargs)
799
+ # generate_video 现在返回 (output_file, video_duration_seconds, actual_steps)
800
+ if isinstance(result, tuple) and len(result) == 3:
801
+ return result
802
+ # 兼容旧格式(如果返回的是单个值)
803
+ return result, 0, 0
804
 
805
 
806
 
 
900
 
901
  with gr.Row():
902
  run_i2v_button_fast = gr.Button(
903
+ "Generate Video (Fast - 220s, 8 steps)",
904
  variant="secondary",
905
  scale=1
906
  )
907
  run_i2v_button_quality = gr.Button(
908
+ "Generate Video (Quality - Dynamic, Custom steps)",
909
  variant="primary",
910
  scale=1
911
  )
912
  gr.Markdown("""
913
  **Generation Modes:**
914
+ - **Fast Mode (220s GPU budget)**: Fixed 8 denoising steps for quick generation. Maximum video duration: 6 seconds.
915
+ - **Quality Mode (Dynamic GPU budget)**: Custom denoising steps (adjustable via "Diffusion steps" slider, default: 25 steps). GPU duration is dynamically calculated as: video_seconds × steps × 2s. Maximum video duration: 8 seconds with default 25 steps.
916
 
917
+ *Note: Fast mode has a fixed 220s GPU budget. Quality mode dynamically allocates GPU time based on video length and denoising steps. Multi-person videos generally require longer duration and more Usage Quota for better quality.*
918
  """)
919
 
920
  with gr.Column(scale=2):
 
924
  gr.Markdown("""
925
  ### Example Cases
926
 
927
+ *Note: Generation time (tested on NVIDIA H220 GPU with 40 denoising steps) may vary depending on GPU specifications and system load.*
928
  """)
929
 
930
  # 创建一个函数来处理 examples 选择
 
992
  img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
993
  sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector
994
  )
995
+ # 处理返回结果:可能是 (output_file, video_duration_seconds, actual_steps) 或 output_file
996
+ if isinstance(result, tuple) and len(result) == 3:
997
+ return result[0] # 只返回视频文件
998
  return result
999
 
1000
+ def handle_quality_generation(img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
1001
+ sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector):
1002
+ # 在生成前先计算预期的视频时长和duration
1003
+ # 根据人数收集音频路径
1004
+ audio_paths = []
1005
+ if person_num_selector == "1 Person":
1006
+ if img2vid_audio_1:
1007
+ audio_paths.append(img2vid_audio_1)
1008
+ elif person_num_selector == "2 Persons":
1009
+ if img2vid_audio_1:
1010
+ audio_paths.append(img2vid_audio_1)
1011
+ if img2vid_audio_2:
1012
+ audio_paths.append(img2vid_audio_2)
1013
+ elif person_num_selector == "3 Persons":
1014
+ if img2vid_audio_1:
1015
+ audio_paths.append(img2vid_audio_1)
1016
+ if img2vid_audio_2:
1017
+ audio_paths.append(img2vid_audio_2)
1018
+ if img2vid_audio_3:
1019
+ audio_paths.append(img2vid_audio_3)
1020
+
1021
+ # 计算预期的视频时长
1022
+ fps = getattr(cfg, 'fps', 24)
1023
+ expected_video_seconds = 0
1024
+ if audio_paths and len(audio_paths) > 0:
1025
+ try:
1026
+ calculated_frame_num = calculate_frame_num_from_audio(audio_paths, fps, mode=audio_mode_selector)
1027
+ expected_video_seconds = calculated_frame_num / fps
1028
+ except Exception as e:
1029
+ logging.warning(f"Failed to calculate expected video duration: {e}")
1030
+ expected_video_seconds = 8.0 # 默认值
1031
+
1032
+ # 计算动态duration
1033
+ expected_duration = get_duration(expected_video_seconds, sd_steps)
1034
+
1035
+ # 执行生成
1036
+ result = gpu_wrapped_generate_video_quality(
1037
+ img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
1038
+ sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector
1039
+ )
1040
+
1041
+ # 处理返回结果并显示提示
1042
+ if isinstance(result, tuple) and len(result) == 3:
1043
+ output_file, actual_video_seconds, actual_steps = result
1044
+ # 计算实际使用的duration
1045
+ actual_duration = get_duration(actual_video_seconds, actual_steps)
1046
+ # 使用 gr.Info 提示用户
1047
+ info_msg = f"Video generation completed! Duration used: {actual_duration}s (estimated: {actual_video_seconds:.2f}s video × {actual_steps} steps × 2s)"
1048
+ gr.Info(info_msg)
1049
+ return output_file
1050
+ else:
1051
+ return result
1052
 
1053
+ # 快速生成按钮:220秒,固定10步
1054
  run_i2v_button_fast.click(
1055
  fn=handle_fast_generation,
1056
  inputs=[img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3, sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector],