Update app.py

app.py CHANGED
@@ -73,16 +73,12 @@ aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')


-default_prompt_i2v = "
-default_negative_prompt = "
+default_prompt_i2v = "Bring this image to life, applying film-like motion and smooth animation"
+default_negative_prompt = "vivid tones, overexposed, static, blurred details, subtitles, style, artwork, painting, frame, still, overall gray tone, worst quality, low quality, JPEG compression, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, malformed limbs, fused fingers, static frame, cluttered background, three legs, many people in the background, walking backwards"

def resize_image(image: Image.Image) -> Image.Image:
-    """
-    Resizes an image to fit within the model's constraints, preserving aspect ratio as much as possible.
-    """
    width, height = image.size

-    # Handle square case
    if width == height:
        return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
@@ -94,22 +90,20 @@ def resize_image(image: Image.Image) -> Image.Image:
    image_to_resize = image

    if aspect_ratio > MAX_ASPECT_RATIO:
-        # Very wide image -> crop width to fit 832x480 aspect ratio
        target_w, target_h = MAX_DIM, MIN_DIM
        crop_width = int(round(height * MAX_ASPECT_RATIO))
        left = (width - crop_width) // 2
        image_to_resize = image.crop((left, 0, left + crop_width, height))
    elif aspect_ratio < MIN_ASPECT_RATIO:
-        # Very tall image -> crop height to fit 480x832 aspect ratio
        target_w, target_h = MIN_DIM, MAX_DIM
        crop_height = int(round(width / MIN_ASPECT_RATIO))
        top = (height - crop_height) // 2
        image_to_resize = image.crop((0, top, width, top + crop_height))
    else:
-        if width > height:
+        if width > height:
            target_w = MAX_DIM
            target_h = int(round(target_w / aspect_ratio))
-        else:
+        else:
            target_h = MAX_DIM
            target_w = int(round(target_h * aspect_ratio))
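Aside: the crop-then-resize above clamps extreme aspect ratios into the model's 832x480 envelope before scaling. A minimal standalone sketch of that clamp; the constant values are assumptions based on the 832x480 / 480x832 targets named in the removed comments, not read from app.py:

# Standalone sketch of the aspect-ratio clamp used by resize_image.
MAX_DIM, MIN_DIM = 832, 480                # assumed from the removed comments
MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM       # ~1.733 (832x480)
MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM       # ~0.577 (480x832)

def clamp_crop(width: int, height: int) -> tuple[int, int, int, int]:
    """Return the centered crop box (left, top, right, bottom) that
    brings width/height into [MIN_ASPECT_RATIO, MAX_ASPECT_RATIO]."""
    aspect = width / height
    if aspect > MAX_ASPECT_RATIO:          # too wide: trim the sides
        crop_w = round(height * MAX_ASPECT_RATIO)
        left = (width - crop_w) // 2
        return (left, 0, left + crop_w, height)
    if aspect < MIN_ASPECT_RATIO:          # too tall: trim top and bottom
        crop_h = round(width / MIN_ASPECT_RATIO)
        top = (height - crop_h) // 2
        return (0, top, width, top + crop_h)
    return (0, 0, width, height)           # already within range

# A 4000x1000 panorama is center-cropped to 1733x1000 before resizing.
print(clamp_crop(4000, 1000))  # (1133, 0, 2866, 1000)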
@@ -163,48 +157,8 @@ def generate_video(
    randomize_seed = False,
    progress=gr.Progress(track_tqdm=True),
):
-    """
-    Generate a video from an input image using the Wan 2.2 14B I2V model with Lightning LoRA.
-
-    This function takes an input image and generates a video animation based on the provided
-    prompt and parameters. It uses an FP8 quantized Wan 2.2 14B Image-to-Video model with Lightning LoRA
-    for fast generation in 4-8 steps.
-
-    Args:
-        input_image (PIL.Image): The input image to animate. Will be resized to target dimensions.
-        prompt (str): Text prompt describing the desired animation or motion.
-        steps (int, optional): Number of inference steps. More steps = higher quality but slower.
-            Defaults to 4. Range: 1-30.
-        negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
-            Defaults to default_negative_prompt (contains unwanted visual artifacts).
-        duration_seconds (float, optional): Duration of the generated video in seconds.
-            Defaults to 2. Clamped between MIN_FRAMES_MODEL/FIXED_FPS and MAX_FRAMES_MODEL/FIXED_FPS.
-        guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
-            Defaults to 1.0. Range: 0.0-20.0.
-        guidance_scale_2 (float, optional): Controls adherence to the prompt. Higher values = more adherence.
-            Defaults to 1.0. Range: 0.0-20.0.
-        seed (int, optional): Random seed for reproducible results. Defaults to 42.
-            Range: 0 to MAX_SEED (2147483647).
-        randomize_seed (bool, optional): Whether to use a random seed instead of the provided seed.
-            Defaults to False.
-        progress (gr.Progress, optional): Gradio progress tracker. Defaults to gr.Progress(track_tqdm=True).
-
-    Returns:
-        tuple: A tuple containing:
-            - video_path (str): Path to the generated video file (.mp4)
-            - current_seed (int): The seed used for generation (useful when randomize_seed=True)
-
-    Raises:
-        gr.Error: If input_image is None (no image uploaded).
-
-    Note:
-        - Frame count is calculated as duration_seconds * FIXED_FPS (24)
-        - Output dimensions are adjusted to be multiples of MOD_VALUE (32)
-        - The function uses GPU acceleration via the @spaces.GPU decorator
-        - Generation time varies based on steps and duration (see get_duration function)
-    """
    if input_image is None:
-        raise gr.Error("
+        raise gr.Error("Please upload an image.")

    num_frames = get_num_frames(duration_seconds)
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
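For reference, the removed docstring states that the frame count is duration_seconds * FIXED_FPS (24), clamped between MIN_FRAMES_MODEL/FIXED_FPS and MAX_FRAMES_MODEL/FIXED_FPS. A sketch of what get_num_frames plausibly does under those notes; the clamp bounds below are illustrative placeholders, not values read from app.py:

# Sketch of the frame-count clamp described in the removed docstring.
FIXED_FPS = 24           # from the docstring note
MIN_FRAMES_MODEL = 8     # assumed placeholder
MAX_FRAMES_MODEL = 121   # assumed placeholder

def get_num_frames(duration_seconds: float) -> int:
    frames = round(duration_seconds * FIXED_FPS)
    return int(min(max(frames, MIN_FRAMES_MODEL), MAX_FRAMES_MODEL))

print(get_num_frames(3.5))    # 84 frames, the UI's default duration
print(get_num_frames(100.0))  # clamped to MAX_FRAMES_MODEL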
@@ -230,26 +184,35 @@ def generate_video(

    return video_path, current_seed

-
-
-    gr.Markdown("
+# Polished Korean UI
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎬 Convert an Image to Video")
+    gr.Markdown("**Wan 2.2 Lightning** - fast video generation in 4-8 steps")
+
    with gr.Row():
-        with gr.Column():
-            input_image_component = gr.Image(type="pil", label="
-            prompt_input = gr.Textbox(label="
-            duration_seconds_input = gr.Slider(
+        with gr.Column(scale=1):
+            input_image_component = gr.Image(type="pil", label="Input Image")
+            prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v, lines=2)
+            duration_seconds_input = gr.Slider(
+                minimum=MIN_DURATION,
+                maximum=MAX_DURATION,
+                step=0.1,
+                value=3.5,
+                label="Video Length (seconds)"
+            )

-            with gr.Accordion("
-                negative_prompt_input = gr.Textbox(label="
-
-
-
-
-
-
-            generate_button = gr.Button("
-
-
+            with gr.Accordion("Advanced Settings", open=False):
+                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=2)
+                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Generation Steps")
+                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 1")
+                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2")
+                seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
+                randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
+
+            generate_button = gr.Button("🎥 Generate Video", variant="primary", size="lg")
+
+        with gr.Column(scale=1):
+            video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)

    ui_inputs = [
        input_image_component, prompt_input, steps_slider,
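The click wiring for generate_button sits outside the hunks shown on this page. A plausible sketch of it, assuming ui_inputs is passed as-is and the outputs mirror the Examples wiring in the next hunk:

# Hypothetical wiring; the actual call is not part of this diff.
generate_button.click(
    fn=generate_video,
    inputs=ui_inputs,                     # image, prompt, steps, ...
    outputs=[video_output, seed_input],   # video path + seed actually used
)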
@@ -262,21 +225,24 @@ with gr.Blocks() as demo:
        examples=[
            [
                "wan_i2v_input.JPG",
-                "POV
+                "POV selfie video: a white cat in sunglasses sits on a surfboard with a relaxed smile, a tropical beach behind it (clear water, green hills, a blue sky with clouds). The surfboard tips, the cat tumbles into the sea, and the camera plunges underwater amid bubbles and shafts of sunlight. The cat's face is visible underwater for a moment, then it pops back above the surface and keeps filming the selfie, all in a cheerful summer-vacation mood.",
                4,
            ],
            [
                "wan22_input_2.jpg",
-                "
+                "A sleek lunar rover glides from left to right, kicking up moon dust. Astronauts in white spacesuits climb aboard with the bouncing gait unique to the Moon. In the far background a VTOL craft descends vertically and lands quietly on the surface. Throughout the scene a surreal aurora dances across the star-filled sky, and curtains of green, blue, and purple light bathe the lunar landscape in a mysterious, magical glow.",
                4,
            ],
            [
                "kill_bill.jpeg",
-                "
+                "Uma Thurman's character Beatrix Kiddo holds her razor-sharp katana steady under cinematic lighting. Suddenly the gleaming steel begins to soften and distort, losing structural integrity like heated metal. The blade's perfect edge slowly unravels and droops, molten steel flowing downward in silvery streams. The transformation starts subtly at first, then accelerates as the metal grows ever more fluid. The camera stays fixed on her face as her sharp gaze gradually narrows, not in deadly focus but in confusion and alarm as she watches her weapon melt before her eyes. Her breathing quickens slightly as she witnesses the impossible transformation. The melting intensifies, the katana's perfect form growing ever more abstract and dripping away like liquid mercury. Molten droplets fall to the floor with soft metallic impacts. Her expression shifts from calm readiness to bewilderment and concern as the legendary instrument of vengeance literally liquefies in her hands, leaving her defenseless.",
                6,
            ],
        ],
-        inputs=[input_image_component, prompt_input, steps_slider],
+        inputs=[input_image_component, prompt_input, steps_slider],
+        outputs=[video_output, seed_input],
+        fn=generate_video,
+        cache_examples="lazy"
    )

if __name__ == "__main__":