Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -43,16 +43,31 @@ pipe = StableDiffusionXLFillPipeline.from_pretrained(
|
|
| 43 |
|
| 44 |
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
|
| 45 |
|
| 46 |
-
#
|
| 47 |
-
pipe.text_encoder = pipe.text_encoder.to(
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
def can_expand(source_width, source_height, target_width, target_height, alignment):
|
| 58 |
"""Checks if the image can be expanded based on the alignment."""
|
|
@@ -153,24 +168,25 @@ def infer(image, width, height, overlap_width, num_inference_steps, resize_optio
|
|
| 153 |
cnet_image = background.copy()
|
| 154 |
cnet_image.paste(0, (0, 0), mask)
|
| 155 |
|
| 156 |
-
final_prompt = f"{prompt_input} , high quality, 4k"
|
| 157 |
-
|
| 158 |
-
(
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
|
|
|
| 174 |
|
| 175 |
image = image.convert("RGBA")
|
| 176 |
cnet_image.paste(image, (0, 0), mask)
|
|
@@ -371,4 +387,4 @@ with gr.Blocks(css=css) as demo:
|
|
| 371 |
outputs=use_as_input_button,
|
| 372 |
)
|
| 373 |
|
| 374 |
-
demo.queue(max_size=12).launch(share=False)
|
|
|
|
| 43 |
|
| 44 |
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
|
| 45 |
|
| 46 |
+
# Make sure all text encoder components use the same dtype
|
| 47 |
+
pipe.text_encoder = pipe.text_encoder.to(dtype=torch.float16)
|
| 48 |
+
pipe.text_encoder_2 = pipe.text_encoder_2.to(dtype=torch.float16)
|
| 49 |
+
|
| 50 |
+
# Patch the text encoder forward methods to ensure consistent dtype
|
| 51 |
+
def patch_text_encoder_forward(encoder):
    """Wrap ``encoder.forward`` so floating-point tensor inputs are cast to float16.

    The wrapper is installed in place on ``encoder`` (the attribute
    ``encoder.forward`` is replaced) and delegates to the original forward
    with the converted arguments.

    Only floating-point tensors are converted. Integer and boolean tensors
    (for example token ids or attention masks) are passed through unchanged:
    casting them to float16 would corrupt index values and make embedding
    lookups fail, since those require integer indices.

    Args:
        encoder: Any object exposing a callable ``forward`` attribute.

    Returns:
        None. The encoder is modified in place.
    """
    original_forward = encoder.forward

    def _to_half(value):
        # Leave non-tensors and non-floating tensors (ids, masks) untouched.
        if isinstance(value, torch.Tensor) and value.is_floating_point():
            return value.to(dtype=torch.float16)
        return value

    def patched_forward(*args, **kwargs):
        # Apply the same rule to positional and keyword arguments so the
        # encoder sees a consistent floating-point dtype.
        half_args = tuple(_to_half(arg) for arg in args)
        half_kwargs = {key: _to_half(value) for key, value in kwargs.items()}
        return original_forward(*half_args, **half_kwargs)

    encoder.forward = patched_forward
|
| 67 |
+
|
| 68 |
+
# Apply the patch to both encoders
|
| 69 |
+
patch_text_encoder_forward(pipe.text_encoder)
|
| 70 |
+
patch_text_encoder_forward(pipe.text_encoder_2)
|
| 71 |
|
| 72 |
def can_expand(source_width, source_height, target_width, target_height, alignment):
|
| 73 |
"""Checks if the image can be expanded based on the alignment."""
|
|
|
|
| 168 |
cnet_image = background.copy()
|
| 169 |
cnet_image.paste(0, (0, 0), mask)
|
| 170 |
|
| 171 |
+
final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
|
| 172 |
+
|
| 173 |
+
with torch.cuda.amp.autocast(dtype=torch.float16):
|
| 174 |
+
(
|
| 175 |
+
prompt_embeds,
|
| 176 |
+
negative_prompt_embeds,
|
| 177 |
+
pooled_prompt_embeds,
|
| 178 |
+
negative_pooled_prompt_embeds,
|
| 179 |
+
) = pipe.encode_prompt(final_prompt, "cuda", True)
|
| 180 |
+
|
| 181 |
+
for image in pipe(
|
| 182 |
+
prompt_embeds=prompt_embeds,
|
| 183 |
+
negative_prompt_embeds=negative_prompt_embeds,
|
| 184 |
+
pooled_prompt_embeds=pooled_prompt_embeds,
|
| 185 |
+
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
|
| 186 |
+
image=cnet_image,
|
| 187 |
+
num_inference_steps=num_inference_steps
|
| 188 |
+
):
|
| 189 |
+
yield cnet_image, image
|
| 190 |
|
| 191 |
image = image.convert("RGBA")
|
| 192 |
cnet_image.paste(image, (0, 0), mask)
|
|
|
|
| 387 |
outputs=use_as_input_button,
|
| 388 |
)
|
| 389 |
|
| 390 |
+
demo.queue(max_size=12).launch(share=False)
|