Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -35,7 +35,7 @@ vl_gpt = vl_gpt.cuda().eval()
|
|
| 35 |
|
| 36 |
# === Text-and-Image-to-Image generation ===
|
| 37 |
def text_and_image_to_image_generate(input_prompt, input_image_path, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5, cfg_weight2=5):
|
| 38 |
-
|
| 39 |
|
| 40 |
input_img_tokens = vl_chat_processor.image_start_tag + vl_chat_processor.image_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag + vl_chat_processor.image_start_tag + vl_chat_processor.pad_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag
|
| 41 |
output_img_tokens = vl_chat_processor.image_start_tag
|
|
@@ -44,7 +44,7 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
|
|
| 44 |
input_images = [input_image_path]
|
| 45 |
img_len = len(input_images)
|
| 46 |
prompts = input_img_tokens * img_len + input_prompt
|
| 47 |
-
|
| 48 |
{"role": "<|User|>", "content": prompts},
|
| 49 |
{"role": "<|Assistant|>", "content": ""}
|
| 50 |
]
|
|
@@ -95,7 +95,6 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
|
|
| 95 |
inputs_embeds[ind[0], offset: offset + image_embeds_input.shape[1], :] = image_embeds_input[(ii // 2) % img_len]
|
| 96 |
|
| 97 |
generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int).cuda()
|
| 98 |
-
|
| 99 |
past_key_values = None
|
| 100 |
|
| 101 |
for i in range(image_token_num_per_image):
|
|
@@ -147,7 +146,9 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
|
|
| 147 |
|
| 148 |
torch.cuda.empty_cache()
|
| 149 |
return output_images
|
|
|
|
| 150 |
# === Text-to-Image generation ===
|
|
|
|
| 151 |
torch.cuda.empty_cache()
|
| 152 |
|
| 153 |
conversation = [
|
|
@@ -232,7 +233,6 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
|
|
| 232 |
torch.cuda.empty_cache()
|
| 233 |
return output_images
|
| 234 |
|
| 235 |
-
|
| 236 |
# === Enhanced Gradio handlers ===
|
| 237 |
@spaces.GPU(duration=120)
|
| 238 |
def janus_generate_image(message, history, uploaded_image=None):
|
|
@@ -257,7 +257,6 @@ def janus_generate_image(message, history, uploaded_image=None):
|
|
| 257 |
|
| 258 |
return {"role": "assistant", "content": {"path": images[0]}}
|
| 259 |
|
| 260 |
-
|
| 261 |
# === Alternative interface for explicit text+image input ===
|
| 262 |
@spaces.GPU(duration=120)
|
| 263 |
def generate_from_text_and_image(prompt, input_image):
|
|
@@ -282,7 +281,6 @@ def generate_from_text_and_image(prompt, input_image):
|
|
| 282 |
if os.path.exists(temp_image_path):
|
| 283 |
os.remove(temp_image_path)
|
| 284 |
|
| 285 |
-
|
| 286 |
@spaces.GPU(duration=120)
|
| 287 |
def generate_from_text_only(prompt):
|
| 288 |
output_path = "./output/text_only_gen.png"
|
|
@@ -293,7 +291,6 @@ def generate_from_text_only(prompt):
|
|
| 293 |
except Exception as e:
|
| 294 |
return None, f"Error generating image: {str(e)}"
|
| 295 |
|
| 296 |
-
|
| 297 |
# === Enhanced Gradio UI with multiple interfaces ===
|
| 298 |
with gr.Blocks(theme="soft", title="Janus Text-to-Image & Text+Image-to-Image") as demo:
|
| 299 |
gr.Markdown("# Janus Multi-Modal Image Generation")
|
|
@@ -331,8 +328,6 @@ with gr.Blocks(theme="soft", title="Janus Text-to-Image & Text+Image-to-Image")
|
|
| 331 |
["a cat sitting on a windowsill", None],
|
| 332 |
["a futuristic city at sunset", None],
|
| 333 |
["a dragon flying over mountains", None],
|
| 334 |
-
["Turn this into a nighttime scene", "example_image.jpg"],
|
| 335 |
-
["Make this image more vibrant and colorful", "example_image.jpg"],
|
| 336 |
],
|
| 337 |
inputs=[msg_input, image_input]
|
| 338 |
)
|
|
|
|
| 35 |
|
| 36 |
# === Text-and-Image-to-Image generation ===
|
| 37 |
def text_and_image_to_image_generate(input_prompt, input_image_path, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5, cfg_weight2=5):
|
| 38 |
+
torch.cuda.empty_cache()
|
| 39 |
|
| 40 |
input_img_tokens = vl_chat_processor.image_start_tag + vl_chat_processor.image_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag + vl_chat_processor.image_start_tag + vl_chat_processor.pad_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag
|
| 41 |
output_img_tokens = vl_chat_processor.image_start_tag
|
|
|
|
| 44 |
input_images = [input_image_path]
|
| 45 |
img_len = len(input_images)
|
| 46 |
prompts = input_img_tokens * img_len + input_prompt
|
| 47 |
+
conversation = [
|
| 48 |
{"role": "<|User|>", "content": prompts},
|
| 49 |
{"role": "<|Assistant|>", "content": ""}
|
| 50 |
]
|
|
|
|
| 95 |
inputs_embeds[ind[0], offset: offset + image_embeds_input.shape[1], :] = image_embeds_input[(ii // 2) % img_len]
|
| 96 |
|
| 97 |
generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int).cuda()
|
|
|
|
| 98 |
past_key_values = None
|
| 99 |
|
| 100 |
for i in range(image_token_num_per_image):
|
|
|
|
| 146 |
|
| 147 |
torch.cuda.empty_cache()
|
| 148 |
return output_images
|
| 149 |
+
|
| 150 |
# === Text-to-Image generation ===
|
| 151 |
+
def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5.0):
|
| 152 |
torch.cuda.empty_cache()
|
| 153 |
|
| 154 |
conversation = [
|
|
|
|
| 233 |
torch.cuda.empty_cache()
|
| 234 |
return output_images
|
| 235 |
|
|
|
|
| 236 |
# === Enhanced Gradio handlers ===
|
| 237 |
@spaces.GPU(duration=120)
|
| 238 |
def janus_generate_image(message, history, uploaded_image=None):
|
|
|
|
| 257 |
|
| 258 |
return {"role": "assistant", "content": {"path": images[0]}}
|
| 259 |
|
|
|
|
| 260 |
# === Alternative interface for explicit text+image input ===
|
| 261 |
@spaces.GPU(duration=120)
|
| 262 |
def generate_from_text_and_image(prompt, input_image):
|
|
|
|
| 281 |
if os.path.exists(temp_image_path):
|
| 282 |
os.remove(temp_image_path)
|
| 283 |
|
|
|
|
| 284 |
@spaces.GPU(duration=120)
|
| 285 |
def generate_from_text_only(prompt):
|
| 286 |
output_path = "./output/text_only_gen.png"
|
|
|
|
| 291 |
except Exception as e:
|
| 292 |
return None, f"Error generating image: {str(e)}"
|
| 293 |
|
|
|
|
| 294 |
# === Enhanced Gradio UI with multiple interfaces ===
|
| 295 |
with gr.Blocks(theme="soft", title="Janus Text-to-Image & Text+Image-to-Image") as demo:
|
| 296 |
gr.Markdown("# Janus Multi-Modal Image Generation")
|
|
|
|
| 328 |
["a cat sitting on a windowsill", None],
|
| 329 |
["a futuristic city at sunset", None],
|
| 330 |
["a dragon flying over mountains", None],
|
|
|
|
|
|
|
| 331 |
],
|
| 332 |
inputs=[msg_input, image_input]
|
| 333 |
)
|