akhaliq HF Staff committed on
Commit
a13563f
·
verified ·
1 Parent(s): 15b8e5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -9
app.py CHANGED
@@ -35,7 +35,7 @@ vl_gpt = vl_gpt.cuda().eval()
35
 
36
  # === Text-and-Image-to-Image generation ===
37
  def text_and_image_to_image_generate(input_prompt, input_image_path, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5, cfg_weight2=5):
38
- def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5.0):
39
 
40
  input_img_tokens = vl_chat_processor.image_start_tag + vl_chat_processor.image_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag + vl_chat_processor.image_start_tag + vl_chat_processor.pad_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag
41
  output_img_tokens = vl_chat_processor.image_start_tag
@@ -44,7 +44,7 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
44
  input_images = [input_image_path]
45
  img_len = len(input_images)
46
  prompts = input_img_tokens * img_len + input_prompt
47
- torch.cuda.empty_cache()
48
  {"role": "<|User|>", "content": prompts},
49
  {"role": "<|Assistant|>", "content": ""}
50
  ]
@@ -95,7 +95,6 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
95
  inputs_embeds[ind[0], offset: offset + image_embeds_input.shape[1], :] = image_embeds_input[(ii // 2) % img_len]
96
 
97
  generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int).cuda()
98
-
99
  past_key_values = None
100
 
101
  for i in range(image_token_num_per_image):
@@ -147,7 +146,9 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
147
 
148
  torch.cuda.empty_cache()
149
  return output_images
 
150
  # === Text-to-Image generation ===
 
151
  torch.cuda.empty_cache()
152
 
153
  conversation = [
@@ -232,7 +233,6 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
232
  torch.cuda.empty_cache()
233
  return output_images
234
 
235
-
236
  # === Enhanced Gradio handlers ===
237
  @spaces.GPU(duration=120)
238
  def janus_generate_image(message, history, uploaded_image=None):
@@ -257,7 +257,6 @@ def janus_generate_image(message, history, uploaded_image=None):
257
 
258
  return {"role": "assistant", "content": {"path": images[0]}}
259
 
260
-
261
  # === Alternative interface for explicit text+image input ===
262
  @spaces.GPU(duration=120)
263
  def generate_from_text_and_image(prompt, input_image):
@@ -282,7 +281,6 @@ def generate_from_text_and_image(prompt, input_image):
282
  if os.path.exists(temp_image_path):
283
  os.remove(temp_image_path)
284
 
285
-
286
  @spaces.GPU(duration=120)
287
  def generate_from_text_only(prompt):
288
  output_path = "./output/text_only_gen.png"
@@ -293,7 +291,6 @@ def generate_from_text_only(prompt):
293
  except Exception as e:
294
  return None, f"Error generating image: {str(e)}"
295
 
296
-
297
  # === Enhanced Gradio UI with multiple interfaces ===
298
  with gr.Blocks(theme="soft", title="Janus Text-to-Image & Text+Image-to-Image") as demo:
299
  gr.Markdown("# Janus Multi-Modal Image Generation")
@@ -331,8 +328,6 @@ with gr.Blocks(theme="soft", title="Janus Text-to-Image & Text+Image-to-Image")
331
  ["a cat sitting on a windowsill", None],
332
  ["a futuristic city at sunset", None],
333
  ["a dragon flying over mountains", None],
334
- ["Turn this into a nighttime scene", "example_image.jpg"],
335
- ["Make this image more vibrant and colorful", "example_image.jpg"],
336
  ],
337
  inputs=[msg_input, image_input]
338
  )
 
35
 
36
  # === Text-and-Image-to-Image generation ===
37
  def text_and_image_to_image_generate(input_prompt, input_image_path, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5, cfg_weight2=5):
38
+ torch.cuda.empty_cache()
39
 
40
  input_img_tokens = vl_chat_processor.image_start_tag + vl_chat_processor.image_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag + vl_chat_processor.image_start_tag + vl_chat_processor.pad_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag
41
  output_img_tokens = vl_chat_processor.image_start_tag
 
44
  input_images = [input_image_path]
45
  img_len = len(input_images)
46
  prompts = input_img_tokens * img_len + input_prompt
47
+ conversation = [
48
  {"role": "<|User|>", "content": prompts},
49
  {"role": "<|Assistant|>", "content": ""}
50
  ]
 
95
  inputs_embeds[ind[0], offset: offset + image_embeds_input.shape[1], :] = image_embeds_input[(ii // 2) % img_len]
96
 
97
  generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int).cuda()
 
98
  past_key_values = None
99
 
100
  for i in range(image_token_num_per_image):
 
146
 
147
  torch.cuda.empty_cache()
148
  return output_images
149
+
150
  # === Text-to-Image generation ===
151
+ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5.0):
152
  torch.cuda.empty_cache()
153
 
154
  conversation = [
 
233
  torch.cuda.empty_cache()
234
  return output_images
235
 
 
236
  # === Enhanced Gradio handlers ===
237
  @spaces.GPU(duration=120)
238
  def janus_generate_image(message, history, uploaded_image=None):
 
257
 
258
  return {"role": "assistant", "content": {"path": images[0]}}
259
 
 
260
  # === Alternative interface for explicit text+image input ===
261
  @spaces.GPU(duration=120)
262
  def generate_from_text_and_image(prompt, input_image):
 
281
  if os.path.exists(temp_image_path):
282
  os.remove(temp_image_path)
283
 
 
284
  @spaces.GPU(duration=120)
285
  def generate_from_text_only(prompt):
286
  output_path = "./output/text_only_gen.png"
 
291
  except Exception as e:
292
  return None, f"Error generating image: {str(e)}"
293
 
 
294
  # === Enhanced Gradio UI with multiple interfaces ===
295
  with gr.Blocks(theme="soft", title="Janus Text-to-Image & Text+Image-to-Image") as demo:
296
  gr.Markdown("# Janus Multi-Modal Image Generation")
 
328
  ["a cat sitting on a windowsill", None],
329
  ["a futuristic city at sunset", None],
330
  ["a dragon flying over mountains", None],
 
 
331
  ],
332
  inputs=[msg_input, image_input]
333
  )