Spaces:

akhaliq
/

Janus-4o-7B

Paused

App Files Files Community

akhaliq HF Staff committed on Jun 26

Commit

a13563f

verified ·

1 Parent(s): 15b8e5b

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -9

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ vl_gpt = vl_gpt.cuda().eval()
 # === Text-and-Image-to-Image generation ===
 def text_and_image_to_image_generate(input_prompt, input_image_path, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5, cfg_weight2=5):
-def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5.0):
     input_img_tokens = vl_chat_processor.image_start_tag + vl_chat_processor.image_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag + vl_chat_processor.image_start_tag + vl_chat_processor.pad_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag
     output_img_tokens = vl_chat_processor.image_start_tag
@@ -44,7 +44,7 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
     input_images = [input_image_path]
     img_len = len(input_images)
     prompts = input_img_tokens * img_len + input_prompt
-    torch.cuda.empty_cache()
         {"role": "<|User|>", "content": prompts},
         {"role": "<|Assistant|>", "content": ""}
     ]
@@ -95,7 +95,6 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
                 inputs_embeds[ind[0], offset: offset + image_embeds_input.shape[1], :] = image_embeds_input[(ii // 2) % img_len]
         generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int).cuda()
         past_key_values = None
         for i in range(image_token_num_per_image):
@@ -147,7 +146,9 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
     torch.cuda.empty_cache()
     return output_images
 # === Text-to-Image generation ===
     torch.cuda.empty_cache()
     conversation = [
@@ -232,7 +233,6 @@ def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt,
     torch.cuda.empty_cache()
     return output_images
 # === Enhanced Gradio handlers ===
 @spaces.GPU(duration=120)
 def janus_generate_image(message, history, uploaded_image=None):
@@ -257,7 +257,6 @@ def janus_generate_image(message, history, uploaded_image=None):
     return {"role": "assistant", "content": {"path": images[0]}}
 # === Alternative interface for explicit text+image input ===
 @spaces.GPU(duration=120)
 def generate_from_text_and_image(prompt, input_image):
@@ -282,7 +281,6 @@ def generate_from_text_and_image(prompt, input_image):
         if os.path.exists(temp_image_path):
             os.remove(temp_image_path)
 @spaces.GPU(duration=120)
 def generate_from_text_only(prompt):
     output_path = "./output/text_only_gen.png"
@@ -293,7 +291,6 @@ def generate_from_text_only(prompt):
     except Exception as e:
         return None, f"Error generating image: {str(e)}"
 # === Enhanced Gradio UI with multiple interfaces ===
 with gr.Blocks(theme="soft", title="Janus Text-to-Image & Text+Image-to-Image") as demo:
     gr.Markdown("# Janus Multi-Modal Image Generation")
@@ -331,8 +328,6 @@ with gr.Blocks(theme="soft", title="Janus Text-to-Image & Text+Image-to-Image")
                     ["a cat sitting on a windowsill", None],
                     ["a futuristic city at sunset", None],
                     ["a dragon flying over mountains", None],
-                    ["Turn this into a nighttime scene", "example_image.jpg"],
-                    ["Make this image more vibrant and colorful", "example_image.jpg"],
                 ],
                 inputs=[msg_input, image_input]
             )

 # === Text-and-Image-to-Image generation ===
 def text_and_image_to_image_generate(input_prompt, input_image_path, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5, cfg_weight2=5):
+    torch.cuda.empty_cache()
     input_img_tokens = vl_chat_processor.image_start_tag + vl_chat_processor.image_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag + vl_chat_processor.image_start_tag + vl_chat_processor.pad_tag * vl_chat_processor.num_image_tokens + vl_chat_processor.image_end_tag
     output_img_tokens = vl_chat_processor.image_start_tag
     input_images = [input_image_path]
     img_len = len(input_images)
     prompts = input_img_tokens * img_len + input_prompt
+    conversation = [
         {"role": "<|User|>", "content": prompts},
         {"role": "<|Assistant|>", "content": ""}
     ]
                 inputs_embeds[ind[0], offset: offset + image_embeds_input.shape[1], :] = image_embeds_input[(ii // 2) % img_len]
         generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int).cuda()
         past_key_values = None
         for i in range(image_token_num_per_image):
     torch.cuda.empty_cache()
     return output_images
 # === Text-to-Image generation ===
+def text_to_image_generate(input_prompt, output_path, vl_chat_processor, vl_gpt, temperature=1.0, parallel_size=2, cfg_weight=5.0):
     torch.cuda.empty_cache()
     conversation = [
     torch.cuda.empty_cache()
     return output_images
 # === Enhanced Gradio handlers ===
 @spaces.GPU(duration=120)
 def janus_generate_image(message, history, uploaded_image=None):
     return {"role": "assistant", "content": {"path": images[0]}}
 # === Alternative interface for explicit text+image input ===
 @spaces.GPU(duration=120)
 def generate_from_text_and_image(prompt, input_image):
         if os.path.exists(temp_image_path):
             os.remove(temp_image_path)
 @spaces.GPU(duration=120)
 def generate_from_text_only(prompt):
     output_path = "./output/text_only_gen.png"
     except Exception as e:
         return None, f"Error generating image: {str(e)}"
 # === Enhanced Gradio UI with multiple interfaces ===
 with gr.Blocks(theme="soft", title="Janus Text-to-Image & Text+Image-to-Image") as demo:
     gr.Markdown("# Janus Multi-Modal Image Generation")
                     ["a cat sitting on a windowsill", None],
                     ["a futuristic city at sunset", None],
                     ["a dragon flying over mountains", None],
                 ],
                 inputs=[msg_input, image_input]
             )