update interface
- __pycache__/live_preview_helpers.cpython-310.pyc +0 -0
- __pycache__/optim_utils.cpython-310.pyc +0 -0
- __pycache__/utils.cpython-310.pyc +0 -0
- app.py +105 -17
- optim_utils.py +0 -3
    	
__pycache__/live_preview_helpers.cpython-310.pyc CHANGED

Binary files a/__pycache__/live_preview_helpers.cpython-310.pyc and b/__pycache__/live_preview_helpers.cpython-310.pyc differ

__pycache__/optim_utils.cpython-310.pyc CHANGED

Binary files a/__pycache__/optim_utils.cpython-310.pyc and b/__pycache__/optim_utils.cpython-310.pyc differ

__pycache__/utils.cpython-310.pyc CHANGED

Binary files a/__pycache__/utils.cpython-310.pyc and b/__pycache__/utils.cpython-310.pyc differ
    	
app.py CHANGED

@@ -6,6 +6,7 @@ import spaces
 import torch
 import re
 import transformers
+import open_clip
 
 # Optional: keep these utilities if your pipeline depends on them
 from optim_utils import optimize_prompt

@@ -33,17 +34,15 @@ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 clean_cache()
 
 selected_pipe = setup_model(default_t2i_model, torch_dtype, device)
+clip_model, _, preprocess = open_clip.create_model_and_transforms(CLIP_MODEL, pretrained=PRETRAINED_CLIP, device=device)
 llm_pipe = None
-torch.cuda.empty_cache()
 inverted_prompt = ""
+torch.cuda.empty_cache()
 
 METHOD = "Experimental"  # keep ONLY experimental
-
-# Global states for a single-task, single-method flow
 counter = 1
 enable_submit = False
 responses_memory = {METHOD: {}}
-
 example_data = [
     [
         PROMPTS["Tourist promotion"],

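For context, `open_clip.create_model_and_transforms` returns a model plus training and evaluation preprocessing transforms, which is why the middle value is discarded. A standalone sketch of this setup, where the `CLIP_MODEL` and `PRETRAINED_CLIP` values are assumptions (the actual constants are defined elsewhere in the repo; PEZ-style prompt inversion commonly uses ViT-H-14 / laion2b_s32b_b79k):

```python
import torch
import open_clip

# Assumed values for constants that app.py imports from elsewhere in the repo.
CLIP_MODEL = "ViT-H-14"
PRETRAINED_CLIP = "laion2b_s32b_b79k"

device = "cuda" if torch.cuda.is_available() else "cpu"
# Returns (model, train_preprocess, eval_preprocess); only the eval
# transform is kept for inference.
clip_model, _, preprocess = open_clip.create_model_and_transforms(
    CLIP_MODEL, pretrained=PRETRAINED_CLIP, device=device
)
```
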
@@ -58,7 +57,6 @@ example_data = [
         IMAGES["Interior Design"]["ours"]
     ],
 ]
-print(example_data)
 
 # =========================
 # Image Generation Helpers

@@ -103,11 +101,31 @@ def call_gpt_refine_prompt(prompt, num_prompts=5, max_tokens=1000, temperature=0
 def personalize_prompt(prompt, history, feedback, like_image, dislike_image):
     seed = random.randint(0, MAX_SEED)
     client = init_gpt_api()
-    print(like_image, dislike_image)
     messages = get_personalize_message(prompt, history, feedback, like_image, dislike_image)
     outputs = call_gpt_api(messages, client, "gpt-4o", seed, max_tokens=2000, temperature=0.7, top_p=0.9)
     return outputs
 
+@spaces.GPU(duration=100)
+def invert_prompt(prompt, images, prompt_len=15, iter=500, lr=0.1, batch_size=2):
+    global inverted_prompt
+    text_params = {
+        "iter": iter,
+        "lr": lr,
+        "batch_size": batch_size,
+        "prompt_len": prompt_len,
+        "weight_decay": 0.1,
+        "prompt_bs": 1,
+        "loss_weight": 1.0,
+        "print_step": 100,
+        "clip_model": CLIP_MODEL,
+        "clip_pretrain": PRETRAINED_CLIP,
+    }
+    inverted_prompt = optimize_prompt(clip_model, preprocess, text_params, device, target_images=images, target_prompts=prompt)
+    print(inverted_prompt)
+
+    # eval(prompt, learned_prompt, optimized_images, clip_model, preprocess)
+    # return learned_prompt
+
 # =========================
 # UI Helper Functions
 # =========================

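The new `invert_prompt` helper wraps `optimize_prompt` (hard-prompt inversion against the CLIP model set up earlier) and writes its result into the module-level `inverted_prompt` global rather than returning it, since the `return` is commented out. A sketch of how a caller inside app.py might use it, with hypothetical image paths:

```python
from PIL import Image

# Hypothetical inputs: images the user liked, plus their original prompt.
liked_images = [Image.open("liked_1.png"), Image.open("liked_2.png")]

invert_prompt("a cozy reading nook, warm light", liked_images)
print(inverted_prompt)  # the learned hard prompt, set as a side effect
```
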
@@ -241,29 +259,98 @@ css = """
     display: flex;
     justify-content: center;
 }
+#compact-compact-row {
+    width:100%;
+    max-width: 800px;
+    margin: 0px auto;
+}
 #compact-row {
     width:100%;
     max-width: 1000px;
     margin: 0px auto;
 }
+.header-section {
+    text-align: center;
+    margin-bottom: 2rem;
+}
+.abstract-text {
+    text-align: justify;
+    line-height: 1.6;
+    margin: 0.5rem 0;
+    padding: 0.5rem;
+    background-color: rgba(0, 0, 0, 0.05);
+    border-radius: 8px;
+    border-left: 4px solid #3498db;
+}
+.paper-link {
+    display: inline-block;
+    margin: 0rem 0;
+    padding: 0rem 0rem;
+    background-color: #3498db;
+    color: white;
+    text-decoration: none;
+    border-radius: 5px;
+    font-weight: 500;
+}
+.paper-link:hover {
+    background-color: #2980b9;
+    text-decoration: none;
+}
+.authors-section {
+    text-align: center;
+    margin: 0 0;
+    font-style: italic;
+    color: #666;
+}
+.authors-title {
+    font-weight: bold;
+    margin-bottom: 0rem;
+    color: #333;
+}
 """
 
 with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Inconsolata"), "Arial", "sans-serif"]), css=css) as demo:
-    with gr.Column(elem_id="col-container"):
+    with gr.Column(elem_id="col-container", elem_classes=["header-section"]):
         gr.Markdown("# 📌 **POET**")
-
-
+        gr.Markdown("## Supporting Prompting Creativity with Automated Expansion of Text-to-Image Generation")
+
+        # <strong>Abstract:</strong> State-of-the-art visual generative AI tools hold immense potential to assist users in the early ideation stages of creative tasks — offering the ability to generate (rather than search for) novel and unprecedented (instead of existing) images of considerable quality that also adhere to boundless combinations of user specifications. However, many large-scale text-to-image systems are designed for broad applicability, yielding conventional output that may limit creative exploration. They also employ interaction methods that may be difficult for beginners.
+        gr.Markdown("""
+        <div class="abstract-text">
+        <strong>Abstract:</strong> Given that creative end-users often operate in diverse, context-specific ways that are often unpredictable, more variation and personalization are necessary. We introduce POET, a real-time interactive tool that (1) automatically discovers dimensions of homogeneity in text-to-image generative models, (2) expands these dimensions to diversify the output space of generated images, and (3) learns from user feedback to personalize expansions. Focusing on visual creativity, POET offers a first glimpse of how interaction techniques of future text-to-image generation tools may support and align with more pluralistic values and the needs of end-users during the ideation stages of their work.
+        </div>
+        """, elem_classes=["abstract-text"])
+
+        # Paper Link
+        gr.HTML("""
+        <div style="text-align: center;">
+            <a href="https://arxiv.org/pdf/2504.13392" target="_blank" class="paper-link">
+                📄 Read the Full Paper
+            </a>
+        </div>
+        """)
+
+        # Authors
+        gr.Markdown("""
+        <div class="authors-section">
+            Evans Han, Alice Qian Zhang, Haiyi Zhu, Hong Shen, Paul Pu Liang, Jane Hsieh
+        </div>
+        """, elem_classes=["authors-section"])
+
+        # gr.Markdown("---")
 
     with gr.Tab(""):
         with gr.Row(elem_id="compact-row"):
-
-
-
-
-
-
-
-
+            with gr.Column(elem_id="col-container"):
+                with gr.Row():
+                    prompt = gr.Textbox(
+                        label="🎨 Prompt",
+                        max_lines=5,
+                        placeholder="Enter your prompt",
+                        visible=True,
+                    )
+            with gr.Column(elem_id="col-container3"):
+                next_btn = gr.Button("Generate", variant="primary", scale=1)
 
     with gr.Row(elem_id="compact-row"):
         with gr.Column(elem_id="col-container"):

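The CSS additions follow Gradio's standard hook mechanism: rules in the `css` string target the ids and classes that Gradio attaches to components via `elem_id` and `elem_classes`. A minimal self-contained sketch of the pattern:

```python
import gradio as gr

css = """
.header-section { text-align: center; margin-bottom: 2rem; }
#compact-row { max-width: 1000px; margin: 0 auto; }
"""

with gr.Blocks(css=css) as demo:
    # elem_classes maps to CSS class selectors, elem_id to an id selector.
    with gr.Column(elem_classes=["header-section"]):
        gr.Markdown("# Title")
    with gr.Row(elem_id="compact-row"):
        gr.Textbox(label="Prompt")

if __name__ == "__main__":
    demo.launch()
```
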
@@ -313,6 +400,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Inconsolata"), "
                 examples=[[ex[0], ex[1][0], ex[1][1], ex[1][2], ex[1][3]] for ex in example_data],
                 inputs=[prompt, ex1, ex2, ex3, ex4]
             )
+
 # =========================
 # Wiring
 # =========================

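For reference, the `gr.Examples` context above fans each `example_data` row out into a prompt plus four example images. A minimal sketch of that wiring, with hypothetical file paths standing in for the real example images:

```python
import gradio as gr

# Hypothetical example_data row: [prompt_text, [four image paths]].
example_data = [["a sunny beach at golden hour", ["a.png", "b.png", "c.png", "d.png"]]]

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    ex1, ex2, ex3, ex4 = (gr.Image() for _ in range(4))
    gr.Examples(
        examples=[[ex[0], ex[1][0], ex[1][1], ex[1][2], ex[1][3]] for ex in example_data],
        inputs=[prompt, ex1, ex2, ex3, ex4],
    )
```
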
    	
optim_utils.py CHANGED

@@ -19,9 +19,6 @@ def nn_project(curr_embeds, embedding_layer, print_hits=False):
     with torch.no_grad():
         bsz,seq_len,emb_dim = curr_embeds.shape
 
-        # Using the sentence transformers semantic search which is
-        # a dot product exact kNN search between a set of
-        # query vectors and a corpus of vectors
         curr_embeds = curr_embeds.reshape((-1,emb_dim))
         curr_embeds = normalize_embeddings(curr_embeds) # queries

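The deleted comment documented the step that follows: `nn_project` performs an exact dot-product kNN between the current soft embeddings (queries) and the vocabulary embedding table (corpus) via `sentence_transformers.util.semantic_search`. A minimal sketch of that primitive, with random tensors standing in for the real embeddings:

```python
import torch
from sentence_transformers.util import semantic_search, normalize_embeddings, dot_score

queries = normalize_embeddings(torch.randn(4, 512))    # e.g. current prompt embeddings
corpus = normalize_embeddings(torch.randn(1000, 512))  # e.g. the token embedding table

# Exact kNN; with normalized vectors, dot product equals cosine similarity.
hits = semantic_search(queries, corpus, score_function=dot_score, top_k=1)
nearest_ids = [h[0]["corpus_id"] for h in hits]  # index of nearest corpus vector
```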