POET

Running on Zero

App Files Files Community

xh365 committed on Sep 24

Commit

a65a087

1 Parent(s): 185470f

update

Browse files

Files changed (5) hide show

__pycache__/live_preview_helpers.cpython-310.pyc +0 -0
__pycache__/optim_utils.cpython-310.pyc +0 -0
__pycache__/utils.cpython-310.pyc +0 -0
app.py +39 -25
utils.py +2 -2

__pycache__/live_preview_helpers.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/live_preview_helpers.cpython-310.pyc and b/__pycache__/live_preview_helpers.cpython-310.pyc differ

__pycache__/optim_utils.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/optim_utils.cpython-310.pyc and b/__pycache__/optim_utils.cpython-310.pyc differ

__pycache__/utils.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/utils.cpython-310.pyc and b/__pycache__/utils.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ from utils import (
 # =========================
 CLIP_MODEL = "ViT-H-14"
 PRETRAINED_CLIP = "laion2b_s32b_b79k"
-default_t2i_model = "black-forest-labs/FLUX.1-dev"
 default_llm_model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
@@ -37,7 +37,6 @@ llm_pipe = None
 torch.cuda.empty_cache()
 inverted_prompt = ""
-VERBAL_MSG = "Please explain your rating of satisfaction in few words or sentences."
 METHOD = "Experimental"  # keep ONLY experimental
 # Global states for a single-task, single-method flow
@@ -45,6 +44,22 @@ counter = 1
 enable_submit = False
 responses_memory = {METHOD: {}}
 # =========================
 # Image Generation Helpers
 # =========================
@@ -88,6 +103,7 @@ def call_gpt_refine_prompt(prompt, num_prompts=5, max_tokens=1000, temperature=0
 def personalize_prompt(prompt, history, feedback, like_image, dislike_image):
     seed = random.randint(0, MAX_SEED)
     client = init_gpt_api()
     messages = get_personalize_message(prompt, history, feedback, like_image, dislike_image)
     outputs = call_gpt_api(messages, client, "gpt-4o", seed, max_tokens=2000, temperature=0.7, top_p=0.9)
     return outputs
@@ -128,21 +144,16 @@ def check_evaluation(sim_radio):
         return False
     return True
-# =========================
-# Core Actions (single method)
-# =========================
 def generate_image(prompt, like_image, dislike_image):
     global responses_memory
     history_prompts = [v["prompt"] for v in responses_memory[METHOD].values()]
     feedback = [v["sim_radio"] for v in responses_memory[METHOD].values()]
-    personalized = personalize_prompt(prompt, history_prompts, feedback, like_image, dislike_image)
-    personalized = clean_refined_prompt_response_gpt(personalized)
-    if "I'm sorry, I can't assist with" in personalized:
-        personalized = prompt
     gallery_images = []
-    # Experimental method refines prompts first
     refined_prompts = call_gpt_refine_prompt(personalized)
     for i in range(NUM_IMAGES):
         img = infer(refined_prompts[i])
@@ -239,19 +250,19 @@ css = """
 with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Inconsolata"), "Arial", "sans-serif"]), css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("# 📌 **PAI-GEN — Experimental Only**")
-        instruction = gr.Markdown(INSTRUCTION)
-    with gr.Tab("Task"):
         with gr.Row(elem_id="compact-row"):
             prompt = gr.Textbox(
                 label="🎨 Revise Prompt",
                 max_lines=5,
                 placeholder="Enter your prompt",
-                scale=4,
                 visible=True,
             )
-            next_btn = gr.Button("Generate", variant="primary", scale=1, interactive=False, visible=False)
         with gr.Row(elem_id="compact-row"):
             with gr.Column(elem_id="col-container"):
@@ -282,14 +293,6 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Inconsolata"), "
                 elem_classes=["gradio-radio"]
             )
-            response = gr.Textbox(
-                label="Briefly explain your rating.",
-                max_lines=1,
-                interactive=False,
-                container=False,
-                value=VERBAL_MSG
-            )
         with gr.Column(elem_id="col-container2"):
             example = gr.Examples([['']], prompt, label="Revised Prompt History", visible=False)
             history_images = gr.Gallery(label="History Images", columns=[4], rows=[1], elem_id="gallery", format="png")
@@ -298,6 +301,17 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Inconsolata"), "
                 redesign_btn = gr.Button("🎨 Redesign", variant="primary", scale=0)
                 submit_btn = gr.Button("✅ Submit", variant="primary", interactive=False, scale=0)
 # =========================
 # Wiring
 # =========================

 # =========================
 CLIP_MODEL = "ViT-H-14"
 PRETRAINED_CLIP = "laion2b_s32b_b79k"
+default_t2i_model = "black-forest-labs/FLUX.1-schnell" # "black-forest-labs/FLUX.1-dev"
 default_llm_model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 torch.cuda.empty_cache()
 inverted_prompt = ""
 METHOD = "Experimental"  # keep ONLY experimental
 # Global states for a single-task, single-method flow
 enable_submit = False
 responses_memory = {METHOD: {}}
+example_data = [
+    [
+        "A futuristic city skyline at sunset",
+        IMAGES["Tourist promotion"]["ours"]
+    ],
+    [
+        "A fantasy castle in the clouds",
+        IMAGES["Fictional character generation"]["ours"]
+    ],
+    [
+        "A robot painting a portrait in a studio",
+        IMAGES["Interior Design"]["ours"]
+    ],
+]
+print(example_data)
 # =========================
 # Image Generation Helpers
 # =========================
 def personalize_prompt(prompt, history, feedback, like_image, dislike_image):
     seed = random.randint(0, MAX_SEED)
     client = init_gpt_api()
+    print(like_image, dislike_image)
     messages = get_personalize_message(prompt, history, feedback, like_image, dislike_image)
     outputs = call_gpt_api(messages, client, "gpt-4o", seed, max_tokens=2000, temperature=0.7, top_p=0.9)
     return outputs
         return False
     return True
 def generate_image(prompt, like_image, dislike_image):
     global responses_memory
     history_prompts = [v["prompt"] for v in responses_memory[METHOD].values()]
     feedback = [v["sim_radio"] for v in responses_memory[METHOD].values()]
+    personalized = prompt
+    # personalized = personalize_prompt(prompt, history_prompts, feedback, like_image, dislike_image)
+    # personalized = clean_refined_prompt_response_gpt(personalized)
+    # if "I'm sorry, I can't assist with" in personalized:
+    #     personalized = prompt
     gallery_images = []
     refined_prompts = call_gpt_refine_prompt(personalized)
     for i in range(NUM_IMAGES):
         img = infer(refined_prompts[i])
 with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Inconsolata"), "Arial", "sans-serif"]), css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown("# 📌 **POET**")
+        instruction = gr.Markdown(" Supporting Prompting Creativity and Personalization with Automated Expansion of Text-to-Image Generation")
+    with gr.Tab(""):
         with gr.Row(elem_id="compact-row"):
             prompt = gr.Textbox(
                 label="🎨 Revise Prompt",
                 max_lines=5,
                 placeholder="Enter your prompt",
+                scale=3,
                 visible=True,
             )
+            next_btn = gr.Button("Generate", variant="primary", scale=1)
         with gr.Row(elem_id="compact-row"):
             with gr.Column(elem_id="col-container"):
                 elem_classes=["gradio-radio"]
             )
         with gr.Column(elem_id="col-container2"):
             example = gr.Examples([['']], prompt, label="Revised Prompt History", visible=False)
             history_images = gr.Gallery(label="History Images", columns=[4], rows=[1], elem_id="gallery", format="png")
                 redesign_btn = gr.Button("🎨 Redesign", variant="primary", scale=0)
                 submit_btn = gr.Button("✅ Submit", variant="primary", interactive=False, scale=0)
+        with gr.Column(elem_id="col-container2"):
+            gr.Markdown("### 🌟 Examples")
+            ex1 = gr.Image(label="Image 1", width=200, height=200, sources='upload', format="png", type="filepath", visible=False)
+            ex2 = gr.Image(label="Image 2", width=200, height=200, sources='upload', format="png", type="filepath", visible=False)
+            ex3 = gr.Image(label="Image 3", width=200, height=200, sources='upload', format="png", type="filepath", visible=False)
+            ex4 = gr.Image(label="Image 4", width=200, height=200, sources='upload', format="png", type="filepath", visible=False)
+            gr.Examples(
+                examples=[[ex[0], ex[1][0], ex[1][1], ex[1][2], ex[1][3]] for ex in example_data],
+                inputs=[prompt, ex1, ex2, ex3, ex4]
+            )
 # =========================
 # Wiring
 # =========================

utils.py CHANGED Viewed

@@ -52,7 +52,7 @@ def clean_cache():
 def setup_model(t2i_model_repo, torch_dtype, device):
     if t2i_model_repo == "stabilityai/sdxl-turbo" or t2i_model_repo == "stabilityai/stable-diffusion-3.5-medium" or t2i_model_repo == "stabilityai/stable-diffusion-2-1":
         pipe = DiffusionPipeline.from_pretrained(t2i_model_repo, torch_dtype=torch_dtype).to(device)
-    elif t2i_model_repo == "black-forest-labs/FLUX.1-dev":
         # pipe = FluxPipeline.from_pretrained(t2i_model_repo, torch_dtype=torch_dtype).to(device)
         pipe = FLUXPipelineWithIntermediateOutputs.from_pretrained(t2i_model_repo, torch_dtype=torch_dtype).to(device)
     torch.cuda.empty_cache()
@@ -171,7 +171,7 @@ def get_personalize_message(prompt, history_prompts, history_feedback, like_imag
                         "url": f"data:image/png;base64,{dislike_image_base64}",
                     },
                 })
     return messages
 @spaces.GPU

 def setup_model(t2i_model_repo, torch_dtype, device):
     if t2i_model_repo == "stabilityai/sdxl-turbo" or t2i_model_repo == "stabilityai/stable-diffusion-3.5-medium" or t2i_model_repo == "stabilityai/stable-diffusion-2-1":
         pipe = DiffusionPipeline.from_pretrained(t2i_model_repo, torch_dtype=torch_dtype).to(device)
+    elif t2i_model_repo == "black-forest-labs/FLUX.1-dev" or "black-forest-labs/FLUX.1-schnell":
         # pipe = FluxPipeline.from_pretrained(t2i_model_repo, torch_dtype=torch_dtype).to(device)
         pipe = FLUXPipelineWithIntermediateOutputs.from_pretrained(t2i_model_repo, torch_dtype=torch_dtype).to(device)
     torch.cuda.empty_cache()
                         "url": f"data:image/png;base64,{dislike_image_base64}",
                     },
                 })
+    print(messages)
     return messages
 @spaces.GPU