Update app.py
app.py
CHANGED
@@ -118,30 +118,32 @@ def prepare_images_for_kontext(reference_image, pose_image, target_size=512):
 
     return concatenated
 
-def
+def process_pose_for_control(pose_image):
     """
+    Process pose image to ensure maximum contrast and clarity for control
     """
-    if
+    if pose_image is None:
         return None
 
-    # Convert to grayscale
-    gray =
+    # Convert to grayscale first
+    gray = pose_image.convert("L")
 
-    # Apply edge detection
+    # Apply strong edge detection
     edges = gray.filter(ImageFilter.FIND_EDGES)
+    edges = edges.filter(ImageFilter.EDGE_ENHANCE_MORE)
 
-    #
-    edges = ImageOps.autocontrast(edges)
+    # Maximize contrast
+    edges = ImageOps.autocontrast(edges, cutoff=2)
 
-    #
+    # Convert to pure black and white
+    threshold = 128
+    edges = edges.point(lambda x: 255 if x > threshold else 0, mode='1')
 
-    #
-    edges = edges.
+    # Convert back to RGB with inverted colors (black lines on white)
+    edges = edges.convert("RGB")
+    edges = ImageOps.invert(edges)
 
-    return edges.convert("RGB")
+    return edges
 
 @spaces.GPU(duration=60)
 def generate_pose_transfer(
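For quick iteration, the same cleanup chain can be run standalone with Pillow only; a minimal sketch, with "pose.png" as a placeholder path:

from PIL import Image, ImageFilter, ImageOps

pose = Image.open("pose.png")                       # placeholder input path
gray = pose.convert("L")                            # drop color
edges = gray.filter(ImageFilter.FIND_EDGES)         # white lines on black
edges = edges.filter(ImageFilter.EDGE_ENHANCE_MORE)
edges = ImageOps.autocontrast(edges, cutoff=2)      # clip darkest/brightest 2%
edges = edges.point(lambda x: 255 if x > 128 else 0, mode='1')  # hard binarize
edges = ImageOps.invert(edges.convert("RGB"))       # black lines on white
edges.save("pose_control.png")

Binarizing with point(..., mode='1') and inverting afterwards yields hard black lines on a white background, which is the polarity the comments in the function aim for, rather than soft grayscale edges.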
@@ -151,9 +153,10 @@ def generate_pose_transfer(
     negative_prompt="",
     seed=42,
     randomize_seed=False,
-    guidance_scale=
+    guidance_scale=7.5,  # Increased for better pose adherence
     num_inference_steps=28,
     lora_scale=1.0,
+    controlnet_scale=1.0,  # Added control strength
     enhance_pose=False,
     progress=gr.Progress(track_tqdm=True)
 ):
@@ -205,14 +208,17 @@ def generate_pose_transfer(
     if (width, height) != concatenated_input.size:
         concatenated_input = concatenated_input.resize((width, height), Image.LANCZOS)
 
-    # Construct prompt with trigger word
+    # Construct prompt with trigger word - CRITICAL FOR POSE CONTROL
+    # The prompt must explicitly describe the pose transfer task
+    base_instruction = f"{TRIGGER_WORD}, A photo composed of two images side by side. Left: reference person. Right: target pose skeleton. Task: Generate the person from the left image in the exact pose shown in the right image"
+
     if prompt:
-        full_prompt = f"{
+        full_prompt = f"{base_instruction}. Additional details: {prompt}"
     else:
-        full_prompt =
+        full_prompt = base_instruction
 
-    # Add
-    full_prompt += ".
+    # Add strong pose control instructions
+    full_prompt += ". IMPORTANT: Strictly follow the pose/skeleton from the right image while preserving the identity, clothing, and appearance from the left image. The output should show ONLY the transformed person, not the side-by-side layout."
 
     # Set generator for reproducibility
     generator = torch.Generator("cuda").manual_seed(seed)
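The prompt assembly can be sanity-checked without loading the model; a minimal sketch, where the TRIGGER_WORD value is an assumption (the real constant is defined earlier in app.py, outside this diff):

TRIGGER_WORD = "refcontrol"  # stand-in; the actual trigger token is defined elsewhere in app.py
prompt = "woman in a red dress"

base_instruction = (
    f"{TRIGGER_WORD}, A photo composed of two images side by side. "
    "Left: reference person. Right: target pose skeleton. "
    "Task: Generate the person from the left image in the exact pose shown in the right image"
)
full_prompt = f"{base_instruction}. Additional details: {prompt}" if prompt else base_instruction
full_prompt += (
    ". IMPORTANT: Strictly follow the pose/skeleton from the right image while preserving "
    "the identity, clothing, and appearance from the left image. The output should show ONLY "
    "the transformed person, not the side-by-side layout."
)
print(full_prompt)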
@@ -221,30 +227,33 @@ def generate_pose_transfer(
     # Check if we have LoRA capabilities
     has_lora = hasattr(pipe, 'set_adapters') and "LoRA" in MODEL_STATUS
 
-    # Set LoRA
+    # Set LoRA with higher strength for better pose control
     if has_lora:
         try:
+            # Increase LoRA strength for pose control
+            actual_lora_scale = lora_scale * 1.0  # Boost LoRA influence
+            pipe.set_adapters(["refcontrol"], adapter_weights=[actual_lora_scale])
+            print(f"LoRA adapter set with boosted strength: {actual_lora_scale}")
         except Exception as e:
             print(f"LoRA adapter not set: {e}")
 
     print(f"Generating with size: {width}x{height}")
-    print(f"Prompt: {full_prompt[:
+    print(f"Prompt: {full_prompt[:200]}...")
 
-    # Generate image
+    # Generate image with stronger pose control
     with torch.cuda.amp.autocast(dtype=torch.bfloat16):
         if "Kontext" in MODEL_STATUS:
-            # Use Kontext pipeline
+            # Use Kontext pipeline with enhanced settings
             result = pipe(
                 image=concatenated_input,
                 prompt=full_prompt,
-                negative_prompt=negative_prompt if negative_prompt else "",
-                guidance_scale=guidance_scale,
+                negative_prompt=negative_prompt if negative_prompt else "blurry, distorted, deformed, wrong pose, incorrect posture",
+                guidance_scale=guidance_scale,  # Higher for better control
                 num_inference_steps=num_inference_steps,
                 generator=generator,
                 width=width,
                 height=height,
+                controlnet_conditioning_scale=controlnet_scale,  # Control strength
             ).images[0]
         else:
             # Use standard FLUX pipeline
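One caveat on the new keyword arguments: depending on which pipeline class was loaded, controlnet_conditioning_scale may not be an accepted parameter (in diffusers it belongs to the ControlNet pipeline variants), and an unexpected keyword would raise a TypeError at call time. A defensive sketch that forwards the optional controls only when the pipeline's signature accepts them; variable names mirror the surrounding code:

import inspect

accepted = inspect.signature(pipe.__call__).parameters  # what this pipeline can take

call_kwargs = dict(
    image=concatenated_input,
    prompt=full_prompt,
    guidance_scale=guidance_scale,
    num_inference_steps=num_inference_steps,
    generator=generator,
    width=width,
    height=height,
)
# Forward optional controls only if the loaded pipeline understands them.
if "negative_prompt" in accepted:
    call_kwargs["negative_prompt"] = negative_prompt or "blurry, distorted, deformed, wrong pose, incorrect posture"
if "controlnet_conditioning_scale" in accepted:
    call_kwargs["controlnet_conditioning_scale"] = controlnet_scale

result = pipe(**call_kwargs).images[0]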
@@ -424,11 +433,11 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
 
                 guidance_scale = gr.Slider(
                     label="Guidance Scale",
-                    minimum=
-                    maximum=
+                    minimum=5.0,
+                    maximum=15.0,
                     step=0.5,
-                    value=
-                    info="
+                    value=7.5,
+                    info="Higher = stricter pose following (7-10 recommended)"
                 )
 
                 num_inference_steps = gr.Slider(
@@ -436,17 +445,17 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
                     minimum=20,
                     maximum=50,
                     step=1,
-                    value=
+                    value=30
                 )
 
                 if "LoRA" in MODEL_STATUS:
                     lora_scale = gr.Slider(
                         label="LoRA Strength",
-                        minimum=0.
+                        minimum=0.5,
                         maximum=2.0,
                         step=0.1,
-                        value=1.
-                        info="RefControl LoRA influence"
+                        value=1.2,
+                        info="RefControl LoRA influence (1.0-1.5 recommended)"
                     )
                 else:
                     lora_scale = gr.Slider(
@@ -458,6 +467,15 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
                         interactive=False
                     )
 
+                controlnet_scale = gr.Slider(
+                    label="Pose Control Strength",
+                    minimum=0.5,
+                    maximum=2.0,
+                    step=0.1,
+                    value=1.0,
+                    info="How strongly to enforce the pose"
+                )
+
                 enhance_pose = gr.Checkbox(
                     label="Auto-enhance pose edges",
                     value=False
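Note that the new controlnet_scale slider only reaches generate_pose_transfer if it is also added to the event wiring, which this diff does not show. A hypothetical sketch; the button and component names are assumptions about code outside this diff:

# Hypothetical wiring; the actual component names live elsewhere in app.py.
generate_btn.click(
    fn=generate_pose_transfer,
    inputs=[
        reference_image, pose_image, prompt, negative_prompt,
        seed, randomize_seed, guidance_scale, num_inference_steps,
        lora_scale, controlnet_scale, enhance_pose,
    ],
    outputs=[result_image],
)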
|