Spaces:

NeerjaK
/

DinoV3-Patch-Similarity-Visualiser

Running

App Files Files Community

NeerjaK committed on Oct 22

Commit

745d6a0

1 Parent(s): b3cb8ec

first commit

Browse files

Files changed (3) hide show

app.py +83 -0
requirements.txt +8 -0
utils.py +111 -0

app.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import gradio as gr
+from utils import get_patch_embeddings, compute_patch_similarity, overlay_similarity, device
+# Grid coordinates (row, col) of the most recently clicked patch; written by store_patch.
+# NOTE(review): this is module-level, so it is shared by every session served by this process.
+selected_patch = {"row": 0, "col": 0}
+def init_states(img):
+    if img is None:
+        return gr.update(value=None), None
+    patch_embs, patch_embs_norm, rows, cols = get_patch_embeddings(img, ps=16, device=device)
+    sim_map = compute_patch_similarity(patch_embs, patch_embs_norm, 0, 0)
+    result_img = overlay_similarity(img, sim_map, alpha=0.6, cmap="hot")
+    state = {
+        "img": img,
+        "patch_embs": patch_embs,
+        "patch_embs_norm": patch_embs_norm,
+        "grid_size": rows,
+        "alpha": 0.6,
+        "overlay_img":result_img,
+    }
+    return state, result_img
+def store_patch(evt, state):
+    if state is None or evt is None:
+        return state
+    rows = state["grid_size"]  # e.g., (14, 14)
+    cols = rows
+    overlay_img = state["overlay_img"]
+    overlay_W, overlay_H = overlay_img.size
+    x_click, y_click = evt.index     # coordinates from click event
+    # Map click coordinates to original patch grid
+    col = int(x_click / overlay_W * cols)
+    row = int(y_click / overlay_H * rows)
+    # Clamp to valid range
+    col = min(max(col, 0), cols - 1)
+    row = min(max(row, 0), rows - 1)
+    # Store in global or state dictionary
+    selected_patch["row"] = row
+    selected_patch["col"] = col
+    return state
+def reload_overlay(evt: gr.SelectData,state):
+    if state is None:
+        return None
+    store_patch(evt, state)
+    row, col = selected_patch["row"], selected_patch["col"]
+    img = state["img"]
+    patch_embs = state["patch_embs"]
+    patch_embs_norm = state["patch_embs_norm"]
+    alpha = state["alpha"]
+    sim_map = compute_patch_similarity(patch_embs, patch_embs_norm, row, col)
+    result_img = overlay_similarity(img, sim_map, alpha=alpha, cmap="hot")
+    return result_img
+# Build the UI: an upload box on the left and a clickable similarity overlay on the right.
+with gr.Blocks() as demo:
+    # Holds the dict returned by init_states between events.
+    state_store = gr.State()
+    gr.Markdown("""
+    <h1 style="font-size:36px; font-weight:bold;">Patch Similarity Visualizer</h1>
+    <ul style="font-size:18px;">
+        <li>Upload an image in the <strong>left box</strong>.</li>
+        <li>Click anywhere in the <strong>right box</strong> to select a patch.</li>
+        <li>View the similarity of the selected patch with all other patches in the image.</li>
+    </ul>
+    """)
+    with gr.Row():
+        img_input = gr.Image(type="pil", label="Upload image")
+        output_img = gr.Image(type="pil", label="Similarity overlay",interactive=True)
+    # A change of the input image recomputes the embeddings and shows the overlay for patch (0, 0).
+    img_input.change(fn=init_states, inputs=[img_input], outputs=[state_store, output_img])
+    # Clicking the overlay re-renders it for the clicked patch.
+    output_img.select(fn=reload_overlay, inputs=[state_store], outputs=[output_img])
+# NOTE(review): launch runs at import time and requests a public share link — confirm both are intended.
+demo.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+torch
+torchvision
+transformers
+timm
+gradio==5.49.1
+numpy
+Pillow
+matplotlib

utils.py ADDED Viewed

	@@ -0,0 +1,111 @@

+from transformers.image_utils import load_image
+import numpy as np
+from PIL import Image
+import matplotlib.pyplot as plt
+import io
+from transformers import AutoImageProcessor, AutoModel
+import torch
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+processor = AutoImageProcessor.from_pretrained("facebook/dinov3-vits16-pretrain-lvd1689m")
+model = AutoModel.from_pretrained(
+    "facebook/dinov3-vits16-pretrain-lvd1689m",
+    torch_dtype=torch.float16,
+    device_map="auto",
+    attn_implementation="sdpa"
+)
+model.eval()
+def display_image(img, rows,cols):
+    W, H = img.size
+    patch_w = W / rows
+    patch_h = H / cols
+    plt.figure(figsize=(8,8))
+    plt.imshow(img)
+    # Draw vertical lines
+    for i in range(1, rows):
+        plt.axvline(i * patch_w, color='white', linestyle='--', linewidth=0.8)
+    # Draw horizontal lines
+    for i in range(1, cols):
+        plt.axhline(i * patch_h, color='white', linestyle='--', linewidth=0.8)
+    plt.axis('off')
+    plt.show()
+def get_patch_embeddings(img, ps=16, device="cuda"):
+    inputs = processor(images=img, return_tensors="pt").to(device, torch.float16)
+    B, C, H, W = inputs["pixel_values"].shape
+    rows, cols = H // ps, W // ps
+    with torch.no_grad():
+        out = model(**inputs)
+    hs = out.last_hidden_state.squeeze(0).detach().cpu().numpy()
+    # remove CLS + register tokens
+    n_patches = rows * cols
+    patch_embs = hs[-n_patches:, :].reshape(rows, cols, -1)
+    # flatten and normalize
+    X = patch_embs.reshape(-1, patch_embs.shape[-1])
+    Xn = X / (np.linalg.norm(X, axis=1, keepdims=True) + 1e-8)
+    return patch_embs, Xn, rows, cols
+def compute_patch_similarity(patch_embs, patch_embs_norm, row, col):
+    rows, cols, dim = patch_embs.shape
+    patch_idx = row * cols + col  # flatten index
+    # cosine similarity via dot product
+    sim = patch_embs_norm @ patch_embs_norm[patch_idx]
+    sim_map = sim.reshape(rows, cols)
+    sim_map = (sim_map - sim_map.min()) / (sim_map.max() - sim_map.min() + 1e-8)
+    return sim_map
+def overlay_similarity(img, sim_map, alpha=0.5, cmap="hot"):
+    """Draw heatmap overlay with grid and return as PIL image (for Gradio)."""
+    W, H = img.size
+    # Expand sim_map (14x14) to full resolution via Kronecker upsampling
+    sim_map_resized = np.kron(sim_map, np.ones((H // sim_map.shape[0], W // sim_map.shape[1])))
+    # Plot to figure (no plt.show())
+    fig, ax = plt.subplots(figsize=(8, 8))
+    ax.imshow(img)
+    ax.imshow(sim_map_resized, cmap=cmap, alpha=alpha)
+    # Draw patch grid
+    patch_w = W / sim_map.shape[1]
+    patch_h = H / sim_map.shape[0]
+    for i in range(1, sim_map.shape[1]):
+        ax.axvline(i * patch_w, color='white', linestyle='--', linewidth=0.8)
+    for i in range(1, sim_map.shape[0]):
+        ax.axhline(i * patch_h, color='white', linestyle='--', linewidth=0.8)
+    ax.axis('off')
+    # Convert figure to PIL image (so Gradio can show it)
+    buf = io.BytesIO()
+    fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
+    plt.close(fig)
+    buf.seek(0)
+    overlay_img = Image.open(buf)
+    return overlay_img
+# img = Image.open("two-cats.jpg")
+# patch_embs,patch_embs_norm,rows,cols= get_patch_embeddings(img,ps=16, device=device)
+# display_image(img,rows,cols)
+# sim_map = compute_patch_similarity(patch_embs, patch_embs_norm, 7, 7)
+# result_img = overlay_similarity(img,sim_map)
+# plt.imshow(result_img)
+# plt.savefig("overlay_result.png")
+# plt.show()