temp state
README.md CHANGED
@@ -1 +1,11 @@
-
+---
+title: SuperFeatures
+emoji: 📚
+colorFrom: red
+colorTo: yellow
+sdk: gradio
+app_file: app.py
+pinned: false
+---
+# Learning Super-Features for Image Retrieval
+A demo for the ICLR 22 paper "Learning Super-Features for Image Retrieval". [[Paper](https://openreview.net/pdf?id=wogsFPHwftY)] [[Official Github Repo](https://github.com/naver/fire)]
app.py CHANGED
@@ -3,6 +3,40 @@ import gradio as gr
 def greet(name):
     return "Hello " + name + "!!"
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
 
+# Model to use
+net_path = 'fire.pth'
+
+# CPU / GPU
+device = 'cpu'
+
+# Images will be downscaled to this size prior to processing with the network
+image_size = 1024
+
+# Wrapper
+def generate_matching_superfeatures(im1, im2, scale=6):
+
+    # Possible scales for multiscale inference
+    scales = [2.0, 1.414, 1.0, 0.707, 0.5, 0.353, 0.25]
+
+
+# GRADIO APP
+title = "Visualizing Super-features"
+description = "TBD"
+article = "<p style='text-align: center'><a href='https://github.com/naver/fire' target='_blank'>Original Github Repo</a></p>"
+
+
+iface = gr.Interface(
+    fn=generate_matching_superfeatures,
+    inputs=[
+        gr.inputs.Image(shape=(240, 240), type="pil"),
+        gr.inputs.Image(shape=(240, 240), type="pil"),
+        gr.inputs.Slider(minimum=1, maximum=7, step=1, default=2, label="Scale")],
+    outputs="plot",
+    enable_queue=True,
+    title=title,
+    description=description,
+    article=article,
+    examples=[["chateau_1.png", "chateau_2.png", 6]],
+)
+iface.launch()
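Note that the wrapper above is left unfinished in this commit (hence the Space's build error): generate_matching_superfeatures only defines the list of scales and returns nothing for the "plot" output. Below is a minimal sketch of how it could be completed, picking up the net_path, device and image_size variables defined earlier in app.py. It assumes that fire.pth stores 'net_params' and 'state_dict' entries, as checkpoints from the official FIRe/HOW code do, and uses standard ImageNet normalization; both are assumptions, not part of this commit.

import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms

import fire_network

# Rebuild the network from the checkpoint (assumption: 'fire.pth' stores
# 'net_params' and 'state_dict', as FIRe/HOW checkpoints do).
state = torch.load(net_path, map_location='cpu')
state['net_params']['pretrained'] = None  # weights come from the state dict below
net = fire_network.init_network(**state['net_params']).to(device)
net.load_state_dict(state['state_dict'], strict=False)
net.eval()

# Standard ImageNet preprocessing (assumed; not specified in this commit).
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def generate_matching_superfeatures(im1, im2, scale=6):
    """Extract Super-feature attention maps for both images at one scale and plot them."""
    scales = [2.0, 1.414, 1.0, 0.707, 0.5, 0.353, 0.25]
    s = scales[scale - 1]  # the slider is 1-indexed
    with torch.no_grad():
        _, attns1, _ = net.get_superfeatures(transform(im1).unsqueeze(0).to(device), scales=[s])
        _, attns2, _ = net.get_superfeatures(transform(im2).unsqueeze(0).to(device), scales=[s])
    fig, axes = plt.subplots(1, 2)
    axes[0].imshow(attns1[0][0].sum(dim=0).cpu().numpy())
    axes[0].set_title("image 1: summed attention")
    axes[1].imshow(attns2[0][0].sum(dim=0).cpu().numpy())
    axes[1].set_title("image 2: summed attention")
    return fig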
fire_network.py ADDED
@@ -0,0 +1,130 @@
+# Copyright (C) 2021-2022 Naver Corporation. All rights reserved.
+# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
+
+import os
+import torch
+from torch import nn
+import torchvision
+
+from cirtorch.networks import imageretrievalnet
+
+from how import layers
+from how.layers import functional as HF
+
+from lit import LocalfeatureIntegrationTransformer
+
+from how.networks.how_net import HOWNet, CORERCF_SIZE
+
+class FIReNet(HOWNet):
+
+    def __init__(self, features, attention, lit, dim_reduction, meta, runtime):
+        super().__init__(features, attention, None, dim_reduction, meta, runtime)
+        self.lit = lit
+        self.return_global = False
+
+    def copy_excluding_dim_reduction(self):
+        """Return a copy of this network without the dim_reduction layer"""
+        meta = {**self.meta, "outputdim": self.meta['backbone_dim']}
+        return self.__class__(self.features, self.attention, self.lit, None, meta, self.runtime)
+
+    def copy_with_runtime(self, runtime):
+        """Return a copy of this network with a different runtime dict"""
+        return self.__class__(self.features, self.attention, self.lit, self.dim_reduction, self.meta, runtime)
+
+    def parameter_groups(self):
+        """Return torch parameter groups"""
+        layers = [self.features, self.attention, self.smoothing, self.lit]
+        parameters = [{'params': x.parameters()} for x in layers if x is not None]
+        if self.dim_reduction:
+            # Do not update dimensionality reduction layer
+            parameters.append({'params': self.dim_reduction.parameters(), 'lr': 0.0})
+        return parameters
+
+    def get_superfeatures(self, x, *, scales):
+        """
+        Return a tuple (features, attentionmaps, strengths) where each is a list over the requested scales
+        features is a tensor BxDxNx1
+        attentionmaps is a tensor BxNxHxW
+        """
+        feats = []
+        attns = []
+        strengths = []
+        for s in scales:
+            xs = nn.functional.interpolate(x, scale_factor=s, mode='bilinear', align_corners=False)
+            o = self.features(xs)
+            o, attn = self.lit(o)
+            strength = self.attention(o)
+            if self.smoothing:
+                o = self.smoothing(o)
+            if self.dim_reduction:
+                o = self.dim_reduction(o)
+            feats.append(o)
+            attns.append(attn)
+            strengths.append(strength)
+        return feats, attns, strengths
+
+    def forward(self, x):
+        if self.return_global:
+            return self.forward_global(x, scales=self.runtime['training_scales'])
+        return self.get_superfeatures(x, scales=self.runtime['training_scales'])
+
+    def forward_global(self, x, *, scales):
+        """Return global descriptor"""
+        feats, _, strengths = self.get_superfeatures(x, scales=scales)
+        return HF.weighted_spoc(feats, strengths)
+
+    def forward_local(self, x, *, features_num, scales):
+        """Return selected super features"""
+        feats, _, strengths = self.get_superfeatures(x, scales=scales)
+        return HF.how_select_local(feats, strengths, scales=scales, features_num=features_num)
+
+def init_network(architecture, pretrained, skip_layer, dim_reduction, lit, runtime):
+    """Initialize FIRe network
+    :param str architecture: Network backbone architecture (e.g. resnet18)
+    :param str pretrained: url of the pretrained model (None for using random initialization)
+    :param int skip_layer: How many layers of blocks should be skipped (from the end)
+    :param dict dim_reduction: Options for the dimensionality reduction layer
+    :param dict lit: Options for the lit layer
+    :param dict runtime: Runtime options to be stored in the network
+    :return FIRe: Initialized network
+    """
+    # Take convolutional layers as features, always ends with ReLU to make last activations non-negative
+    net_in = getattr(torchvision.models, architecture)(pretrained=False)  # use trained weights including the LIT module instead
+    if architecture.startswith('alexnet') or architecture.startswith('vgg'):
+        features = list(net_in.features.children())[:-1]
+    elif architecture.startswith('resnet'):
+        features = list(net_in.children())[:-2]
+    elif architecture.startswith('densenet'):
+        features = list(net_in.features.children()) + [nn.ReLU(inplace=True)]
+    elif architecture.startswith('squeezenet'):
+        features = list(net_in.features.children())
+    else:
+        raise ValueError('Unsupported or unknown architecture: {}!'.format(architecture))
+
+    if skip_layer > 0:
+        features = features[:-skip_layer]
+    backbone_dim = imageretrievalnet.OUTPUT_DIM[architecture] // (2 ** skip_layer)
+
+    att_layer = layers.attention.L2Attention()
+
+    lit_layer = LocalfeatureIntegrationTransformer(**lit, input_dim=backbone_dim)
+
+    reduction_layer = None
+    if dim_reduction:
+        reduction_layer = layers.dim_reduction.ConvDimReduction(**dim_reduction, input_dim=lit['dim'])
+
+    meta = {
+        "architecture": architecture,
+        "backbone_dim": lit['dim'],
+        "outputdim": reduction_layer.out_channels if dim_reduction else lit['dim'],
+        "corercf_size": CORERCF_SIZE[architecture] // (2 ** skip_layer),
+    }
+    net = FIReNet(nn.Sequential(*features), att_layer, lit_layer, reduction_layer, meta, runtime)
+
+    if pretrained is not None:
+        assert os.path.isfile(pretrained), pretrained
+        ckpt = torch.load(pretrained, map_location='cpu')
+        missing, unexpected = net.load_state_dict(ckpt['state_dict'], strict=False)
+        assert all(['dim_reduction' in a for a in missing]), "Loading did not go well"
+        assert all(['fc' in a for a in unexpected]), "Loading did not go well"
+    return net
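For reference, a small usage sketch of init_network and get_superfeatures follows. The backbone, LIT options and runtime values below are illustrative placeholders only; the trained model's actual configuration ships inside its checkpoint (net_params), not in this file.

import torch
import fire_network

# Illustrative options only; the trained model's real values come from its checkpoint.
net = fire_network.init_network(
    architecture='resnet50',
    pretrained=None,                      # random init; a state_dict would be loaded separately
    skip_layer=0,
    dim_reduction=None,                   # keep the LIT output dimension
    lit={'T': 3, 'N': 256, 'dim': 1024},  # iterations, number of Super-features, their dimension
    runtime={'training_scales': [1.0]},
)
net.eval()

x = torch.randn(1, 3, 224, 224)
feats, attns, strengths = net.get_superfeatures(x, scales=[1.0])
# feats[0]     : 1 x 1024 x 256 x 1  (one 1024-d vector per Super-feature)
# attns[0]     : 1 x 256 x 7 x 7     (each Super-feature's attention over locations)
# strengths[0] : per-Super-feature strength, used for selection and pooling

net.return_global = True  # forward() now aggregates Super-features into a global descriptor
g = net(x)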
lit.py ADDED
@@ -0,0 +1,92 @@
+# Copyright (C) 2021-2022 Naver Corporation. All rights reserved.
+# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
+
+import torch
+from torch import nn
+
+class LocalfeatureIntegrationTransformer(nn.Module):
+    """Map a set of local features to a fixed number of SuperFeatures"""
+
+    def __init__(self, T, N, input_dim, dim):
+        """
+        T: number of iterations
+        N: number of SuperFeatures
+        input_dim: dimension of input local features
+        dim: dimension of SuperFeatures
+        """
+        super().__init__()
+        self.T = T
+        self.N = N
+        self.input_dim = input_dim
+        self.dim = dim
+        # learnable initialization
+        self.templates_init = nn.Parameter(torch.randn(1, self.N, dim))
+        # qkv
+        self.project_q = nn.Linear(dim, dim, bias=False)
+        self.project_k = nn.Linear(input_dim, dim, bias=False)
+        self.project_v = nn.Linear(input_dim, dim, bias=False)
+        # layer norms
+        self.norm_inputs = nn.LayerNorm(input_dim)
+        self.norm_templates = nn.LayerNorm(dim)
+        # for the normalization
+        self.softmax = nn.Softmax(dim=-1)
+        self.scale = dim ** -0.5
+        # mlp
+        self.norm_mlp = nn.LayerNorm(dim)
+        mlp_dim = dim // 2
+        self.mlp = nn.Sequential(nn.Linear(dim, mlp_dim), nn.ReLU(), nn.Linear(mlp_dim, dim))
+
+
+    def forward(self, x):
+        """
+        input:
+            x has shape BxCxHxW
+        output:
+            templates (output SuperFeatures): tensor of shape BxDxNx1
+            attn (attention over local features at the last iteration): tensor of shape BxNxHxW
+        """
+        # reshape inputs from BxCxHxW to Bx(H*W)xC
+        B, C, H, W = x.size()
+        x = x.reshape(B, C, H * W).permute(0, 2, 1)
+
+        # k and v projection
+        x = self.norm_inputs(x)
+        k = self.project_k(x)
+        v = self.project_v(x)
+
+        # template initialization
+        templates = torch.repeat_interleave(self.templates_init, B, dim=0)
+        attn = None
+
+        # main iteration loop
+        for _ in range(self.T):
+            templates_prev = templates
+
+            # q projection
+            templates = self.norm_templates(templates)
+            q = self.project_q(templates)
+
+            # attention
+            q = q * self.scale  # Normalization.
+            attn_logits = torch.einsum('bnd,bld->bln', q, k)
+            attn = self.softmax(attn_logits)
+            attn = attn + 1e-8  # to avoid zeros with the L1 normalization below
+            attn = attn / attn.sum(dim=-2, keepdim=True)
+
+            # update template
+            templates = templates_prev + torch.einsum('bld,bln->bnd', v, attn)
+
+            # mlp
+            templates = templates + self.mlp(self.norm_mlp(templates))
+
+        # reshape templates to BxDxNx1
+        templates = templates.permute(0, 2, 1)[:, :, :, None]
+        attn = attn.permute(0, 2, 1).view(B, self.N, H, W)
+
+        return templates, attn
+
+    def __repr__(self):
+        s = str(self.__class__.__name__)
+        for k in ["T", "N", "input_dim", "dim"]:
+            s += "\n  {:s}: {:d}".format(k, getattr(self, k))
+        return s
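To check the tensor shapes this module produces, here is a minimal standalone example; the T, N, input_dim and dim values are arbitrary examples, not the trained configuration.

import torch
from lit import LocalfeatureIntegrationTransformer

# Example sizes only: 2048-d local features mapped to N=256 Super-features of dimension 1024.
lit_layer = LocalfeatureIntegrationTransformer(T=3, N=256, input_dim=2048, dim=1024)

x = torch.randn(2, 2048, 16, 16)  # B x C x H x W feature map from a backbone
templates, attn = lit_layer(x)

print(templates.shape)  # torch.Size([2, 1024, 256, 1]) -> B x dim x N x 1
print(attn.shape)       # torch.Size([2, 256, 16, 16])  -> B x N x H x W
print(lit_layer)        # __repr__ lists T, N, input_dim and dim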