Initial model upload: COCO inpainting U-Net
- README.md +82 -0
- inpainting_model_coco.pth +3 -0
- model.py +59 -0
README.md ADDED
@@ -0,0 +1,82 @@
---
license: apache-2.0
language: en
library_name: pytorch
tags:
- image-inpainting
- computer-vision
- pytorch
- unet
- coco
datasets:
- coco
---

# U-Net for Image Inpainting on COCO 2017

This repository contains a PyTorch implementation of a deep U-Net with residual blocks, trained on the COCO 2017 dataset for image inpainting.

## Model Description

The model is a `ComplexUNet`, a variant of the standard U-Net adapted for 256x256 images. It has a deep encoder-decoder structure with 5 downsampling/upsampling stages and uses residual blocks for more stable training.
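
As a quick sanity check of those shapes, here is a minimal sketch that instantiates the network and runs a dummy forward pass (it assumes `model.py` from this repository is on your import path):

```python
import torch
from model import ComplexUNet

model = ComplexUNet(base_channels=64)
model.eval()

x = torch.randn(1, 3, 256, 256)  # dummy RGB batch at the training resolution
with torch.no_grad():
    y = model(x)

print(y.shape)  # torch.Size([1, 3, 256, 256]); the output passes through a sigmoid, so values lie in [0, 1]
```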

## How to Use

To use this model, you need `torch` and `torchvision` installed (e.g. `pip install torch torchvision`).

1. Place the `model.py` file in your project directory.
2. Download the `inpainting_model_coco.pth` file from the 'Files and versions' tab.
3. Load the model as shown below.

```python
import torch
from model import ComplexUNet  # Import the class from model.py
from PIL import Image
import torchvision.transforms as T

# --- Setup ---
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_PATH = "inpainting_model_coco.pth"  # <-- Make sure you've downloaded this file

# --- Load Model ---
# Note: use base_channels=64, the value used during training
model = ComplexUNet(base_channels=64)
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
model.to(DEVICE)
model.eval()

print("Model loaded successfully!")

# --- Example: Inpaint an image ---
# 1. Load your masked image
masked_image = Image.open("path/to/your/masked_image.png").convert("RGB")

# 2. Create a tensor from your image
transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(256),
    T.ToTensor()
])
masked_tensor = transform(masked_image).unsqueeze(0).to(DEVICE)

# 3. Get the reconstructed image
with torch.no_grad():
    reconstructed_tensor = model(masked_tensor)

# 4. Convert the tensor back to a PIL Image
reconstructed_image = T.ToPILImage()(reconstructed_tensor.squeeze(0).cpu())
reconstructed_image.save("reconstructed_result.png")
print("Inpainting complete. Saved to reconstructed_result.png")
```
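
The masking scheme used during training is not documented in this card, so if you need to produce a masked input for testing, the square-mask helper below is an illustrative assumption, not the training procedure:

```python
import torch

def mask_center_square(img_tensor, size=64):
    """Hypothetical helper: zero out a centered square region of an (N, C, H, W) tensor."""
    masked = img_tensor.clone()
    _, _, h, w = masked.shape
    top, left = (h - size) // 2, (w - size) // 2
    masked[:, :, top:top + size, left:left + size] = 0.0
    return masked

# e.g. masked_tensor = mask_center_square(transform(image).unsqueeze(0).to(DEVICE))
```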

## Training Details

- **Framework**: PyTorch
- **Dataset**: COCO 2017
- **Epochs**: 10
- **Batch Size**: 16
- **Learning Rate**: 0.001
- **Optimizer**: Adam
- **Loss Function**: Mean Squared Error (MSE)
- **Image Resolution**: 256x256
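
The training script itself is not part of this upload; the loop below is a minimal sketch consistent with the hyperparameters above. The `TensorDataset` stand-in and the assumption that batches are (masked, clean) pairs are hypothetical, not the author's pipeline:

```python
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from model import ComplexUNet

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Stand-in data so the sketch runs; replace with a real COCO pipeline
# yielding (masked, clean) 256x256 image pairs.
dummy = TensorDataset(torch.rand(32, 3, 256, 256), torch.rand(32, 3, 256, 256))
dataloader = DataLoader(dummy, batch_size=16, shuffle=True)  # Batch Size: 16

model = ComplexUNet(base_channels=64).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Adam, lr 0.001
criterion = nn.MSELoss()  # MSE reconstruction loss

model.train()
for epoch in range(10):  # Epochs: 10
    for masked, clean in dataloader:
        masked, clean = masked.to(DEVICE), clean.to(DEVICE)
        optimizer.zero_grad()
        loss = criterion(model(masked), clean)
        loss.backward()
        optimizer.step()
```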
inpainting_model_coco.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:add6cd16e13bbc0003bee51af922041e992ef2a6e58af6eea5f64c8fb1b385ec
size 520110786
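
This file is a Git LFS pointer, so the roughly 520 MB checkpoint must be fetched via LFS or the Hub. A sketch using `huggingface_hub` (the `repo_id` below is a placeholder, not this model's actual repository id):

```python
from huggingface_hub import hf_hub_download

# "user/repo" is a placeholder; substitute the actual repository id.
checkpoint_path = hf_hub_download(repo_id="user/repo", filename="inpainting_model_coco.pth")
print(checkpoint_path)
```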
model.py ADDED
@@ -0,0 +1,59 @@
import torch
import torch.nn as nn

# Model architecture copied directly from the training script.
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Project the shortcut with a 1x1 conv when the channel count changes.
        if in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        residual = self.shortcut(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += residual
        out = self.relu(out)
        return out


class ComplexUNet(nn.Module):
    def __init__(self, base_channels=64):
        super(ComplexUNet, self).__init__()
        c = base_channels
        self.pool = nn.MaxPool2d(2, 2)
        # Encoder: five residual stages, doubling channels at each downsampling step.
        self.enc1 = ResidualBlock(3, c)
        self.enc2 = ResidualBlock(c, c * 2)
        self.enc3 = ResidualBlock(c * 2, c * 4)
        self.enc4 = ResidualBlock(c * 4, c * 8)
        self.enc5 = ResidualBlock(c * 8, c * 16)
        self.bottleneck = ResidualBlock(c * 16, c * 32)
        # Decoder: transposed convolutions for upsampling, followed by residual
        # blocks applied after each skip-connection concatenation.
        self.upconv1 = nn.ConvTranspose2d(c * 32, c * 16, kernel_size=2, stride=2)
        self.upconv2 = nn.ConvTranspose2d(c * 16, c * 8, kernel_size=2, stride=2)
        self.upconv3 = nn.ConvTranspose2d(c * 8, c * 4, kernel_size=2, stride=2)
        self.upconv4 = nn.ConvTranspose2d(c * 4, c * 2, kernel_size=2, stride=2)
        self.upconv5 = nn.ConvTranspose2d(c * 2, c, kernel_size=2, stride=2)
        self.dec_conv1 = ResidualBlock(c * 32, c * 16)
        self.dec_conv2 = ResidualBlock(c * 16, c * 8)
        self.dec_conv3 = ResidualBlock(c * 8, c * 4)
        self.dec_conv4 = ResidualBlock(c * 4, c * 2)
        self.dec_conv5 = ResidualBlock(c * 2, c)
        self.final_conv = nn.Conv2d(c, 3, kernel_size=1)

    def forward(self, x):
        # Encoder path; keep each pre-pool feature map for the skip connections.
        e1 = self.enc1(x)
        p1 = self.pool(e1)
        e2 = self.enc2(p1)
        p2 = self.pool(e2)
        e3 = self.enc3(p2)
        p3 = self.pool(e3)
        e4 = self.enc4(p3)
        p4 = self.pool(e4)
        e5 = self.enc5(p4)
        p5 = self.pool(e5)
        b = self.bottleneck(p5)
        # Decoder path: upsample, concatenate the matching encoder feature map,
        # then fuse with a residual block.
        d1 = self.upconv1(b)
        d1 = torch.cat([d1, e5], dim=1)
        d1 = self.dec_conv1(d1)
        d2 = self.upconv2(d1)
        d2 = torch.cat([d2, e4], dim=1)
        d2 = self.dec_conv2(d2)
        d3 = self.upconv3(d2)
        d3 = torch.cat([d3, e3], dim=1)
        d3 = self.dec_conv3(d3)
        d4 = self.upconv4(d3)
        d4 = torch.cat([d4, e2], dim=1)
        d4 = self.dec_conv4(d4)
        d5 = self.upconv5(d4)
        d5 = torch.cat([d5, e1], dim=1)
        d5 = self.dec_conv5(d5)
        out = self.final_conv(d5)
        # Sigmoid keeps outputs in [0, 1] to match ToTensor-normalized images.
        return torch.sigmoid(out)