import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import dlib
import random
import math
import argparse
import torch
from torch.utils import data
from torch.nn import functional as F
from torch import autograd
from torch.nn import init
import torchvision.transforms as transforms
from model.stylegan.op import conv2d_gradfix
from model.encoder.encoders.psp_encoders import GradualStyleEncoder
from model.encoder.align_all_parallel import get_landmark

def visualize(img_arr, dpi):
    plt.figure(figsize=(10, 10), dpi=dpi)
    plt.imshow(((img_arr.detach().cpu().numpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8))
    plt.axis('off')
    plt.show()

def save_image(img, filename):
    tmp = ((img.detach().cpu().numpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8)
    cv2.imwrite(filename, cv2.cvtColor(tmp, cv2.COLOR_RGB2BGR))

def load_image(filename):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    img = Image.open(filename)
    img = transform(img)
    return img.unsqueeze(dim=0)

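# A minimal usage sketch (hypothetical file paths): load an image as a
# [-1, 1]-normalized tensor, preview it, and write it back to disk.
#
#     img = load_image('input.jpg')        # shape (1, 3, H, W), range [-1, 1]
#     visualize(img[0], dpi=80)            # matplotlib preview
#     save_image(img[0], 'output.jpg')     # converts RGB -> BGR for OpenCV
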
def data_sampler(dataset, shuffle, distributed):
    if distributed:
        return data.distributed.DistributedSampler(dataset, shuffle=shuffle)
    if shuffle:
        return data.RandomSampler(dataset)
    else:
        return data.SequentialSampler(dataset)

def requires_grad(model, flag=True):
    for p in model.parameters():
        p.requires_grad = flag

# Exponential moving average of model1's parameters towards model2's
# (used to maintain an EMA copy of the generator during training).
def accumulate(model1, model2, decay=0.999):
    par1 = dict(model1.named_parameters())
    par2 = dict(model2.named_parameters())
    for k in par1.keys():
        par1[k].data.mul_(decay).add_(par2[k].data, alpha=1 - decay)

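# Hypothetical training-loop sketch, assuming a generator `g` and its EMA
# copy `g_ema` are defined elsewhere and updated after each optimizer step:
#
#     accumulate(g_ema, g, decay=0.999)   # g_ema <- 0.999 * g_ema + 0.001 * g
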
# Turn a finite DataLoader into an infinite stream of batches.
def sample_data(loader):
    while True:
        for batch in loader:
            yield batch

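# Hypothetical usage, assuming `dataset` is defined: the training loop can
# then simply call next() without worrying about epoch boundaries.
#
#     loader = data.DataLoader(
#         dataset, batch_size=4,
#         sampler=data_sampler(dataset, shuffle=True, distributed=False))
#     loader = sample_data(loader)
#     batch = next(loader)
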
def d_logistic_loss(real_pred, fake_pred):
    real_loss = F.softplus(-real_pred)
    fake_loss = F.softplus(fake_pred)
    return real_loss.mean() + fake_loss.mean()

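# Note: softplus(x) = log(1 + exp(x)), so this is the standard logistic
# (non-saturating) discriminator loss,
#     -log(sigmoid(real_pred)) - log(1 - sigmoid(fake_pred)),
# written in a numerically stable form.
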
# R1 regularization: penalize the squared gradient norm of the
# discriminator's output with respect to the real images.
def d_r1_loss(real_pred, real_img):
    with conv2d_gradfix.no_weight_gradients():
        grad_real, = autograd.grad(
            outputs=real_pred.sum(), inputs=real_img, create_graph=True
        )
    grad_penalty = grad_real.pow(2).reshape(grad_real.shape[0], -1).sum(1).mean()
    return grad_penalty

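# Hypothetical lazy-regularization sketch in the StyleGAN2 style, assuming a
# discriminator `D` and hyperparameters `r1` and `d_reg_every` are defined:
#
#     real_img.requires_grad = True
#     real_pred = D(real_img)
#     r1_loss = d_r1_loss(real_pred, real_img)
#     (r1 / 2 * r1_loss * d_reg_every).backward()
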
def g_nonsaturating_loss(fake_pred):
    loss = F.softplus(-fake_pred).mean()
    return loss

# Path length regularization from StyleGAN2: encourage a fixed-magnitude
# change in the image for a fixed-magnitude step in latent space.
def g_path_regularize(fake_img, latents, mean_path_length, decay=0.01):
    noise = torch.randn_like(fake_img) / math.sqrt(
        fake_img.shape[2] * fake_img.shape[3]
    )
    grad, = autograd.grad(
        outputs=(fake_img * noise).sum(), inputs=latents, create_graph=True
    )
    path_lengths = torch.sqrt(grad.pow(2).sum(2).mean(1))
    path_mean = mean_path_length + decay * (path_lengths.mean() - mean_path_length)
    path_penalty = (path_lengths - path_mean).pow(2).mean()
    return path_penalty, path_mean.detach(), path_lengths

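# Hypothetical usage, assuming the generator `G` returns both images and the
# W+ latents that produced them (as in StyleGAN2 training loops):
#
#     fake_img, latents = G(noise, return_latents=True)
#     path_loss, mean_path_length, path_lengths = g_path_regularize(
#         fake_img, latents, mean_path_length)
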
def make_noise(batch, latent_dim, n_noise, device):
    if n_noise == 1:
        return torch.randn(batch, latent_dim, device=device)
    noises = torch.randn(n_noise, batch, latent_dim, device=device).unbind(0)
    return noises

def mixing_noise(batch, latent_dim, prob, device):
    if prob > 0 and random.random() < prob:
        return make_noise(batch, latent_dim, 2, device)
    else:
        return [make_noise(batch, latent_dim, 1, device)]

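# Hypothetical usage: with probability `prob`, sample two latent codes for
# style mixing; otherwise return a single code (always as a list/tuple, so
# the generator can treat both cases uniformly):
#
#     noise = mixing_noise(batch=4, latent_dim=512, prob=0.9, device='cuda')
#     fake_img, _ = G(noise)
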
def set_grad_none(model, targets):
    for n, p in model.named_parameters():
        if n in targets:
            p.grad = None

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('BatchNorm2d') != -1:
        if hasattr(m, 'weight') and m.weight is not None:
            init.normal_(m.weight.data, 1.0, 0.02)
        if hasattr(m, 'bias') and m.bias is not None:
            init.constant_(m.bias.data, 0.0)
    elif hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
        if hasattr(m, 'bias') and m.bias is not None:
            init.constant_(m.bias.data, 0.0)

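# Typical usage, assuming a network `net`: Module.apply walks every
# submodule recursively, so one call initializes the whole model.
#
#     net.apply(weights_init)
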
# Load a standalone pSp encoder from a checkpoint and register a forward
# hook that adds the average latent, so the encoder outputs W+ codes directly.
def load_psp_standalone(checkpoint_path, device='cuda'):
    ckpt = torch.load(checkpoint_path, map_location='cpu')
    opts = ckpt['opts']
    if 'output_size' not in opts:
        opts['output_size'] = 1024
    opts['n_styles'] = int(math.log(opts['output_size'], 2)) * 2 - 2
    opts = argparse.Namespace(**opts)
    psp = GradualStyleEncoder(50, 'ir_se', opts)
    psp_dict = {k.replace('encoder.', ''): v for k, v in ckpt['state_dict'].items() if k.startswith('encoder.')}
    psp.load_state_dict(psp_dict)
    psp.eval()
    psp = psp.to(device)
    latent_avg = ckpt['latent_avg'].to(device)

    def add_latent_avg(model, inputs, outputs):
        return outputs + latent_avg.repeat(outputs.shape[0], 1, 1)

    psp.register_forward_hook(add_latent_avg)
    return psp

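# Hypothetical encoding sketch, assuming an aligned face crop and a pSp
# checkpoint path; the forward hook above means the returned W+ codes
# already include the average latent.
#
#     psp = load_psp_standalone('encoder.pt', device='cuda')
#     with torch.no_grad():
#         s_w = psp(load_image('face.jpg').to('cuda'))   # (1, n_styles, 512)
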
def get_video_crop_parameter(filepath, predictor, padding=[200, 200, 200, 200]):
    if type(filepath) == str:
        img = dlib.load_rgb_image(filepath)
    else:
        img = filepath
    lm = get_landmark(img, predictor)
    if lm is None:
        return None
    lm_chin = lm[0:17]            # left-right
    lm_eyebrow_left = lm[17:22]   # left-right
    lm_eyebrow_right = lm[22:27]  # left-right
    lm_nose = lm[27:31]           # top-down
    lm_nostrils = lm[31:36]       # top-down
    lm_eye_left = lm[36:42]       # left-clockwise
    lm_eye_right = lm[42:48]      # left-clockwise
    lm_mouth_outer = lm[48:60]    # left-clockwise
    lm_mouth_inner = lm[60:68]    # left-clockwise
    # Rescale so that the horizontal distance between the eye centers is
    # 64 pixels, then crop a padded window around the eye midpoint, snapped
    # to multiples of 8 for the downstream networks.
    scale = 64. / (np.mean(lm_eye_right[:, 0]) - np.mean(lm_eye_left[:, 0]))
    center = ((np.mean(lm_eye_right, axis=0) + np.mean(lm_eye_left, axis=0)) / 2) * scale
    h, w = round(img.shape[0] * scale), round(img.shape[1] * scale)
    left = max(round(center[0] - padding[0]), 0) // 8 * 8
    right = min(round(center[0] + padding[1]), w) // 8 * 8
    top = max(round(center[1] - padding[2]), 0) // 8 * 8
    bottom = min(round(center[1] + padding[3]), h) // 8 * 8
    return h, w, top, bottom, left, right, scale

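# Hypothetical cropping sketch, assuming a dlib shape predictor and a video
# frame `frame`; the returned parameters are meant to be applied by resizing
# the whole frame first and then cropping:
#
#     paras = get_video_crop_parameter(frame, predictor)
#     if paras is not None:
#         h, w, top, bottom, left, right, scale = paras
#         frame = cv2.resize(frame, (w, h))[top:bottom, left:right]
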
def tensor2cv2(img):
    tmp = ((img.cpu().numpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8)
    return cv2.cvtColor(tmp, cv2.COLOR_RGB2BGR)

# Get the parameters (and buffers) from the StyleGAN generator and group
# them by layer, so that two models can be blended per resolution level.
def gather_params(G):
    params = dict(
        [(res, {}) for res in range(18)] + [("others", {})]
    )
    for n, p in sorted(list(G.named_buffers()) + list(G.named_parameters())):
        if n.startswith("convs"):
            layer = int(n.split(".")[1]) + 1
            params[layer][n] = p
        elif n.startswith("to_rgbs"):
            layer = int(n.split(".")[1]) * 2 + 3
            params[layer][n] = p
        elif n.startswith("conv1"):
            params[0][n] = p
        elif n.startswith("to_rgb1"):
            params[1][n] = p
        else:
            params["others"][n] = p
    return params

# Blend the FFHQ StyleGAN model and the finetuned model for toonification.
# See "Resolution Dependent GAN Interpolation for Controllable Image
# Synthesis Between Domains".
def blend_models(G_low, G_high, weight=[1]*7+[0]*11):
    params_low = gather_params(G_low)
    params_high = gather_params(G_high)
    for res in range(18):
        for n, p in params_high[res].items():
            params_high[res][n] = params_high[res][n] * (1 - weight[res]) + params_low[res][n] * weight[res]
    state_dict = {}
    for _, p in params_high.items():
        state_dict.update(p)
    return state_dict

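# Hypothetical blending sketch, assuming two generators with identical
# architectures: with the default weights, layers 0-6 of the blend come from
# `G_low` and layers 7-17 from `G_high`.
#
#     import copy
#     state_dict = blend_models(g_base, g_finetuned)
#     g_blend = copy.deepcopy(g_finetuned)
#     g_blend.load_state_dict(state_dict, strict=False)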