Spaces:

hanquansanren
/

DvD

Running on Zero

App Files Community

hanquansanren committed on Oct 13

Commit

9847dc4

1 Parent(s): d938e9f

I

Browse files

Files changed (1) hide show

app.py +1 -130

app.py CHANGED Viewed

@@ -180,139 +180,10 @@ def prepare_data_single(input_image, input_image_ori):
     return input_image, H_ori, W_ori, source, target, batch_ori, batch_ori_inter, source_256, target_256, source_vis, target_vis, mask, source_0
-@GPU
-def run_evaluation_docunet(
-    settings, logger, val_loader, diffusion: GaussianDiffusion, model,
-    pretrained_dewarp_model,pretrained_line_seg_model=None,pretrained_seg_model=None
-):
-    os.makedirs(f'vis_hp/{settings.env.eval_dataset_name}/{settings.name}', exist_ok=True)
-    batch_preprocessing = None
-    pbar = tqdm(enumerate(val_loader), total=len(val_loader))
-    pyramid = VGGPyramid(train=False).to(dist_util.dev())
-    SIZE = None
-    # for each document image
-    for i, data in pbar:
-        radius = 4
-        raw_corr = None
-        data_path = data['path']
-        source_288 = F.interpolate(data['source_image'], size=(288), mode='bilinear', align_corners=True).to(dist_util.dev())
-        if settings.env.time_variant == True:
-            init_feat = torch.zeros((data['source_image'].shape[0], 256, 64, 64), dtype=torch.float32).to(dist_util.dev())
-        else:
-            init_feat = None
-        with torch.inference_mode():
-            ref_bm, mask_x = pretrained_dewarp_model(source_288) # [1,2,288,288] 0~288  0~1
-            ref_flow = ref_bm/287.0 # [-1, 1]  # [1,2,288,288]
-        if settings.env.use_init_flow:
-            init_flow = F.interpolate(ref_flow, size=(64), mode='bilinear', align_corners=True) # [24, 2, 64, 64]
-        else:
-            init_flow = torch.zeros((data['source_image'].shape[0], 2, 64, 64), dtype=torch.float32).to(dist_util.dev())
-        (
-            data,
-            H_ori, # 512
-            W_ori, # 512
-            source, # [1, 3, 512, 512] 0-1
-            target, # None
-            batch_ori, # None
-            batch_ori_inter, # None
-            source_256,# [1, 3, 256, 256] 0-1
-            target_256, # None
-            source_vis, # [1, 3, H, W] cpu仅用于可视化
-            target_vis, # None
-            mask, # [1, 512, 512] 全白
-            source_0
-        ) = prepare_data(settings, batch_preprocessing, SIZE, data)
-        with torch.no_grad():
-            if settings.env.use_gt_mask == False:
-                # ref_bm, mask_x = self.pretrained_dewarp_model(source_288) # [1,2,288,288] bm 0~288 mskx0-256
-                mskx, d0, hx6, hx5d, hx4d, hx3d, hx2d, hx1d = pretrained_seg_model(source_288)
-                hx6 = F.interpolate(hx6, size=64, mode='bilinear', align_corners=False)
-                hx5d = F.interpolate(hx5d, size=64, mode='bilinear', align_corners=False)
-                hx4d = F.interpolate(hx4d, size=64, mode='bilinear', align_corners=False)
-                hx3d = F.interpolate(hx3d, size=64, mode='bilinear', align_corners=False)
-                hx2d = F.interpolate(hx2d, size=64, mode='bilinear', align_corners=False)
-                hx1d = F.interpolate(hx1d, size=64, mode='bilinear', align_corners=False)
-                seg_map_all = torch.cat((hx6, hx5d, hx4d, hx3d, hx2d, hx1d), dim=1) # [b, 384, 64, 64]
-                # tv_save_image(mskx,"vis_hp/debug_vis/mskx.png")
-                if settings.env.use_line_mask:
-                    textline_map, textline_mask = pretrained_line_seg_model(mskx) # [3, 64, 256, 256]
-                    textline_map = F.interpolate(textline_map, size=64, mode='bilinear', align_corners=False) #  [3, 64, 64, 64]
-            else:
-                seg_map_all = None
-                textline_map = None
-        if settings.env.train_VGG:
-            c20 = None
-            feature_size = 64
-        else:
-            feature_size = 64
-            if settings.env.train_mode == 'stage_1_dit_cat' or settings.env.train_mode =='stage_1_dit_cross':
-                with th.no_grad():
-                    c20  = extract_raw_features_single2(pyramid, source, source_256, feature_size) # [24, 1, 64, 64, 64, 64]
-                # 平均互相关，VGG最浅层特征的下采样（512*512->64*64）
-            else:
-                with th.no_grad():
-                    c20  = extract_raw_features_single(pyramid, source, source_256, feature_size) # [24, 1, 64, 64, 64, 64]
-                # 平均互相关，VGG最浅层特征的下采样（512*512->64*64）
-        source_64 = None # F.interpolate(source, size=(feature_size), mode='bilinear', align_corners=True)
-        logger.info(f"Starting sampling with VGG Features")
-        sample = run_sample_lr_dewarping(
-            settings,
-            logger,
-            diffusion,
-            model,
-            radius, # 4
-            source, # [B, 3, 512, 512] 0~1
-            feature_size, # 64
-            raw_corr, # None
-            init_flow, # [B, 2, 64, 64]   -1~1
-            c20, # # [B, 64, 64, 64]
-            source_64, # None
-            pyramid,
-            mask_x, #mask_x,  # F.interpolate(mskx, size=(512), mode='bilinear', align_corners=True)[:,:1,:,:] , # mask_x
-            seg_map_all,
-            textline_map,
-            init_feat
-        ) # sample: [1, 2, 64, 64] 偏移量 [-1,1]范围 五步DDIM的结果
-        if settings.env.use_sr_net == False:
-            sample = F.interpolate(sample, size=(H_ori, W_ori), mode='bilinear', align_corners=True) # [-1,+1] 偏移场
-            # sample[:, 0, :, :] = sample[:, 0, :, :] * W_ori
-            # sample[:, 1, :, :] = sample[:, 1, :, :] * H_ori
-            base = F.interpolate(coords_grid_tensor((512,512))/511., size=(H_ori, W_ori), mode='bilinear', align_corners=True)
-            # sample = ( ((sample + base.to(sample.device)) )*2 - 1 )
-            sample = ( ((sample + base.to(sample.device))*1 )*2 - 1 )*0.987 #  (2 * (bm / 286.8) - 1) * 0.99
-            ref_flow = None
-            if ref_flow is not None:
-                ref_flow = F.interpolate(ref_flow, size=(H_ori, W_ori), mode='bilinear', align_corners=True) # [-1,+1] 偏移场
-                # ref_flow[:, 0, :, :] = ref_flow[:, 0, :, :] * W_ori
-                # ref_flow[:, 1, :, :] = ref_flow[:, 1, :, :] * H_ori
-                ref_flow  = (ref_flow + base.to(ref_flow.device))*2 -1
-            # init_flow = F.interpolate(init_flow, size=(H_ori, W_ori), mode='bilinear', align_corners=True)
-        else:
-            raise ValueError("Invalid value")
-        if settings.env.visualize:
-            output = visualize_dewarping(settings, sample, data, i, source_vis, data_path, ref_flow)
 def run_single_docunet(input_image_ori):
     input_image_ori = np.array(input_image_ori, dtype=np.uint8)  # [x, y, 3]

     return input_image, H_ori, W_ori, source, target, batch_ori, batch_ori_inter, source_256, target_256, source_vis, target_vis, mask, source_0
+@GPU
 def run_single_docunet(input_image_ori):
     input_image_ori = np.array(input_image_ori, dtype=np.uint8)  # [x, y, 3]