init project
- modules/dust3r/cloud_opt/__init__.py +3 -3
- modules/dust3r/cloud_opt/__pycache__/__init__.cpython-312.pyc +0 -0
- modules/dust3r/cloud_opt/__pycache__/base_opt.cpython-312.pyc +0 -0
- modules/dust3r/cloud_opt/base_opt.py +10 -615
- modules/pe3r/__pycache__/demo.cpython-312.pyc +0 -0
- modules/pe3r/__pycache__/models.cpython-312.pyc +0 -0
- modules/pe3r/demo.py +1 -3
- modules/ultralytics/yolo/utils/callbacks/__pycache__/clearml.cpython-312.pyc +0 -0
- modules/ultralytics/yolo/utils/callbacks/__pycache__/comet.cpython-312.pyc +0 -0
- modules/ultralytics/yolo/utils/callbacks/__pycache__/dvc.cpython-312.pyc +0 -0
- modules/ultralytics/yolo/utils/callbacks/__pycache__/hub.cpython-312.pyc +0 -0
- modules/ultralytics/yolo/utils/callbacks/__pycache__/mlflow.cpython-312.pyc +0 -0
- modules/ultralytics/yolo/utils/callbacks/__pycache__/neptune.cpython-312.pyc +0 -0
- modules/ultralytics/yolo/utils/callbacks/__pycache__/raytune.cpython-312.pyc +0 -0
- modules/ultralytics/yolo/utils/callbacks/__pycache__/tensorboard.cpython-312.pyc +0 -0
- modules/ultralytics/yolo/utils/callbacks/__pycache__/wb.cpython-312.pyc +0 -0
modules/dust3r/cloud_opt/__init__.py
CHANGED

@@ -22,11 +22,11 @@ def global_aligner(dust3r_output, cog_seg_maps, rev_cog_seg_maps, semantic_feats
     view1, view2, pred1, pred2 = [dust3r_output[k] for k in 'view1 view2 pred1 pred2'.split()]
     # build the optimizer
     if mode == GlobalAlignerMode.PointCloudOptimizer:
-        net = PointCloudOptimizer(view1, view2, pred1, pred2, cog_seg_maps, rev_cog_seg_maps, semantic_feats, **optim_kw).to(device)
+        net = PointCloudOptimizer(view1, view2, pred1, pred2, cog_seg_maps, rev_cog_seg_maps, semantic_feats, device, **optim_kw).to(device)
    elif mode == GlobalAlignerMode.ModularPointCloudOptimizer:
-        net = ModularPointCloudOptimizer(view1, view2, pred1, pred2, **optim_kw).to(device)
+        net = ModularPointCloudOptimizer(view1, view2, pred1, pred2, device, **optim_kw).to(device)
    elif mode == GlobalAlignerMode.PairViewer:
-        net = PairViewer(view1, view2, pred1, pred2, cog_seg_maps, rev_cog_seg_maps, semantic_feats, **optim_kw).to(device)
+        net = PairViewer(view1, view2, pred1, pred2, cog_seg_maps, rev_cog_seg_maps, semantic_feats, device, **optim_kw).to(device)
    else:
        raise NotImplementedError(f'Unknown mode {mode}')
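Note: this hunk threads the caller's `device` through each optimizer constructor instead of leaving device placement implicit. A minimal sketch of the same pattern, where `Optimizer` and `build` are illustrative names, not the project's API:

import torch
import torch.nn as nn

class Optimizer(nn.Module):
    def __init__(self, n: int, device: torch.device):
        super().__init__()
        # Allocate buffers directly on the requested device, so
        # CPU-only machines work without a hardcoded "cuda".
        self.register_buffer('state', torch.zeros(n, device=device))

def build(n: int, device: torch.device) -> Optimizer:
    # Pass the device in *and* move parameters afterwards, mirroring
    # `PointCloudOptimizer(..., device, **optim_kw).to(device)` above.
    return Optimizer(n, device).to(device)

net = build(8, torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
print(net.state.device)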
modules/dust3r/cloud_opt/__pycache__/__init__.cpython-312.pyc
CHANGED
Binary files a/modules/dust3r/cloud_opt/__pycache__/__init__.cpython-312.pyc and b/modules/dust3r/cloud_opt/__pycache__/__init__.cpython-312.pyc differ

modules/dust3r/cloud_opt/__pycache__/base_opt.cpython-312.pyc
CHANGED
Binary files a/modules/dust3r/cloud_opt/__pycache__/base_opt.cpython-312.pyc and b/modules/dust3r/cloud_opt/__pycache__/base_opt.cpython-312.pyc differ
modules/dust3r/cloud_opt/base_opt.py
CHANGED

@@ -44,7 +44,7 @@ class BasePCOptimizer (nn.Module):
         else:
             self._init_from_views(*args, **kwargs)
 
-    def _init_from_views(self, view1, view2, pred1, pred2, cog_seg_maps, rev_cog_seg_maps, semantic_feats,
+    def _init_from_views(self, view1, view2, pred1, pred2, cog_seg_maps, rev_cog_seg_maps, semantic_feats, device,
                          dist='l2',
                          conf='log',
                          min_conf_thr=3,

@@ -121,10 +121,10 @@ class BasePCOptimizer (nn.Module):
             self.fix_imgs = rgb(ori_imgs)
             self.smoothed_imgs = rgb(smoothed_imgs)
 
-            self.cogs = [torch.zeros((h, w, 1024)) for h, w in self.imshapes]
-
-            self.segmaps = [-torch.ones((h, w)) for h, w in self.imshapes]
-            self.rev_segmaps = [-torch.ones((h, w)) for h, w in self.imshapes]
+            self.cogs = [torch.zeros((h, w, 1024), device=device) for h, w in self.imshapes]
+            semantic_feats = semantic_feats.to(device)
+            self.segmaps = [-torch.ones((h, w), device=device) for h, w in self.imshapes]
+            self.rev_segmaps = [-torch.ones((h, w), device=device) for h, w in self.imshapes]
 
             for v in range(len(self.edges)):
                 idx = view1['idx'][v]

@@ -141,8 +141,8 @@ class BasePCOptimizer (nn.Module):
                 seg = cog_seg_map[y, x].squeeze(-1).long()
 
                 self.cogs[idx] = semantic_feats[seg].reshape(h, w, -1)
-                self.segmaps[idx] = cog_seg_map
-                self.rev_segmaps[idx] = rev_seg_map
+                self.segmaps[idx] = cog_seg_map.to(device)
+                self.rev_segmaps[idx] = rev_seg_map.to(device)
 
                 idx = view2['idx'][v]
                 h, w = self.cogs[idx].shape[0], self.cogs[idx].shape[1]

@@ -157,8 +157,8 @@ class BasePCOptimizer (nn.Module):
                 seg = cog_seg_map[y, x].squeeze(-1).long()
 
                 self.cogs[idx] = semantic_feats[seg].reshape(h, w, -1)
-                self.segmaps[idx] = cog_seg_map
-                self.rev_segmaps[idx] = rev_seg_map
+                self.segmaps[idx] = cog_seg_map.to(device)
+                self.rev_segmaps[idx] = rev_seg_map.to(device)
 
             self.rendered_imgs = []
 

@@ -612,609 +612,4 @@ def clean_pointcloud( im_confs, K, cams, depthmaps, all_pts3d,
             bad_msk_i[msk_i] = bad_points
             res[i][bad_msk_i] = res[i][bad_msk_i].clip_(max=bad_conf)
 
-    return res
-
-# Copyright (C) 2024-present Naver Corporation. All rights reserved.
-# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
-#
-# --------------------------------------------------------
-# Base class for the global alignement procedure
-# --------------------------------------------------------
-# from copy import deepcopy
-
-# import numpy as np
-# import torch
-# import torch.nn as nn
-# import roma
-# from copy import deepcopy
-# import tqdm
-
-# from torch.nn.functional import cosine_similarity
-# import cv2
-
-# from dust3r.utils.geometry import inv, geotrf
-# from dust3r.utils.device import to_numpy
-# from dust3r.utils.image import rgb
-# from dust3r.viz import SceneViz, segment_sky, auto_cam_size
-# from dust3r.optim_factory import adjust_learning_rate_by_lr
-
-# from dust3r.cloud_opt.commons import (edge_str, ALL_DISTS, NoGradParamDict, get_imshapes, signed_expm1, signed_log1p,
-#                                       cosine_schedule, linear_schedule, get_conf_trf, GradParamDict)
-# import dust3r.cloud_opt.init_im_poses as init_fun
-
-
-# class BasePCOptimizer (nn.Module):
-#     """ Optimize a global scene, given a list of pairwise observations.
-#     Graph node: images
-#     Graph edges: observations = (pred1, pred2)
-#     """
-
-#     def __init__(self, *args, **kwargs):
-#         if len(args) == 1 and len(kwargs) == 0:
-#             other = deepcopy(args[0])
-#             attrs = '''edges is_symmetrized dist n_imgs pred_i pred_j imshapes
-#                     min_conf_thr conf_thr conf_i conf_j im_conf
-#                     base_scale norm_pw_scale POSE_DIM pw_poses
-#                     pw_adaptors pw_adaptors has_im_poses rand_pose imgs verbose'''.split()
-#             self.__dict__.update({k: other[k] for k in attrs})
-#         else:
-#             self._init_from_views(*args, **kwargs)
-
-#     def _init_from_views(self, view1, view2, pred1, pred2, cog_seg_maps, rev_cog_seg_maps, semantic_feats,
-#                          dist='l2',
-#                          conf='log',
-#                          min_conf_thr=3,
-#                          base_scale=0.5,
-#                          allow_pw_adaptors=False,
-#                          pw_break=20,
-#                          rand_pose=torch.randn,
-#                          iterationsCount=None,
-#                          verbose=True):
-#         super().__init__()
-#         if not isinstance(view1['idx'], list):
-#             view1['idx'] = view1['idx'].tolist()
-#         if not isinstance(view2['idx'], list):
-#             view2['idx'] = view2['idx'].tolist()
-#         self.edges = [(int(i), int(j)) for i, j in zip(view1['idx'], view2['idx'])]
-#         self.is_symmetrized = set(self.edges) == {(j, i) for i, j in self.edges}
-#         self.dist = ALL_DISTS[dist]
-#         self.verbose = verbose
-
-#         self.n_imgs = self._check_edges()
-
-#         # input data
-#         pred1_pts = pred1['pts3d']
-#         pred2_pts = pred2['pts3d_in_other_view']
-#         self.pred_i = NoGradParamDict({ij: pred1_pts[n] for n, ij in enumerate(self.str_edges)})
-#         self.pred_j = NoGradParamDict({ij: pred2_pts[n] for n, ij in enumerate(self.str_edges)})
-#         # self.ori_pred_i = NoGradParamDict({ij: pred1_pts[n] for n, ij in enumerate(self.str_edges)})
-#         # self.ori_pred_j = NoGradParamDict({ij: pred2_pts[n] for n, ij in enumerate(self.str_edges)})
-#         self.imshapes = get_imshapes(self.edges, pred1_pts, pred2_pts)
-
-#         # work in log-scale with conf
-#         pred1_conf = pred1['conf']
-#         pred2_conf = pred2['conf']
-#         self.min_conf_thr = min_conf_thr
-#         self.conf_trf = get_conf_trf(conf)
-
-#         self.conf_i = NoGradParamDict({ij: pred1_conf[e] for e, ij in enumerate(self.str_edges)})
-#         self.conf_j = NoGradParamDict({ij: pred2_conf[e] for e, ij in enumerate(self.str_edges)})
-#         self.im_conf = self._compute_img_conf(pred1_conf, pred2_conf)
-#         for i in range(len(self.im_conf)):
-#             self.im_conf[i].requires_grad = False
-
-#         # pairwise pose parameters
-#         self.base_scale = base_scale
-#         self.norm_pw_scale = True
-#         self.pw_break = pw_break
-#         self.POSE_DIM = 7
-#         self.pw_poses = nn.Parameter(rand_pose((self.n_edges, 1+self.POSE_DIM)))  # pairwise poses
-#         self.pw_poses.requires_grad_(True)
-#         self.pw_adaptors = nn.Parameter(torch.zeros((self.n_edges, 2)))  # slight xy/z adaptation
-#         self.pw_adaptors.requires_grad_(True)
-#         self.has_im_poses = False
-#         self.rand_pose = rand_pose
-
-#         # possibly store images for show_pointcloud
-#         self.imgs = None
-#         if 'img' in view1 and 'img' in view2:
-#             imgs = [torch.zeros((3,)+hw) for hw in self.imshapes]
-#             smoothed_imgs = [torch.zeros((3,)+hw) for hw in self.imshapes]
-#             ori_imgs = [torch.zeros((3,)+hw) for hw in self.imshapes]
-#             for v in range(len(self.edges)):
-#                 idx = view1['idx'][v]
-#                 imgs[idx] = view1['img'][v]
-#                 smoothed_imgs[idx] = view1['smoothed_img'][v]
-#                 ori_imgs[idx] = view1['ori_img'][v]
-
-#                 idx = view2['idx'][v]
-#                 imgs[idx] = view2['img'][v]
-#                 smoothed_imgs[idx] = view2['smoothed_img'][v]
-#                 ori_imgs[idx] = view2['ori_img'][v]
-
-#             self.imgs = rgb(imgs)
-#             self.ori_imgs = rgb(ori_imgs)
-#             self.fix_imgs = rgb(ori_imgs)
-#             self.smoothed_imgs = rgb(smoothed_imgs)
-
-#             self.cogs = [torch.zeros((h, w, 1024), device="cuda") for h, w in self.imshapes]
-#             semantic_feats = semantic_feats.to("cuda")
-#             self.segmaps = [-torch.ones((h, w), device="cuda") for h, w in self.imshapes]
-#             self.rev_segmaps = [-torch.ones((h, w), device="cuda") for h, w in self.imshapes]
-#             # self.conf_1 = [torch.zeros((h, w), device="cuda") for h, w in self.imshapes]
-#             # self.conf_2 = [torch.zeros((h, w), device="cuda") for h, w in self.imshapes]
-#             for v in range(len(self.edges)):
-#                 idx = view1['idx'][v]
-
-#                 h, w = self.cogs[idx].shape[0], self.cogs[idx].shape[1]
-#                 cog_seg_map = cog_seg_maps[idx]
-#                 cog_seg_map = torch.from_numpy(cv2.resize(cog_seg_map, [w, h], interpolation=cv2.INTER_NEAREST))
-#                 rev_seg_map = rev_cog_seg_maps[idx]
-#                 rev_seg_map = torch.from_numpy(cv2.resize(rev_seg_map, [w, h], interpolation=cv2.INTER_NEAREST))
-
-#                 y, x = torch.meshgrid(torch.arange(0, h), torch.arange(0, w))
-#                 x = x.reshape(-1, 1)
-#                 y = y.reshape(-1, 1)
-#                 seg = cog_seg_map[y, x].squeeze(-1).long()
-
-#                 self.cogs[idx] = semantic_feats[seg].reshape(h, w, -1)
-#                 self.segmaps[idx] = cog_seg_map.cuda()
-#                 self.rev_segmaps[idx] = rev_seg_map.cuda()
-
-#                 idx = view2['idx'][v]
-#                 h, w = self.cogs[idx].shape[0], self.cogs[idx].shape[1]
-#                 cog_seg_map = cog_seg_maps[idx]
-#                 cog_seg_map = torch.from_numpy(cv2.resize(cog_seg_map, [w, h], interpolation=cv2.INTER_NEAREST))
-#                 rev_seg_map = rev_cog_seg_maps[idx]
-#                 rev_seg_map = torch.from_numpy(cv2.resize(rev_seg_map, [w, h], interpolation=cv2.INTER_NEAREST))
-
-#                 y, x = torch.meshgrid(torch.arange(0, h), torch.arange(0, w))
-#                 x = x.reshape(-1, 1)
-#                 y = y.reshape(-1, 1)
-#                 seg = cog_seg_map[y, x].squeeze(-1).long()
-
-#                 self.cogs[idx] = semantic_feats[seg].reshape(h, w, -1)
-#                 self.segmaps[idx] = cog_seg_map.cuda()
-#                 self.rev_segmaps[idx] = rev_seg_map.cuda()
-
-#             self.rendered_imgs = []
-
-#     def render_image(self, text_feats, threshold=0.85):
-#         self.rendered_imgs = []
-
-#         # Collect all cosine similarities to compute min-max normalization
-#         all_similarities = []
-#         for each_cog in self.cogs:
-#             similarity_map = cosine_similarity(each_cog.to("cpu"), text_feats.to("cpu").unsqueeze(1), dim=-1)
-#             all_similarities.append(similarity_map.squeeze().numpy())
-
-#         # Flatten and normalize all similarities
-#         total_similarities = np.concatenate(all_similarities)
-#         min_sim, max_sim = total_similarities.min(), total_similarities.max()
-#         normalized_similarities = [(sim - min_sim) / (max_sim - min_sim) for sim in all_similarities]
-
-#         # Process each image with normalized similarities
-#         for i, (each_cog, heatmap) in enumerate(zip(self.cogs, normalized_similarities)):
-#             mask = heatmap > threshold
-
-#             # Scale heatmap for visualization
-#             heatmap = np.uint8(255 * heatmap)
-#             heatmap_color = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
-
-#             # Prepare image
-#             image = self.fix_imgs[i]
-#             image = image * 255.0
-#             image = np.clip(image, 0, 255).astype(np.uint8)
-
-#             # Apply mask and overlay heatmap with red RGB for masked areas
-#             mask_indices = np.where(mask)  # Get indices where mask is True
-#             heatmap_color[mask_indices[0], mask_indices[1]] = [0, 0, 255]  # Red color for masked regions
-
-#             superimposed_img = np.where(np.expand_dims(mask, axis=-1), heatmap_color, image) / 255.0
-
-#             self.rendered_imgs.append(superimposed_img)
-
-#     @property
-#     def n_edges(self):
-#         return len(self.edges)
-
-#     @property
-#     def str_edges(self):
-#         return [edge_str(i, j) for i, j in self.edges]
-
-#     @property
-#     def imsizes(self):
-#         return [(w, h) for h, w in self.imshapes]
-
-#     @property
-#     def device(self):
-#         return next(iter(self.parameters())).device
-
-#     def state_dict(self, trainable=True):
-#         all_params = super().state_dict()
-#         return {k: v for k, v in all_params.items() if k.startswith(('_', 'pred_i.', 'pred_j.', 'conf_i.', 'conf_j.')) != trainable}
-
-#     def load_state_dict(self, data):
-#         return super().load_state_dict(self.state_dict(trainable=False) | data)
-
-#     def _check_edges(self):
-#         indices = sorted({i for edge in self.edges for i in edge})
-#         assert indices == list(range(len(indices))), 'bad pair indices: missing values '
-#         return len(indices)
-
-#     @torch.no_grad()
-#     def _compute_img_conf(self, pred1_conf, pred2_conf):
-#         im_conf = nn.ParameterList([torch.zeros(hw, device=self.device) for hw in self.imshapes])
-#         for e, (i, j) in enumerate(self.edges):
-#             im_conf[i] = torch.maximum(im_conf[i], pred1_conf[e])
-#             im_conf[j] = torch.maximum(im_conf[j], pred2_conf[e])
-#         return im_conf
-
-#     def get_adaptors(self):
-#         adapt = self.pw_adaptors
-#         adapt = torch.cat((adapt[:, 0:1], adapt), dim=-1)  # (scale_xy, scale_xy, scale_z)
-#         if self.norm_pw_scale:  # normalize so that the product == 1
-#             adapt = adapt - adapt.mean(dim=1, keepdim=True)
-#         return (adapt / self.pw_break).exp()
-
-#     def _get_poses(self, poses):
-#         # normalize rotation
-#         Q = poses[:, :4]
-#         T = signed_expm1(poses[:, 4:7])
-#         RT = roma.RigidUnitQuat(Q, T).normalize().to_homogeneous()
-#         return RT
-
-#     def _set_pose(self, poses, idx, R, T=None, scale=None, force=False):
-#         # all poses == cam-to-world
-#         pose = poses[idx]
-#         if not (pose.requires_grad or force):
-#             return pose
-
-#         if R.shape == (4, 4):
-#             assert T is None
-#             T = R[:3, 3]
-#             R = R[:3, :3]
-
-#         if R is not None:
-#             pose.data[0:4] = roma.rotmat_to_unitquat(R)
-#         if T is not None:
-#             pose.data[4:7] = signed_log1p(T / (scale or 1))  # translation is function of scale
-
-#         if scale is not None:
-#             assert poses.shape[-1] in (8, 13)
-#             pose.data[-1] = np.log(float(scale))
-#         return pose
-
-#     def get_pw_norm_scale_factor(self):
-#         if self.norm_pw_scale:
-#             # normalize scales so that things cannot go south
-#             # we want that exp(scale) ~= self.base_scale
-#             return (np.log(self.base_scale) - self.pw_poses[:, -1].mean()).exp()
-#         else:
-#             return 1  # don't norm scale for known poses
-
-#     def get_pw_scale(self):
-#         scale = self.pw_poses[:, -1].exp()  # (n_edges,)
-#         scale = scale * self.get_pw_norm_scale_factor()
-#         return scale
-
-#     def get_pw_poses(self):  # cam to world
-#         RT = self._get_poses(self.pw_poses)
-#         scaled_RT = RT.clone()
-#         scaled_RT[:, :3] *= self.get_pw_scale().view(-1, 1, 1)  # scale the rotation AND translation
-#         return scaled_RT
-
-#     def get_masks(self):
-#         return [(conf > self.min_conf_thr) for conf in self.im_conf]
-
-#     def depth_to_pts3d(self):
-#         raise NotImplementedError()
-
-#     def get_pts3d(self, raw=False):
-#         res = self.depth_to_pts3d()
-#         if not raw:
-#             res = [dm[:h*w].view(h, w, 3) for dm, (h, w) in zip(res, self.imshapes)]
-#         return res
-
-#     def _set_focal(self, idx, focal, force=False):
-#         raise NotImplementedError()
-
-#     def get_focals(self):
-#         raise NotImplementedError()
-
-#     def get_known_focal_mask(self):
-#         raise NotImplementedError()
-
-#     def get_principal_points(self):
-#         raise NotImplementedError()
-
-#     def get_conf(self, mode=None):
-#         trf = self.conf_trf if mode is None else get_conf_trf(mode)
-#         return [trf(c) for c in self.im_conf]
-
-#     def get_im_poses(self):
-#         raise NotImplementedError()
-
-#     def _set_depthmap(self, idx, depth, force=False):
-#         raise NotImplementedError()
-
-#     def get_depthmaps(self, raw=False):
-#         raise NotImplementedError()
-
-#     def clean_pointcloud(self, **kw):
-#         cams = inv(self.get_im_poses())
-#         K = self.get_intrinsics()
-#         depthmaps = self.get_depthmaps()
-#         all_pts3d = self.get_pts3d()
-
-#         new_im_confs = clean_pointcloud(self.im_conf, K, cams, depthmaps, all_pts3d, **kw)
-
-#         for i, new_conf in enumerate(new_im_confs):
-#             self.im_conf[i].data[:] = new_conf
-#         return self
-
-#     def forward(self, ret_details=False):
-#         pw_poses = self.get_pw_poses()  # cam-to-world
-#         pw_adapt = self.get_adaptors()
-#         proj_pts3d = self.get_pts3d()
-#         # pre-compute pixel weights
-#         weight_i = {i_j: self.conf_trf(c) for i_j, c in self.conf_i.items()}
-#         weight_j = {i_j: self.conf_trf(c) for i_j, c in self.conf_j.items()}
-
-#         loss = 0
-#         if ret_details:
-#             details = -torch.ones((self.n_imgs, self.n_imgs))
-
-#         for e, (i, j) in enumerate(self.edges):
-#             i_j = edge_str(i, j)
-#             # distance in image i and j
-#             aligned_pred_i = geotrf(pw_poses[e], pw_adapt[e] * self.pred_i[i_j])
-#             aligned_pred_j = geotrf(pw_poses[e], pw_adapt[e] * self.pred_j[i_j])
-#             li = self.dist(proj_pts3d[i], aligned_pred_i, weight=weight_i[i_j]).mean()
-#             lj = self.dist(proj_pts3d[j], aligned_pred_j, weight=weight_j[i_j]).mean()
-#             loss = loss + li + lj
-
-#             if ret_details:
-#                 details[i, j] = li + lj
-#         loss /= self.n_edges  # average over all pairs
-
-#         if ret_details:
-#             return loss, details
-#         return loss
-
-
-#     def spatial_select_points(self, point_maps, semantic_maps, confidence_maps):
-#         H, W = semantic_maps.shape
-
-#         # reshape the point map and the semantic map to 2-D form
-#         point_map = point_maps.view(-1, 3)  # (H*W, 3)
-#         semantic_map = semantic_maps.view(-1)  # (H*W)
-#         confidence_map = confidence_maps.view(-1)
-
-#         dist_map = torch.zeros_like(semantic_map, dtype=torch.float32)
-#         cnt_map = torch.zeros_like(semantic_map, dtype=torch.float32)
-#         # near_point_map = torch.zeros_like(point_map, dtype=torch.float32)
-
-#         # refresh_point_map = point_map.clone()
-#         refresh_confidence_map = confidence_map.clone()
-
-#         # build pixel indices over the image
-#         row_idx, col_idx = torch.meshgrid(torch.arange(H), torch.arange(W))
-#         row_idx = row_idx.flatten()
-#         col_idx = col_idx.flatten()
-
-#         kernel_size = 7
-#         offset_range = kernel_size // 2
-#         neighbor_offsets = [
-#             (dx, dy) for dx in range(-offset_range, offset_range + 1)
-#             for dy in range(-offset_range, offset_range + 1)
-#             if not (dx == 0 and dy == 0)
-#         ]
-
-#         # process every pixel (neighborhoods are computed within the current image only)
-#         for offset in neighbor_offsets:
-#             # neighbor positions
-#             neighbor_row = row_idx + offset[0]
-#             neighbor_col = col_idx + offset[1]
-
-#             # keep only neighbors that fall inside the image
-#             valid_mask = (neighbor_row >= 0) & (neighbor_row < H) & (neighbor_col >= 0) & (neighbor_col < W)
-#             valid_row = neighbor_row[valid_mask]
-#             valid_col = neighbor_col[valid_mask]
-
-#             # indices of the valid pixels
-#             idx = valid_mask.nonzero(as_tuple=True)[0]
-#             neighbor_idx = valid_row * W + valid_col
-
-#             # semantic labels and spatial coordinates of neighboring pixels
-#             sem_i = semantic_map[idx]
-#             sem_j = semantic_map[neighbor_idx]
-#             p_i = point_map[idx]
-#             p_j = point_map[neighbor_idx]
-
-#             # squared difference of the spatial coordinates
-#             distance = torch.sum((p_i - p_j)**2, dim=1)
-
-#             same_object = (sem_i == sem_j) & (sem_i != -1) & (sem_j != -1)
-
-#             dist_map[idx] += same_object * distance
-#             cnt_map[idx] += same_object
-
-#         anomaly_point = (dist_map / (cnt_map + 1e-6))
-#         print(anomaly_point, anomaly_point.shape)
-#         anomaly_point = (anomaly_point > 0.001) & (cnt_map != 0)
-#         anomaly_point_idx = anomaly_point.nonzero(as_tuple=True)[0]
-
-#         refresh_confidence_map[anomaly_point_idx] = 0
-
-#         return refresh_confidence_map.view(H, W)
-
-#     @torch.cuda.amp.autocast(enabled=False)
-#     def compute_global_alignment(self, tune_flg=False, init=None, niter_PnP=10, **kw):
-
-#         if tune_flg:
-#             im_conf = nn.ParameterList([torch.zeros(hw, device=self.device) for hw in self.imshapes])
-#             for e, (i, j) in enumerate(self.edges):
-#                 i_j = edge_str(i, j)
-#                 im_conf[i] = self.spatial_select_points(self.pred_i[i_j], self.rev_segmaps[i], self.conf_i[i_j])
-#                 im_conf[j] = self.spatial_select_points(self.pred_j[i_j], self.rev_segmaps[j], self.conf_j[i_j])
-
-#             for i in range(len(self.imgs)):
-#                 self.imgs[i] = self.ori_imgs[i]
-#                 anomaly_mask = (im_conf[i] == 0)
-#                 unique_labels = torch.unique(self.rev_segmaps[i])
-#                 for label in unique_labels:
-#                     semantic_mask = (self.rev_segmaps[i] == label)
-#                     if label == -1:
-#                         continue
-#                     cover = (semantic_mask & anomaly_mask).sum() / semantic_mask.sum()
-#                     if cover > 0.3:
-#                         self.imgs[i][semantic_mask.cpu()] = self.smoothed_imgs[i][semantic_mask.cpu()]
-#                         for j in range(len(self.imgs)):
-#                             if j == i:
-#                                 continue
-#                             semantic_mask = (self.rev_segmaps[j] == label)
-#                             self.imgs[j][semantic_mask.cpu()] = self.smoothed_imgs[j][semantic_mask.cpu()]
-
-#         if init is None:
-#             pass
-#         elif init == 'msp' or init == 'mst':
-#             init_fun.init_minimum_spanning_tree(self, niter_PnP=niter_PnP)
-#         elif init == 'known_poses':
-#             init_fun.init_from_known_poses(self, min_conf_thr=self.min_conf_thr,
-#                                            niter_PnP=niter_PnP)
-#         else:
-#             raise ValueError(f'bad value for {init=}')
-
-#         if tune_flg:
-#             return 0
-#         # loss = 0
-#         loss = global_alignment_loop(self, **kw)
-#         #
-#         # init_fun.init_minimum_spanning_tree(self, niter_PnP=niter_PnP)
-#         return loss
-
-#     @torch.no_grad()
-#     def mask_sky(self):
-#         res = deepcopy(self)
-#         for i in range(self.n_imgs):
-#             sky = segment_sky(self.imgs[i])
-#             res.im_conf[i][sky] = 0
-#         return res
-
-#     def show(self, show_pw_cams=False, show_pw_pts3d=False, cam_size=None, **kw):
-#         viz = SceneViz()
-#         if self.imgs is None:
-#             colors = np.random.randint(0, 256, size=(self.n_imgs, 3))
-#             colors = list(map(tuple, colors.tolist()))
-#             for n in range(self.n_imgs):
-#                 viz.add_pointcloud(self.get_pts3d()[n], colors[n], self.get_masks()[n])
-#         else:
-#             viz.add_pointcloud(self.get_pts3d(), self.imgs, self.get_masks())
-#             colors = np.random.randint(256, size=(self.n_imgs, 3))
-
-#         # camera poses
-#         im_poses = to_numpy(self.get_im_poses())
-#         if cam_size is None:
-#             cam_size = auto_cam_size(im_poses)
-#         viz.add_cameras(im_poses, self.get_focals(), colors=colors,
-#                         images=self.imgs, imsizes=self.imsizes, cam_size=cam_size)
-#         if show_pw_cams:
-#             pw_poses = self.get_pw_poses()
-#             viz.add_cameras(pw_poses, color=(192, 0, 192), cam_size=cam_size)
-
-#             if show_pw_pts3d:
-#                 pts = [geotrf(pw_poses[e], self.pred_i[edge_str(i, j)]) for e, (i, j) in enumerate(self.edges)]
-#                 viz.add_pointcloud(pts, (128, 0, 128))
-
-#         viz.show(**kw)
-#         return viz
-
-
-# def global_alignment_loop(net, lr=0.01, niter=300, schedule='cosine', lr_min=1e-6):
-#     # return net
-#     params = [p for p in net.parameters() if p.requires_grad]
-#     for param in params:
-#         print(param.shape)
-#     if not params:
-#         return net
-
-#     verbose = net.verbose
-#     if verbose:
-#         print('Global alignement - optimizing for:')
-#         print([name for name, value in net.named_parameters() if value.requires_grad])
-
-#     lr_base = lr
-#     optimizer = torch.optim.Adam(params, lr=lr, betas=(0.9, 0.9))
-
-#     loss = float('inf')
-#     if verbose:
-#         with tqdm.tqdm(total=niter) as bar:
-#             while bar.n < bar.total:
-#                 loss, lr = global_alignment_iter(net, bar.n, niter, lr_base, lr_min, optimizer, schedule)
-#                 bar.set_postfix_str(f'{lr=:g} loss={loss:g}')
-#                 bar.update()
-#     else:
-#         for n in range(niter):
-#             loss, _ = global_alignment_iter(net, n, niter, lr_base, lr_min, optimizer, schedule)
-#     return loss
-
-
-# def global_alignment_iter(net, cur_iter, niter, lr_base, lr_min, optimizer, schedule):
-#     t = cur_iter / niter
-#     if schedule == 'cosine':
-#         lr = cosine_schedule(t, lr_base, lr_min)
-#     elif schedule == 'linear':
-#         lr = linear_schedule(t, lr_base, lr_min)
-#     else:
-#         raise ValueError(f'bad lr {schedule=}')
-#     adjust_learning_rate_by_lr(optimizer, lr)
-#     optimizer.zero_grad()
-#     loss = net(cur_iter)
-#     if loss == 0:
-#         optimizer.step()
-#         return float(loss), lr
-
-#     loss.backward()
-#     optimizer.step()
-
-#     return float(loss), lr
-
-
-# @torch.no_grad()
-# def clean_pointcloud( im_confs, K, cams, depthmaps, all_pts3d,
-#                       tol=0.001, bad_conf=0, dbg=()):
-#     """ Method:
-#     1) express all 3d points in each camera coordinate frame
-#     2) if they're in front of a depthmap --> then lower their confidence
-#     """
-#     assert len(im_confs) == len(cams) == len(K) == len(depthmaps) == len(all_pts3d)
-#     assert 0 <= tol < 1
-#     res = [c.clone() for c in im_confs]
-
-#     # reshape appropriately
-#     all_pts3d = [p.view(*c.shape,3) for p,c in zip(all_pts3d, im_confs)]
-#     depthmaps = [d.view(*c.shape) for d,c in zip(depthmaps, im_confs)]
-
-#     for i, pts3d in enumerate(all_pts3d):
-#         for j in range(len(all_pts3d)):
-#             if i == j: continue
-
-#             # project 3dpts in other view
-#             proj = geotrf(cams[j], pts3d)
-#             proj_depth = proj[:,:,2]
-#             u,v = geotrf(K[j], proj, norm=1, ncol=2).round().long().unbind(-1)
-
-#             # check which points are actually in the visible cone
-#             H, W = im_confs[j].shape
-#             msk_i = (proj_depth > 0) & (0 <= u) & (u < W) & (0 <= v) & (v < H)
-#             msk_j = v[msk_i], u[msk_i]
-
-#             # find bad points = those in front but less confident
-#             bad_points = (proj_depth[msk_i] < (1-tol) * depthmaps[j][msk_j]) & (res[i][msk_i] < res[j][msk_j])
-
-#             bad_msk_i = msk_i.clone()
-#             bad_msk_i[msk_i] = bad_points
-#             res[i][bad_msk_i] = res[i][bad_msk_i].clip_(max=bad_conf)
-
-#     return res
+    return res
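Note: besides deleting the large commented-out copy of the old module, the base_opt.py hunks replace buffers that the removed version pinned to "cuda" with allocations on the new `device` argument, and `.cuda()` moves with `.to(device)`. A small self-contained sketch of that before/after behavior (shapes and names are illustrative):

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
imshapes = [(4, 6), (3, 5)]  # illustrative (h, w) pairs

# old, CUDA-only style:  [torch.zeros((h, w, 1024), device="cuda") ...]
# new, device-agnostic style, as in the hunks above:
cogs = [torch.zeros((h, w, 1024), device=device) for h, w in imshapes]
segmaps = [-torch.ones((h, w), device=device) for h, w in imshapes]

seg = torch.zeros((4, 6))     # e.g. a resized segmentation map on CPU
segmaps[0] = seg.to(device)   # .to(device) is a no-op if already there,
                              # unlike .cuda(), which fails without a GPU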
modules/pe3r/__pycache__/demo.cpython-312.pyc
CHANGED
Binary files a/modules/pe3r/__pycache__/demo.cpython-312.pyc and b/modules/pe3r/__pycache__/demo.cpython-312.pyc differ

modules/pe3r/__pycache__/models.cpython-312.pyc
CHANGED
Binary files a/modules/pe3r/__pycache__/models.cpython-312.pyc and b/modules/pe3r/__pycache__/models.cpython-312.pyc differ
modules/pe3r/demo.py
CHANGED

@@ -257,8 +257,6 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
     input_image = mobilesamv2.preprocess(sam1_image)
     image_embedding = mobilesamv2.image_encoder(input_image)['last_hidden_state']
 
-    print(image_embedding.shape)
-
     image_embedding=torch.repeat_interleave(image_embedding, 320, dim=0)
     prompt_embedding=mobilesamv2.prompt_encoder.get_dense_pe()
     prompt_embedding=torch.repeat_interleave(prompt_embedding, 320, dim=0)

@@ -324,7 +322,7 @@ def get_cog_feats(images, pe3r, device):
         if out_frame_idx == 0:
             continue
 
-        sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[out_frame_idx], np_images[out_frame_idx], np_images_size[out_frame_idx], sam1_images_size[out_frame_idx], images.sam1_transform)
+        sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[out_frame_idx], np_images[out_frame_idx], np_images_size[out_frame_idx], sam1_images_size[out_frame_idx], images.sam1_transform, device)
 
         for sam1_mask in sam1_masks:
             flg = 1
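Note: the demo change removes a stray debug print of the embedding shape and forwards the `device` that `get_cog_feats` already receives into `get_mask_from_img_sam1`. The pattern, sketched with hypothetical helpers (only the trailing `device` parameter mirrors the real signature):

import torch

def get_mask(embedding: torch.Tensor, device: torch.device) -> torch.Tensor:
    # The helper receives the device explicitly instead of inferring it,
    # so intermediate tensors land where the caller expects.
    return (embedding.to(device) > 0).any(dim=-1)

def run(frames: list[torch.Tensor], device: torch.device) -> list[torch.Tensor]:
    # The device is threaded through every call, as in the hunk above.
    return [get_mask(f, device) for f in frames]

masks = run([torch.randn(2, 8) for _ in range(3)],
            torch.device('cuda' if torch.cuda.is_available() else 'cpu'))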
modules/ultralytics/yolo/utils/callbacks/__pycache__/clearml.cpython-312.pyc
CHANGED
Binary files a/modules/ultralytics/yolo/utils/callbacks/__pycache__/clearml.cpython-312.pyc and b/modules/ultralytics/yolo/utils/callbacks/__pycache__/clearml.cpython-312.pyc differ

modules/ultralytics/yolo/utils/callbacks/__pycache__/comet.cpython-312.pyc
CHANGED
Binary files a/modules/ultralytics/yolo/utils/callbacks/__pycache__/comet.cpython-312.pyc and b/modules/ultralytics/yolo/utils/callbacks/__pycache__/comet.cpython-312.pyc differ

modules/ultralytics/yolo/utils/callbacks/__pycache__/dvc.cpython-312.pyc
CHANGED
Binary files a/modules/ultralytics/yolo/utils/callbacks/__pycache__/dvc.cpython-312.pyc and b/modules/ultralytics/yolo/utils/callbacks/__pycache__/dvc.cpython-312.pyc differ

modules/ultralytics/yolo/utils/callbacks/__pycache__/hub.cpython-312.pyc
CHANGED
Binary files a/modules/ultralytics/yolo/utils/callbacks/__pycache__/hub.cpython-312.pyc and b/modules/ultralytics/yolo/utils/callbacks/__pycache__/hub.cpython-312.pyc differ

modules/ultralytics/yolo/utils/callbacks/__pycache__/mlflow.cpython-312.pyc
CHANGED
Binary files a/modules/ultralytics/yolo/utils/callbacks/__pycache__/mlflow.cpython-312.pyc and b/modules/ultralytics/yolo/utils/callbacks/__pycache__/mlflow.cpython-312.pyc differ

modules/ultralytics/yolo/utils/callbacks/__pycache__/neptune.cpython-312.pyc
CHANGED
Binary files a/modules/ultralytics/yolo/utils/callbacks/__pycache__/neptune.cpython-312.pyc and b/modules/ultralytics/yolo/utils/callbacks/__pycache__/neptune.cpython-312.pyc differ

modules/ultralytics/yolo/utils/callbacks/__pycache__/raytune.cpython-312.pyc
CHANGED
Binary files a/modules/ultralytics/yolo/utils/callbacks/__pycache__/raytune.cpython-312.pyc and b/modules/ultralytics/yolo/utils/callbacks/__pycache__/raytune.cpython-312.pyc differ

modules/ultralytics/yolo/utils/callbacks/__pycache__/tensorboard.cpython-312.pyc
CHANGED
Binary files a/modules/ultralytics/yolo/utils/callbacks/__pycache__/tensorboard.cpython-312.pyc and b/modules/ultralytics/yolo/utils/callbacks/__pycache__/tensorboard.cpython-312.pyc differ

modules/ultralytics/yolo/utils/callbacks/__pycache__/wb.cpython-312.pyc
CHANGED
Binary files a/modules/ultralytics/yolo/utils/callbacks/__pycache__/wb.cpython-312.pyc and b/modules/ultralytics/yolo/utils/callbacks/__pycache__/wb.cpython-312.pyc differ