#!/usr/bin/env python3
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of the original licenses of these
# third-party components and must ensure that the usage of the third-party components
# adheres to all relevant laws and regulations.
# For the avoidance of doubt, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with the TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.

import os
import json
import random

import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
from PIL import Image, ImageOps, ImageChops
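

# BaseDataset reads one or more JSON manifests (each a list of sample records),
# composites RGBA images onto a solid background color, and applies random
# geometric augmentations: rotation, scaling, translation, and perspective.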
class BaseDataset(Dataset):
    def __init__(self, json_path, num_view=4, image_size=512):
        self.data = list()
        self.num_view = num_view
        self.image_size = image_size
        if isinstance(json_path, str):
            json_path = [json_path]
        for jp in json_path:
            with open(jp) as f:
                self.data.extend(json.load(f))
        print("============= length of dataset %d =============" % len(self.data))

    def __len__(self):
        return len(self.data)

    def load_image(self, pil_img, color, image_size=None):
        """Load an image (path or PIL.Image), composite it onto `color`,
        and return (image, alpha) as CxHxW float tensors in [0, 1]."""
        if image_size is None:
            image_size = self.image_size
        if isinstance(pil_img, str):
            pil_img = Image.open(pil_img)
        # Normalize modes such as "L", "LA", or "P" so the array below is
        # always HxWx3 or HxWx4.
        if pil_img.mode not in ("RGB", "RGBA"):
            pil_img = pil_img.convert("RGBA")
        pil_img = pil_img.resize((image_size, image_size))
        image = np.asarray(pil_img, dtype=np.float32) / 255.0
        if image.shape[2] == 3:
            # Opaque image: synthesize a single-channel alpha of ones so the
            # shape matches the RGBA branch below.
            alpha = np.ones_like(image[:, :, :1])
        else:
            alpha = image[:, :, 3:]
        # Composite onto the background color using the alpha mask.
        image = image[:, :, :3] * alpha + color * (1 - alpha)
        image = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
        alpha = torch.from_numpy(alpha).permute(2, 0, 1).contiguous().float()
        return image, alpha
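
    # --- Augmentation helpers ---
    # Each helper below takes and returns a PIL image; bg_color is an RGB
    # tuple used to fill any region exposed by the transform.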

    def _apply_scaling(self, image, scale_factor, width, height, bg_color, scale_width=True):
        """Scale one dimension of the image, then crop or pad back to (width, height)."""
        if scale_width:
            new_width = int(width * scale_factor)
            new_height = height
        else:
            new_width = width
            new_height = int(height * scale_factor)
        image = image.resize((new_width, new_height), resample=Image.BILINEAR)
        if scale_factor > 1.0:
            # Upscaled: center-crop back to the original size.
            left = (new_width - width) // 2
            top = (new_height - height) // 2
            image = image.crop((left, top, left + width, top + height))
        else:
            # Downscaled: center-pad back to the original size.
            pad_width = (width - new_width) // 2
            pad_height = (height - new_height) // 2
            image = ImageOps.expand(
                image,
                (
                    pad_width,
                    pad_height,
                    width - new_width - pad_width,
                    height - new_height - pad_height,
                ),
                fill=bg_color,
            )
        return image

    def _apply_rotation(self, image, bg_color):
        """Rotate the image by a random angle, filling exposed corners with bg_color."""
        original_size = image.size
        angle = random.uniform(-30, 30)
        image = image.convert("RGBA")
        rotated_image = image.rotate(angle, resample=Image.BILINEAR, expand=True)
        # Composite the rotated image over a solid bg_color background.
        background = Image.new("RGBA", rotated_image.size, (bg_color[0], bg_color[1], bg_color[2], 255))
        background.paste(rotated_image, (0, 0), rotated_image)
        image = background.convert("RGB")
        # Center-crop back to the original size (expand=True enlarged the canvas).
        left = (image.width - original_size[0]) // 2
        top = (image.height - original_size[1]) // 2
        right = left + original_size[0]
        bottom = top + original_size[1]
        return image.crop((left, top, right, bottom))

    def _apply_translation(self, image, bg_color):
        """Shift the image by up to 10% of its size, filling exposed edges with bg_color."""
        max_dx = 0.1 * image.size[0]
        max_dy = 0.1 * image.size[1]
        dx = int(random.uniform(-max_dx, max_dx))
        dy = int(random.uniform(-max_dy, max_dy))
        image = ImageChops.offset(image, dx, dy)
        # ImageChops.offset wraps pixels around, so overwrite the wrapped
        # strips with the background color.
        width, height = image.size
        if dx > 0:
            image.paste(bg_color, (0, 0, dx, height))
        elif dx < 0:
            image.paste(bg_color, (width + dx, 0, width, height))
        if dy > 0:
            image.paste(bg_color, (0, 0, width, dy))
        elif dy < 0:
            image.paste(bg_color, (0, height + dy, width, height))
        return image

    def _apply_perspective(self, image, bg_color):
        """Warp the image with a random perspective transform, filling borders with bg_color."""
        image_np = np.array(image)
        height, width = image_np.shape[:2]
        # Jitter each corner within 20% of its edge to build the target quad.
        original_points = np.float32([[0, 0], [width, 0], [width, height], [0, height]])
        perspective_scale = 0.2
        new_points = np.float32(
            [
                [random.uniform(0, width * perspective_scale), random.uniform(0, height * perspective_scale)],
                [random.uniform(width * (1 - perspective_scale), width), random.uniform(0, height * perspective_scale)],
                [
                    random.uniform(width * (1 - perspective_scale), width),
                    random.uniform(height * (1 - perspective_scale), height),
                ],
                [
                    random.uniform(0, width * perspective_scale),
                    random.uniform(height * (1 - perspective_scale), height),
                ],
            ]
        )
        matrix = cv2.getPerspectiveTransform(original_points, new_points)
        image_np = cv2.warpPerspective(
            image_np, matrix, (width, height), borderMode=cv2.BORDER_CONSTANT, borderValue=bg_color
        )
        return Image.fromarray(image_np)

    def augment_image(
        self,
        image,
        bg_color,
        identity_prob=0.5,
        rotate_prob=0.3,
        scale_prob=0.5,
        translate_prob=0.5,
        perspective_prob=0.3,
    ):
        """Randomly augment a CxHxW float tensor; each transform fires with its own probability."""
        if random.random() < identity_prob:
            return image
        # Convert the tensor back to a PIL image and bg_color to 8-bit RGB.
        image = Image.fromarray((image.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
        bg_color = (int(bg_color[0] * 255), int(bg_color[1] * 255), int(bg_color[2] * 255))
        # Random rotation
        if random.random() < rotate_prob:
            image = self._apply_rotation(image, bg_color)
        # Random scaling
        if random.random() < scale_prob:
            width, height = image.size
            if random.random() < 0.5:
                # Scale both dimensions by the same factor.
                scale_factor = random.uniform(0.8, 1.2)
                image = self._apply_scaling(image, scale_factor, width, height, bg_color, scale_width=True)
                image = self._apply_scaling(image, scale_factor, width, height, bg_color, scale_width=False)
            else:
                # Scale width and height by independent factors.
                scale_factor_w = random.uniform(0.8, 1.2)
                scale_factor_h = random.uniform(0.8, 1.2)
                image = self._apply_scaling(image, scale_factor_w, width, height, bg_color, scale_width=True)
                image = self._apply_scaling(image, scale_factor_h, width, height, bg_color, scale_width=False)
        # Random translation
        if random.random() < translate_prob:
            image = self._apply_translation(image, bg_color)
        # Random perspective
        if random.random() < perspective_prob:
            image = self._apply_perspective(image, bg_color)
        # Convert back to a CxHxW float tensor in [0, 1].
        image = image.convert("RGB")
        image = np.asarray(image, dtype=np.float32) / 255.0
        image = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
        return image
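

if __name__ == "__main__":
    # Illustrative smoke test, not part of the original file: the empty
    # manifest and synthetic RGBA image below are stand-ins so the class
    # can be exercised without real data.
    import tempfile

    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump([], f)
        manifest = f.name

    dataset = BaseDataset(manifest, num_view=4, image_size=512)
    rgba = Image.fromarray((np.random.rand(480, 640, 4) * 255).astype(np.uint8), mode="RGBA")
    white = np.array([1.0, 1.0, 1.0], dtype=np.float32)

    image, alpha = dataset.load_image(rgba, white)
    print(image.shape, alpha.shape)  # torch.Size([3, 512, 512]) torch.Size([1, 512, 512])

    augmented = dataset.augment_image(image, white)
    print(augmented.shape)  # torch.Size([3, 512, 512])

    os.remove(manifest)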