Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
776d5b3
1
Parent(s):
e03a824
add
Browse files
app.py
CHANGED
|
@@ -15,6 +15,7 @@ from shap_e.models.download import load_model, load_config
|
|
| 15 |
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
|
| 16 |
import spaces
|
| 17 |
from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera
|
|
|
|
| 18 |
|
| 19 |
from src.utils.train_util import instantiate_from_config
|
| 20 |
from src.utils.camera_util import (
|
|
@@ -27,15 +28,14 @@ from src.utils.mesh_util import save_obj, save_glb
|
|
| 27 |
from src.utils.infer_util import remove_background, resize_foreground
|
| 28 |
|
| 29 |
def create_custom_cameras(size: int, device: torch.device, azimuths: list, elevations: list,
|
| 30 |
-
fov_degrees: float,distance) -> DifferentiableCameraBatch:
|
| 31 |
# Object is in a 2x2x2 bounding box (-1 to 1 in each dimension)
|
| 32 |
-
object_diagonal =
|
| 33 |
|
| 34 |
# Calculate radius based on object size and FOV
|
| 35 |
fov_radians = math.radians(fov_degrees)
|
| 36 |
radius = (object_diagonal / 2) / math.tan(fov_radians / 2) # Correct radius calculation
|
| 37 |
-
|
| 38 |
-
# exit(0)
|
| 39 |
origins = []
|
| 40 |
xs = []
|
| 41 |
ys = []
|
|
@@ -75,8 +75,6 @@ def create_custom_cameras(size: int, device: torch.device, azimuths: list, eleva
|
|
| 75 |
),
|
| 76 |
)
|
| 77 |
|
| 78 |
-
|
| 79 |
-
@spaces.GPU(duration=60)
|
| 80 |
def load_models():
|
| 81 |
"""Initialize and load all required models"""
|
| 82 |
config = OmegaConf.load('configs/instant-nerf-large-best.yaml')
|
|
@@ -231,15 +229,22 @@ def create_mesh(refined_image, model, infer_config):
|
|
| 231 |
|
| 232 |
class ShapERenderer:
|
| 233 |
def __init__(self, device):
|
| 234 |
-
print("
|
| 235 |
self.device = device
|
| 236 |
-
self.xm =
|
| 237 |
-
self.model =
|
| 238 |
-
self.diffusion =
|
| 239 |
-
print("Shap-E models
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
|
| 241 |
-
@spaces.GPU(duration=60)
|
| 242 |
def generate_views(self, prompt, guidance_scale=15.0, num_steps=64):
|
|
|
|
|
|
|
| 243 |
# Generate latents using the text-to-3D model
|
| 244 |
batch_size = 1
|
| 245 |
guidance_scale = float(guidance_scale)
|
|
@@ -272,13 +277,13 @@ class ShapERenderer:
|
|
| 272 |
rendered_image = decode_latent_images(
|
| 273 |
self.xm,
|
| 274 |
latents[0],
|
| 275 |
-
|
| 276 |
-
|
| 277 |
)
|
| 278 |
-
images.append(rendered_image
|
| 279 |
|
| 280 |
# Convert images to uint8
|
| 281 |
-
images = [(image)
|
| 282 |
|
| 283 |
# Create 2x3 grid layout (640x960) instead of 3x2 (960x640)
|
| 284 |
layout = np.zeros((960, 640, 3), dtype=np.uint8)
|
|
@@ -292,12 +297,19 @@ class ShapERenderer:
|
|
| 292 |
class RefinerInterface:
|
| 293 |
def __init__(self):
|
| 294 |
print("Initializing InstantMesh models...")
|
| 295 |
-
self.pipeline
|
| 296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
-
@spaces.GPU(duration=65)
|
| 299 |
def refine_model(self, input_image, prompt, steps=75, guidance_scale=7.5):
|
| 300 |
"""Main refinement function"""
|
|
|
|
|
|
|
| 301 |
# Process image and get refined output
|
| 302 |
input_image = Image.fromarray(input_image)
|
| 303 |
|
|
@@ -434,11 +446,13 @@ def create_demo():
|
|
| 434 |
)
|
| 435 |
|
| 436 |
# Set up event handlers
|
|
|
|
| 437 |
def generate(prompt, guidance_scale, num_steps):
|
| 438 |
with torch.no_grad():
|
| 439 |
layout, _ = shap_e.generate_views(prompt, guidance_scale, num_steps)
|
| 440 |
return layout
|
| 441 |
|
|
|
|
| 442 |
def refine(input_image, prompt, steps, guidance_scale):
|
| 443 |
refined_img, mesh_path = refiner.refine_model(
|
| 444 |
input_image,
|
|
|
|
| 15 |
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
|
| 16 |
import spaces
|
| 17 |
from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera
|
| 18 |
+
import math
|
| 19 |
|
| 20 |
from src.utils.train_util import instantiate_from_config
|
| 21 |
from src.utils.camera_util import (
|
|
|
|
| 28 |
from src.utils.infer_util import remove_background, resize_foreground
|
| 29 |
|
| 30 |
def create_custom_cameras(size: int, device: torch.device, azimuths: list, elevations: list,
|
| 31 |
+
fov_degrees: float, distance: float) -> DifferentiableCameraBatch:
|
| 32 |
# Object is in a 2x2x2 bounding box (-1 to 1 in each dimension)
|
| 33 |
+
object_diagonal = distance # Correct diagonal calculation for the cube
|
| 34 |
|
| 35 |
# Calculate radius based on object size and FOV
|
| 36 |
fov_radians = math.radians(fov_degrees)
|
| 37 |
radius = (object_diagonal / 2) / math.tan(fov_radians / 2) # Correct radius calculation
|
| 38 |
+
|
|
|
|
| 39 |
origins = []
|
| 40 |
xs = []
|
| 41 |
ys = []
|
|
|
|
| 75 |
),
|
| 76 |
)
|
| 77 |
|
|
|
|
|
|
|
| 78 |
def load_models():
|
| 79 |
"""Initialize and load all required models"""
|
| 80 |
config = OmegaConf.load('configs/instant-nerf-large-best.yaml')
|
|
|
|
| 229 |
|
| 230 |
class ShapERenderer:
|
| 231 |
def __init__(self, device):
|
| 232 |
+
print("Initializing Shap-E models...")
|
| 233 |
self.device = device
|
| 234 |
+
self.xm = None
|
| 235 |
+
self.model = None
|
| 236 |
+
self.diffusion = None
|
| 237 |
+
print("Shap-E models initialized!")
|
| 238 |
+
|
| 239 |
+
def ensure_models_loaded(self):
|
| 240 |
+
if self.model is None:
|
| 241 |
+
self.xm = load_model('transmitter', device=self.device)
|
| 242 |
+
self.model = load_model('text300M', device=self.device)
|
| 243 |
+
self.diffusion = diffusion_from_config(load_config('diffusion'))
|
| 244 |
|
|
|
|
| 245 |
def generate_views(self, prompt, guidance_scale=15.0, num_steps=64):
|
| 246 |
+
self.ensure_models_loaded()
|
| 247 |
+
|
| 248 |
# Generate latents using the text-to-3D model
|
| 249 |
batch_size = 1
|
| 250 |
guidance_scale = float(guidance_scale)
|
|
|
|
| 277 |
rendered_image = decode_latent_images(
|
| 278 |
self.xm,
|
| 279 |
latents[0],
|
| 280 |
+
cameras=cameras,
|
| 281 |
+
rendering_mode='stf'
|
| 282 |
)
|
| 283 |
+
images.append(rendered_image[0])
|
| 284 |
|
| 285 |
# Convert images to uint8
|
| 286 |
+
images = [np.array(image) for image in images]
|
| 287 |
|
| 288 |
# Create 2x3 grid layout (640x960) instead of 3x2 (960x640)
|
| 289 |
layout = np.zeros((960, 640, 3), dtype=np.uint8)
|
|
|
|
| 297 |
class RefinerInterface:
|
| 298 |
def __init__(self):
|
| 299 |
print("Initializing InstantMesh models...")
|
| 300 |
+
self.pipeline = None
|
| 301 |
+
self.model = None
|
| 302 |
+
self.infer_config = None
|
| 303 |
+
print("InstantMesh models initialized!")
|
| 304 |
+
|
| 305 |
+
def ensure_models_loaded(self):
|
| 306 |
+
if self.pipeline is None:
|
| 307 |
+
self.pipeline, self.model, self.infer_config = load_models()
|
| 308 |
|
|
|
|
| 309 |
def refine_model(self, input_image, prompt, steps=75, guidance_scale=7.5):
|
| 310 |
"""Main refinement function"""
|
| 311 |
+
self.ensure_models_loaded()
|
| 312 |
+
|
| 313 |
# Process image and get refined output
|
| 314 |
input_image = Image.fromarray(input_image)
|
| 315 |
|
|
|
|
| 446 |
)
|
| 447 |
|
| 448 |
# Set up event handlers
|
| 449 |
+
@spaces.GPU(duration=60)
|
| 450 |
def generate(prompt, guidance_scale, num_steps):
|
| 451 |
with torch.no_grad():
|
| 452 |
layout, _ = shap_e.generate_views(prompt, guidance_scale, num_steps)
|
| 453 |
return layout
|
| 454 |
|
| 455 |
+
@spaces.GPU(duration=60)
|
| 456 |
def refine(input_image, prompt, steps, guidance_scale):
|
| 457 |
refined_img, mesh_path = refiner.refine_model(
|
| 458 |
input_image,
|
app2.py
CHANGED
|
@@ -12,7 +12,8 @@ from einops import rearrange
|
|
| 12 |
from shap_e.diffusion.sample import sample_latents
|
| 13 |
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
|
| 14 |
from shap_e.models.download import load_model, load_config
|
| 15 |
-
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
|
|
|
|
| 16 |
|
| 17 |
from src.utils.train_util import instantiate_from_config
|
| 18 |
from src.utils.camera_util import (
|
|
|
|
| 12 |
from shap_e.diffusion.sample import sample_latents
|
| 13 |
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
|
| 14 |
from shap_e.models.download import load_model, load_config
|
| 15 |
+
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
|
| 16 |
+
from util import create_custom_cameras
|
| 17 |
|
| 18 |
from src.utils.train_util import instantiate_from_config
|
| 19 |
from src.utils.camera_util import (
|