imlixinyang committed
Commit 62432f1 · 1 Parent(s): 854d14d
Files changed (10)
  1. app.py +31 -37
  2. app_gradio copy.py +682 -0
  3. app_gradio.py +139 -90
  4. index.html +293 -181
  5. models/render.py +4 -2
  6. packages.txt +3 -1
  7. pre-requirements.txt +2 -1
  8. quant.py +1 -2
  9. requirements.txt +2 -1
  10. utils.py +45 -19
app.py CHANGED
@@ -9,42 +9,36 @@ except ImportError:
 import os
 import subprocess
 
-# def install_cuda_toolkit():
-#     # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
-#     CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run"
-#     CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
-#     subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
-#     subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
-#     subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
-
-#     os.environ["CUDA_HOME"] = "/usr/local/cuda"
-#     os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
-#     os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
-#         os.environ["CUDA_HOME"],
-#         "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
-#     )
-#     # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
-#     os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
-
-#     print("Successfully installed CUDA toolkit at: ", os.environ["CUDA_HOME"])
-
-# subprocess.call('rm /usr/bin/gcc', shell=True)
-# subprocess.call('rm /usr/bin/g++', shell=True)
-# subprocess.call('rm /usr/local/cuda/bin/gcc', shell=True)
-# subprocess.call('rm /usr/local/cuda/bin/g++', shell=True)
-
-# subprocess.call('ln -s /usr/bin/gcc-11 /usr/bin/gcc', shell=True)
-# subprocess.call('ln -s /usr/bin/g++-11 /usr/bin/g++', shell=True)
-
-# subprocess.call('ln -s /usr/bin/gcc-11 /usr/local/cuda/bin/gcc', shell=True)
-# subprocess.call('ln -s /usr/bin/g++-11 /usr/local/cuda/bin/g++', shell=True)
-
-# subprocess.call('gcc --version', shell=True)
-# subprocess.call('g++ --version', shell=True)
-
-# install_cuda_toolkit()
-
-# subprocess.run('pip install git+https://github.com/nerfstudio-project/gsplat.git@32f2a54d21c7ecb135320bb02b136b7407ae5712 --no-build-isolation --use-pep517', env={'CUDA_HOME': "/usr/local/cuda", "TORCH_CUDA_ARCH_LIST": "8.0;8.6"}, shell=True)
+try:
+    import gsplat
+except ImportError:
+    def install_cuda_toolkit():
+        # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
+        CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run"
+        CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
+        subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
+        subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
+        subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
+        os.environ["CUDA_HOME"] = "/usr/local/cuda"
+        os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
+        os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
+            os.environ["CUDA_HOME"],
+            "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
+        )
+        # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
+        os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
+        print("Successfully installed CUDA toolkit at: ", os.environ["CUDA_HOME"])
+        subprocess.call('rm /usr/bin/gcc', shell=True)
+        subprocess.call('rm /usr/bin/g++', shell=True)
+        subprocess.call('ln -s /usr/bin/gcc-11 /usr/bin/gcc', shell=True)
+        subprocess.call('ln -s /usr/bin/g++-11 /usr/bin/g++', shell=True)
+        subprocess.call('gcc --version', shell=True)
+        subprocess.call('g++ --version', shell=True)
+
+    install_cuda_toolkit()
+
+    subprocess.run('pip install git+https://github.com/nerfstudio-project/gsplat.git@32f2a54d21c7ecb135320bb02b136b7407ae5712 --no-build-isolation --use-pep517', env={'CUDA_HOME': "/usr/local/cuda", "TORCH_CUDA_ARCH_LIST": "8.0;8.6", "PATH": "/usr/local/cuda/bin/:" + os.environ["PATH"]}, shell=True)
 
 from flask import Flask, jsonify, request, send_file, render_template
 import base64
@@ -349,7 +343,7 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('--port', type=int, default=7860)
     parser.add_argument("--ckpt", default=None)
-    parser.add_argument("--gpu", type=int, default=2)
+    parser.add_argument("--gpu", type=int, default=0)
     parser.add_argument("--cache_dir", type=str, default="./tmpfiles")
     parser.add_argument("--offload_t5", type=bool, default=False)
     parser.add_argument("--max_concurrent", type=int, default=1, help="Maximum concurrent generation tasks")
@@ -380,7 +374,7 @@ if __name__ == "__main__":
         response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
         return response
 
-    @spaces.GPU
+    @GPU
    def generate_wrapper(cameras, n_frame, image, text_prompt, image_index, image_height, image_width, video_output_path=None):
        """Wrapper around the generation function, used for concurrency control."""
        return generation_system.generate(cameras, n_frame, image, text_prompt, image_index, image_height, image_width, video_output_path)
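
The `@spaces.GPU` → `@GPU` swap above relies on a module-level alias that falls back to a no-op decorator when the `spaces` package is unavailable; the same guard appears verbatim at the top of `app_gradio copy.py` below. A minimal sketch of the pattern:

```python
# Fallback decorator pattern used in this commit: use Hugging Face's
# spaces.GPU on ZeroGPU Spaces, otherwise a pass-through decorator so
# the same code also runs outside of Spaces.
try:
    import spaces
    GPU = spaces.GPU
except ImportError:
    def GPU(func):
        return func

@GPU  # no-op outside Spaces; requests a GPU slice on ZeroGPU
def generate_wrapper(*args, **kwargs):
    ...
```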
app_gradio copy.py ADDED
@@ -0,0 +1,682 @@
+try:
+    import spaces
+    GPU = spaces.GPU
+    print("spaces GPU is available")
+except ImportError:
+    def GPU(func):
+        return func
+
+import os
+import subprocess
+
+try:
+    import gsplat
+except ImportError:
+    def install_cuda_toolkit():
+        # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
+        CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run"
+        CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
+        subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
+        subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
+        subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
+
+        os.environ["CUDA_HOME"] = "/usr/local/cuda"
+        os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
+        os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
+            os.environ["CUDA_HOME"],
+            "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
+        )
+        # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
+        os.environ["TORCH_CUDA_ARCH_LIST"] = "9.0+PTX"
+
+        print("Successfully installed CUDA toolkit at: ", os.environ["CUDA_HOME"])
+
+        subprocess.call('rm /usr/bin/gcc', shell=True)
+        subprocess.call('rm /usr/bin/g++', shell=True)
+
+        subprocess.call('ln -s /usr/bin/gcc-11 /usr/bin/gcc', shell=True)
+        subprocess.call('ln -s /usr/bin/g++-11 /usr/bin/g++', shell=True)
+
+        subprocess.call('gcc --version', shell=True)
+        subprocess.call('g++ --version', shell=True)
+
+    install_cuda_toolkit()
+
+    os.environ["TORCH_CUDA_ARCH_LIST"] = "9.0+PTX"
+    os.environ["CUDA_HOME"] = "/usr/local/cuda"
+    os.environ["PATH"] = "/usr/local/cuda/bin/:" + os.environ["PATH"]
+
+    subprocess.run('pip install git+https://github.com/nerfstudio-project/gsplat.git@32f2a54d21c7ecb135320bb02b136b7407ae5712',
+                   env={'CUDA_HOME': "/usr/local/cuda", "TORCH_CUDA_ARCH_LIST": "9.0+PTX", "PATH": "/usr/local/cuda/bin/:" + os.environ["PATH"]}, shell=True)
+
+from fastapi import FastAPI
+from fastapi.staticfiles import StaticFiles
+import gradio as gr
+import base64
+import io
+from PIL import Image
+import torch
+import numpy as np
+import os
+import argparse
+import imageio
+import json
+import time
+import tempfile
+import shutil
+
+from huggingface_hub import hf_hub_download
+
+import einops
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+
+import imageio
+
+from models import *
+from utils import *
+
+from transformers import T5TokenizerFast, UMT5EncoderModel
+
+from diffusers import FlowMatchEulerDiscreteScheduler
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+class MyFlowMatchEulerDiscreteScheduler(FlowMatchEulerDiscreteScheduler):
+    def index_for_timestep(self, timestep, schedule_timesteps=None):
+        if schedule_timesteps is None:
+            schedule_timesteps = self.timesteps
+
+        return torch.argmin(
+            (timestep - schedule_timesteps.to(timestep.device)).abs(), dim=0).item()
+
+class GenerationSystem(nn.Module):
+    def __init__(self, ckpt_path=None, device="cuda:0", offload_t5=False, offload_vae=False):
+        super().__init__()
+        self.device = device
+        self.offload_t5 = offload_t5
+        self.offload_vae = offload_vae
+
+        self.latent_dim = 48
+        self.temporal_downsample_factor = 4
+        self.spatial_downsample_factor = 16
+
+        self.feat_dim = 1024
+
+        self.latent_patch_size = 2
+
+        self.denoising_steps = [0, 250, 500, 750]
+
+        model_id = "Wan-AI/Wan2.2-TI2V-5B-Diffusers"
+
+        self.vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float).eval()
+
+        from models.autoencoder_kl_wan import WanCausalConv3d
+        with torch.no_grad():
+            for name, module in self.vae.named_modules():
+                if isinstance(module, WanCausalConv3d):
+                    time_pad = module._padding[4]
+                    module.padding = (0, module._padding[2], module._padding[0])
+                    module._padding = (0, 0, 0, 0, 0, 0)
+                    module.weight = torch.nn.Parameter(module.weight[:, :, time_pad:].clone())
+
+        self.vae.requires_grad_(False)
+
+        self.register_buffer('latents_mean', torch.tensor(self.vae.config.latents_mean).float().view(1, self.vae.config.z_dim, 1, 1, 1).to(self.device))
+        self.register_buffer('latents_std', torch.tensor(self.vae.config.latents_std).float().view(1, self.vae.config.z_dim, 1, 1, 1).to(self.device))
+
+        self.tokenizer = T5TokenizerFast.from_pretrained(model_id, subfolder="tokenizer")
+
+        self.text_encoder = UMT5EncoderModel.from_pretrained(model_id, subfolder="text_encoder", torch_dtype=torch.float32).eval().requires_grad_(False).to(self.device if not self.offload_t5 else "cpu")
+
+        self.transformer = WanTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.float32).train().requires_grad_(False)
+
+        self.transformer.patch_embedding.weight = nn.Parameter(F.pad(self.transformer.patch_embedding.weight, (0, 0, 0, 0, 0, 0, 0, 6 + self.latent_dim)))
+        # self.transformer.rope.freqs_f[:] = self.transformer.rope.freqs_f[:1]
+
+        weight = self.transformer.proj_out.weight.reshape(self.latent_patch_size ** 2, self.latent_dim, self.transformer.proj_out.weight.shape[1])
+        bias = self.transformer.proj_out.bias.reshape(self.latent_patch_size ** 2, self.latent_dim)
+
+        extra_weight = torch.randn(self.latent_patch_size ** 2, self.feat_dim, self.transformer.proj_out.weight.shape[1]) * 0.02
+        extra_bias = torch.zeros(self.latent_patch_size ** 2, self.feat_dim)
+
+        self.transformer.proj_out.weight = nn.Parameter(torch.cat([weight, extra_weight], dim=1).flatten(0, 1).detach().clone())
+        self.transformer.proj_out.bias = nn.Parameter(torch.cat([bias, extra_bias], dim=1).flatten(0, 1).detach().clone())
+
+        self.recon_decoder = WANDecoderPixelAligned3DGSReconstructionModel(self.vae, self.feat_dim, use_render_checkpointing=True, use_network_checkpointing=False).train().requires_grad_(False).to(self.device)
+
+        self.scheduler = MyFlowMatchEulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler", shift=3)
+
+        self.register_buffer('timesteps', self.scheduler.timesteps.clone().to(self.device))
+
+        self.transformer.disable_gradient_checkpointing()
+        self.transformer.gradient_checkpointing = False
+
+        self.add_feedback_for_transformer()
+
+        if ckpt_path is not None:
+            state_dict = torch.load(ckpt_path, map_location="cpu", weights_only=False)
+            self.transformer.load_state_dict(state_dict["transformer"])
+            self.recon_decoder.load_state_dict(state_dict["recon_decoder"])
+            print(f"Loaded {ckpt_path}.")
+
+        from quant import FluxFp8GeMMProcessor
+
+        FluxFp8GeMMProcessor(self.transformer)
+
+        del self.vae.post_quant_conv, self.vae.decoder
+        self.vae.to(self.device if not self.offload_vae else "cpu")
+        self.vae.to(torch.bfloat16)
+
+        self.transformer.to(self.device)
+
+    def latent_scale_fn(self, x):
+        return (x - self.latents_mean) / self.latents_std
+
+    def latent_unscale_fn(self, x):
+        return x * self.latents_std + self.latents_mean
+
+    def add_feedback_for_transformer(self):
+        self.use_feedback = True
+        self.transformer.patch_embedding.weight = nn.Parameter(F.pad(self.transformer.patch_embedding.weight, (0, 0, 0, 0, 0, 0, 0, self.feat_dim + self.latent_dim)))
+
+    def encode_text(self, texts):
+        max_sequence_length = 512
+
+        text_inputs = self.tokenizer(
+            texts,
+            padding="max_length",
+            max_length=max_sequence_length,
+            truncation=True,
+            add_special_tokens=True,
+            return_attention_mask=True,
+            return_tensors="pt",
+        )
+        if getattr(self, "offload_t5", False):
+            text_input_ids = text_inputs.input_ids.to("cpu")
+            mask = text_inputs.attention_mask.to("cpu")
+        else:
+            text_input_ids = text_inputs.input_ids.to(self.device)
+            mask = text_inputs.attention_mask.to(self.device)
+        seq_lens = mask.gt(0).sum(dim=1).long()
+
+        if getattr(self, "offload_t5", False):
+            with torch.no_grad():
+                text_embeds = self.text_encoder(text_input_ids, mask).last_hidden_state.to(self.device)
+        else:
+            text_embeds = self.text_encoder(text_input_ids, mask).last_hidden_state
+        text_embeds = [u[:v] for u, v in zip(text_embeds, seq_lens)]
+        text_embeds = torch.stack(
+            [torch.cat([u, u.new_zeros(max_sequence_length - u.size(0), u.size(1))]) for u in text_embeds], dim=0
+        )
+        return text_embeds.float()
+
+    def forward_generator(self, noisy_latents, raymaps, condition_latents, t, text_embeds, cameras, render_cameras, image_height, image_width, need_3d_mode=True):
+
+        out = self.transformer(
+            hidden_states=torch.cat([noisy_latents, raymaps, condition_latents], dim=1),
+            timestep=t,
+            encoder_hidden_states=text_embeds,
+            return_dict=False,
+        )[0]
+
+        v_pred, feats = out.split([self.latent_dim, self.feat_dim], dim=1)
+
+        sigma = torch.stack([self.scheduler.sigmas[self.scheduler.index_for_timestep(_t)] for _t in t.unbind(0)], dim=0).to(self.device)
+        latents_pred_2d = noisy_latents - sigma * v_pred
+
+        if need_3d_mode:
+            scene_params = self.recon_decoder(
+                einops.rearrange(feats, 'B C T H W -> (B T) C H W').unsqueeze(2),
+                einops.rearrange(self.latent_unscale_fn(latents_pred_2d.detach()), 'B C T H W -> (B T) C H W').unsqueeze(2),
+                cameras
+            ).flatten(1, -2)
+
+            images_pred, _ = self.recon_decoder.render(scene_params.unbind(0), render_cameras, image_height, image_width, bg_mode="white")
+
+            latents_pred_3d = einops.rearrange(self.latent_scale_fn(self.vae.encode(
+                einops.rearrange(images_pred, 'B T C H W -> (B T) C H W', T=images_pred.shape[1]).unsqueeze(2).to(self.device if not self.offload_vae else "cpu").float()
+            ).latent_dist.sample().to(self.device)).squeeze(2), '(B T) C H W -> B C T H W', T=images_pred.shape[1]).to(noisy_latents.dtype)
+
+        return {
+            '2d': latents_pred_2d,
+            '3d': latents_pred_3d if need_3d_mode else None,
+            'rgb_3d': images_pred if need_3d_mode else None,
+            'scene': scene_params if need_3d_mode else None,
+            'feat': feats
+        }
+
+    @torch.no_grad()
+    def generate(self, cameras, n_frame, image=None, text="", image_index=0, image_height=480, image_width=704, video_output_path=None):
+
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+        self.vae.to(self.device)
+        self.text_encoder.to(self.device if not self.offload_t5 else "cpu")
+        self.transformer.to(self.device)
+        self.recon_decoder.to(self.device)
+        self.timesteps = self.timesteps.to(self.device)
+        self.latents_mean = self.latents_mean.to(self.device)
+        self.latents_std = self.latents_std.to(self.device)
+
+        with torch.amp.autocast(dtype=torch.bfloat16, device_type="cuda"):
+            batch_size = 1
+
+            cameras = cameras.to(self.device).unsqueeze(0)
+
+            if cameras.shape[1] != n_frame:
+                render_cameras = cameras.clone()
+                cameras = sample_from_dense_cameras(cameras.squeeze(0), torch.linspace(0, 1, n_frame, device=self.device)).unsqueeze(0)
+            else:
+                render_cameras = cameras
+
+            cameras, ref_w2c, T_norm = normalize_cameras(cameras, return_meta=True, n_frame=None)
+
+            render_cameras = normalize_cameras(render_cameras, ref_w2c=ref_w2c, T_norm=T_norm, n_frame=None)
+
+            text = "[Static] " + text
+
+            text_embeds = self.encode_text([text])
+            # neg_text_embeds = self.encode_text([""]).repeat(batch_size, 1, 1)
+
+            masks = torch.zeros(batch_size, n_frame, device=self.device)
+
+            condition_latents = torch.zeros(batch_size, self.latent_dim, n_frame, image_height // self.spatial_downsample_factor, image_width // self.spatial_downsample_factor, device=self.device)
+
+            if image is not None:
+                image = image.to(self.device)
+
+                latent = self.latent_scale_fn(self.vae.encode(
+                    image.unsqueeze(0).unsqueeze(2).to(self.device if not self.offload_vae else "cpu").float()
+                ).latent_dist.sample().to(self.device)).squeeze(2)
+
+                masks[:, image_index] = 1
+                condition_latents[:, :, image_index] = latent
+
+            raymaps = create_raymaps(cameras, image_height // self.spatial_downsample_factor, image_width // self.spatial_downsample_factor)
+            raymaps = einops.rearrange(raymaps, 'B T H W C -> B C T H W', T=n_frame)
+
+            noise = torch.randn(batch_size, self.latent_dim, n_frame, image_height // self.spatial_downsample_factor, image_width // self.spatial_downsample_factor, device=self.device)
+
+            noisy_latents = noise
+
+            torch.cuda.empty_cache()
+
+            if self.use_feedback:
+                prev_latents_pred = torch.zeros(batch_size, self.latent_dim, n_frame, image_height // self.spatial_downsample_factor, image_width // self.spatial_downsample_factor, device=self.device)
+
+                prev_feats = torch.zeros(batch_size, self.feat_dim, n_frame, image_height // self.spatial_downsample_factor, image_width // self.spatial_downsample_factor, device=self.device)
+
+            for i in range(len(self.denoising_steps)):
+                t_ids = torch.full((noisy_latents.shape[0],), self.denoising_steps[i], device=self.device)
+
+                t = self.timesteps[t_ids]
+
+                if self.use_feedback:
+                    _condition_latents = torch.cat([condition_latents, prev_feats, prev_latents_pred], dim=1)
+                else:
+                    _condition_latents = condition_latents
+
+                if i < len(self.denoising_steps) - 1:
+                    out = self.forward_generator(noisy_latents, raymaps, _condition_latents, t, text_embeds, cameras, cameras, image_height, image_width, need_3d_mode=True)
+
+                    latents_pred = out["3d"]
+
+                    if self.use_feedback:
+                        prev_latents_pred = latents_pred
+                        prev_feats = out['feat']
+
+                    noisy_latents = self.scheduler.scale_noise(latents_pred, self.timesteps[torch.full((noisy_latents.shape[0],), self.denoising_steps[i + 1], device=self.device)], torch.randn_like(noise))
+
+                else:
+                    out = self.transformer(
+                        hidden_states=torch.cat([noisy_latents, raymaps, _condition_latents], dim=1),
+                        timestep=t,
+                        encoder_hidden_states=text_embeds,
+                        return_dict=False,
+                    )[0]
+
+                    v_pred, feats = out.split([self.latent_dim, self.feat_dim], dim=1)
+
+                    sigma = torch.stack([self.scheduler.sigmas[self.scheduler.index_for_timestep(_t)] for _t in t.unbind(0)], dim=0).to(self.device)
+                    latents_pred = noisy_latents - sigma * v_pred
+
+                    scene_params = self.recon_decoder(
+                        einops.rearrange(feats, 'B C T H W -> (B T) C H W').unsqueeze(2),
+                        einops.rearrange(self.latent_unscale_fn(latents_pred.detach()), 'B C T H W -> (B T) C H W').unsqueeze(2),
+                        cameras
+                    ).flatten(1, -2)
+
+            if video_output_path is not None:
+                interpolated_images_pred, _ = self.recon_decoder.render(scene_params.unbind(0), render_cameras, image_height, image_width, bg_mode="white")
+
+                interpolated_images_pred = einops.rearrange(interpolated_images_pred[0].clamp(-1, 1).add(1).div(2), 'T C H W -> T H W C')
+
+                interpolated_images_pred = [torch.cat([img], dim=1).detach().cpu().mul(255).numpy().astype(np.uint8) for i, img in enumerate(interpolated_images_pred.unbind(0))]
+
+                imageio.mimwrite(video_output_path, interpolated_images_pred, fps=15, quality=8, macro_block_size=1)
+
+            scene_params = scene_params[0]
+
+            scene_params = scene_params.detach().cpu()
+
+        return scene_params, ref_w2c, T_norm
+
+@GPU
+def process_generation_request(data, generation_system, cache_dir):
+    """
+    Process the generation request with the same logic as Flask version
+    """
+    try:
+        image_prompt = data.get('image_prompt', None)
+        text_prompt = data.get('text_prompt', "")
+        cameras = data.get('cameras')
+        resolution = data.get('resolution')
+        image_index = data.get('image_index', 0)
+
+        n_frame, image_height, image_width = resolution
+
+        if not image_prompt and text_prompt == "":
+            return {'error': 'No Prompts provided'}
+
+        if image_prompt:
+            # image_prompt can be a file path or base64 data
+            if os.path.exists(image_prompt):
+                image_prompt = Image.open(image_prompt)
+            else:
+                # image_prompt may be "data:image/png;base64,...."
+                if ',' in image_prompt:
+                    image_prompt = image_prompt.split(',', 1)[1]
+
+                try:
+                    image_bytes = base64.b64decode(image_prompt)
+                    image_prompt = Image.open(io.BytesIO(image_bytes))
+                except Exception as img_e:
+                    return {'error': f'Image decode error: {str(img_e)}'}
+
+            image = image_prompt.convert('RGB')
+
+            w, h = image.size
+
+            # center crop
+            if image_height / h > image_width / w:
+                scale = image_height / h
+            else:
+                scale = image_width / w
+
+            new_h = int(image_height / scale)
+            new_w = int(image_width / scale)
+
+            image = image.crop(((w - new_w) // 2, (h - new_h) // 2,
+                                new_w + (w - new_w) // 2, new_h + (h - new_h) // 2)).resize((image_width, image_height))
+
+            for camera in cameras:
+                camera['fx'] = camera['fx'] * scale
+                camera['fy'] = camera['fy'] * scale
+                camera['cx'] = (camera['cx'] - (w - new_w) // 2) * scale
+                camera['cy'] = (camera['cy'] - (h - new_h) // 2) * scale
+
+            image = torch.from_numpy(np.array(image)).float().permute(2, 0, 1) / 255.0 * 2 - 1
+        else:
+            image = None
+
+        cameras = torch.stack([
+            torch.from_numpy(np.array([camera['quaternion'][0], camera['quaternion'][1], camera['quaternion'][2], camera['quaternion'][3], camera['position'][0], camera['position'][1], camera['position'][2], camera['fx'] / image_width, camera['fy'] / image_height, camera['cx'] / image_width, camera['cy'] / image_height], dtype=np.float32))
+            for camera in cameras
+        ], dim=0)
+
+        file_id = str(int(time.time() * 1000))
+
+        start_time = time.time()
+        scene_params, ref_w2c, T_norm = generation_system.generate(cameras, n_frame, image, text_prompt, image_index, image_height, image_width, video_output_path=os.path.join(cache_dir, f'{file_id}.mp4'))
+        end_time = time.time()
+        print(f'Generation time: {end_time - start_time} s')
+
+        with open(os.path.join(cache_dir, f'{file_id}.json'), 'w') as f:
+            json.dump(data, f)
+
+        splat_path = os.path.join(cache_dir, f'{file_id}.ply')
+
+        export_ply_for_gaussians(splat_path, scene_params, opacity_threshold=0.001, T_norm=T_norm)
+
+        if not os.path.exists(splat_path):
+            return {'error': f'{splat_path} not found'}
+
+        file_size = os.path.getsize(splat_path)
+
+        response_data = {
+            'success': True,
+            'file_id': file_id,
+            'file_path': splat_path,
+            'file_size': file_size,
+            'download_url': f'/download/{file_id}',
+            'generation_time': end_time - start_time,
+        }
+
+        return response_data
+
+    except Exception as e:
+        return {'error': f'Processing error: {str(e)}'}
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--port', type=int, default=7860)
+    parser.add_argument("--ckpt", default=None)
+    parser.add_argument("--cache_dir", type=str, default=None)
+    parser.add_argument("--offload_t5", type=bool, default=False)
+    parser.add_argument("--max_concurrent", type=int, default=1, help="Maximum concurrent generation tasks")
+    args, _ = parser.parse_known_args()
+
+    # Ensure model.ckpt exists, download if not present
+    if args.ckpt is None:
+        from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
+        ckpt_path = os.path.join(HUGGINGFACE_HUB_CACHE, "models--imlixinyang--FlashWorld", "snapshots", "6a8e88c6f88678ac098e4c82675f0aee555d6e5d", "model.ckpt")
+        if not os.path.exists(ckpt_path):
+            hf_hub_download(repo_id="imlixinyang/FlashWorld", filename="model.ckpt", local_dir_use_symlinks=False)
+    else:
+        ckpt_path = args.ckpt
+
+    if args.cache_dir is None or args.cache_dir == "":
+        GRADIO_TEMP_DIR = tempfile.gettempdir()
+        cache_dir = os.path.join(GRADIO_TEMP_DIR, "flashworld_gradio")
+    else:
+        cache_dir = args.cache_dir
+
+    # Create cache directory
+    os.makedirs(cache_dir, exist_ok=True)
+
+    # Initialize GenerationSystem
+    device = torch.device("cpu")
+    generation_system = GenerationSystem(ckpt_path=ckpt_path, device=device)
+
+    # Create Gradio interface
+    with gr.Blocks(title="FlashWorld Backend") as demo:
+        gr.Markdown("# FlashWorld Generation Backend")
+        gr.Markdown("This backend processes JSON requests for 3D scene generation.")
+
+        with gr.Row():
+            with gr.Column():
+                json_input = gr.Textbox(
+                    label="JSON Input",
+                    placeholder="Enter JSON request here...",
+                    lines=10,
+                    value='{"image_prompt": null, "text_prompt": "A beautiful landscape", "cameras": [...], "resolution": [16, 480, 704], "image_index": 0}'
+                )
+
+                generate_btn = gr.Button("Generate", variant="primary")
+
+            with gr.Column():
+                json_output = gr.Textbox(
+                    label="JSON Output",
+                    lines=10,
+                    interactive=False
+                )
+
+        # File download section
+        gr.Markdown("## File Download")
+        with gr.Row():
+            file_id_input = gr.Textbox(
+                label="File ID",
+                placeholder="Enter file ID to download..."
+            )
+            download_btn = gr.Button("Download PLY File")
+            download_output = gr.File(label="Downloaded File")
+
+
+        def gradio_generate(json_input):
+            """
+            Gradio interface function that processes JSON input and returns JSON output
+            """
+            try:
+                # Parse JSON input
+                if isinstance(json_input, str):
+                    data = json.loads(json_input)
+                else:
+                    data = json_input
+
+                # Process the request
+                result = process_generation_request(data, generation_system, cache_dir)
+
+                # Return JSON response
+                return json.dumps(result, indent=2)
+
+            except Exception as e:
+                error_response = {'error': f'JSON processing error: {str(e)}'}
+                return json.dumps(error_response, indent=2)
+
+        def download_file(file_id):
+            """
+            Download generated PLY file
+            """
+            file_path = os.path.join(cache_dir, f'{file_id}.ply')
+
+            if not os.path.exists(file_path):
+                return None
+
+            return file_path
+
+        # Event handlers
+        generate_btn.click(
+            fn=gradio_generate,
+            inputs=[json_input],
+            outputs=[json_output]
+        )
+
+        download_btn.click(
+            fn=download_file,
+            inputs=[file_id_input],
+            outputs=[download_output]
+        )
+
+        # Example JSON format
+        gr.Markdown("""
+        ## Example JSON Input Format:
+        ```json
+        {
+            "image_prompt": null,
+            "text_prompt": "A beautiful landscape with mountains and trees",
+            "cameras": [
+                {
+                    "quaternion": [0, 0, 0, 1],
+                    "position": [0, 0, 5],
+                    "fx": 500,
+                    "fy": 500,
+                    "cx": 240,
+                    "cy": 240
+                },
+                {
+                    "quaternion": [0, 0, 0, 1],
+                    "position": [0, 0, 5],
+                    "fx": 500,
+                    "fy": 500,
+                    "cx": 240,
+                    "cy": 240
+                }
+            ],
+            "resolution": [16, 480, 704],
+            "image_index": 0
+        }
+        ```
+        """)
+
+    from contextlib import asynccontextmanager
+
+    @asynccontextmanager
+    async def lifespan_ctx(app):
+        app.state._cleanup_stop_event = asyncio.Event()
+        app.state._cleanup_task = asyncio.create_task(periodic_cache_cleanup(app.state._cleanup_stop_event, cache_dir))
+        try:
+            yield
+        finally:
+            if getattr(app.state, "_cleanup_stop_event", None):
+                app.state._cleanup_stop_event.set()
+            if getattr(app.state, "_cleanup_task", None):
+                try:
+                    await app.state._cleanup_task
+                except Exception:
+                    pass
+
+    app = FastAPI(lifespan=lifespan_ctx)
+
+    from starlette.responses import FileResponse
+
+    @app.get("/app")
+    async def read_index():
+        return FileResponse('index.html')
+
+    app = gr.mount_gradio_app(app, demo, path="/")
+
+    import uvicorn
+
+    from fastapi.staticfiles import StaticFiles
+    from fastapi import HTTPException
+    import asyncio
+
+    # Mount the static file directory so it can be accessed, e.g. /cache/<filename>
+    app.mount("/cache", StaticFiles(directory=cache_dir), name="cache")
+
+    # Delete the generated files for a given file_id (and related intermediate files)
+    @app.post("/delete/{file_id}")
+    async def delete_generated_file(file_id: str):
+        try:
+            deleted = False
+            # Possible associated files: .ply, .json, .mp4
+            for ext in (".ply", ".json", ".mp4"):
+                p = os.path.join(cache_dir, f"{file_id}{ext}")
+                if os.path.exists(p):
+                    try:
+                        os.remove(p)
+                        deleted = True
+                    except Exception:
+                        pass
+            return {"success": True, "deleted": deleted}
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+    # Periodically clean up files whose creation/modification time is older than 15 minutes
+    async def periodic_cache_cleanup(stop_event: asyncio.Event, directory: str, max_age_seconds: int = 15 * 60, interval_seconds: int = 300):
+        while not stop_event.is_set():
+            try:
+                now = time.time()
+                for name in os.listdir(directory):
+                    path = os.path.join(directory, name)
+                    try:
+                        if os.path.isfile(path):
+                            mtime = os.path.getmtime(path)
+                            if (now - mtime) > max_age_seconds:
+                                try:
+                                    os.remove(path)
+                                except Exception:
+                                    pass
+                    except Exception:
+                        pass
+            except Exception:
+                pass
+            try:
+                await asyncio.wait_for(stop_event.wait(), timeout=interval_seconds)
+            except asyncio.TimeoutError:
+                continue
+
+    uvicorn.run(app, host="0.0.0.0", port=7860)
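
Callers reach this backend through Gradio's two-step call API: a POST to `/gradio_api/call/<fn_name>` queues the job and returns an `event_id`, and a follow-up GET on the same path streams the result as server-sent events — the same flow that `index.html` implements in JavaScript further below. A hedged Python sketch of that round trip (the base URL is an assumption; endpoint names come from this commit):

```python
import json
import requests  # assumption: requests is available in the client environment

BASE = "http://localhost:7860"  # assumed local deployment of this backend

request_data = {
    "image_prompt": "",
    "text_prompt": "A beautiful landscape",
    "cameras": [],  # fill with camera dicts as in the example JSON above
    "resolution": [16, 480, 704],
    "image_index": 0,
}

# Step 1: queue the call; Gradio responds with an event_id.
resp = requests.post(
    f"{BASE}/gradio_api/call/gradio_generate",
    json={"data": [json.dumps(request_data)]},
)
event_id = resp.json()["event_id"]

# Step 2: read the SSE stream; the final "data:" line of the
# "complete" event carries the function's return value.
result = None
with requests.get(f"{BASE}/gradio_api/call/gradio_generate/{event_id}", stream=True) as sse:
    for line in sse.iter_lines(decode_unicode=True):
        if line and line.startswith("data: "):
            result = json.loads(line[len("data: "):])

print(result)  # e.g. ['{"success": true, "file_id": "...", "download_url": "..."}']
```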
app_gradio.py CHANGED
@@ -9,43 +9,48 @@ except ImportError:
 import os
 import subprocess
 
-# def install_cuda_toolkit():
-#     # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
-#     CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run"
-#     CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
-#     subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
-#     subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
-#     subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
-
-#     os.environ["CUDA_HOME"] = "/usr/local/cuda"
-#     os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
-#     os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
-#         os.environ["CUDA_HOME"],
-#         "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
-#     )
-#     # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
-#     os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
-
-#     print("Successfully installed CUDA toolkit at: ", os.environ["CUDA_HOME"])
-
-# subprocess.call('rm /usr/bin/gcc', shell=True)
-# subprocess.call('rm /usr/bin/g++', shell=True)
-# subprocess.call('rm /usr/local/cuda/bin/gcc', shell=True)
-# subprocess.call('rm /usr/local/cuda/bin/g++', shell=True)
-
-# subprocess.call('ln -s /usr/bin/gcc-11 /usr/bin/gcc', shell=True)
-# subprocess.call('ln -s /usr/bin/g++-11 /usr/bin/g++', shell=True)
-
-# subprocess.call('ln -s /usr/bin/gcc-11 /usr/local/cuda/bin/gcc', shell=True)
-# subprocess.call('ln -s /usr/bin/g++-11 /usr/local/cuda/bin/g++', shell=True)
-
-# subprocess.call('gcc --version', shell=True)
-# subprocess.call('g++ --version', shell=True)
-
-# install_cuda_toolkit()
-
-# subprocess.run('pip install git+https://github.com/nerfstudio-project/gsplat.git@32f2a54d21c7ecb135320bb02b136b7407ae5712 --no-build-isolation --use-pep517', env={'CUDA_HOME': "/usr/local/cuda", "TORCH_CUDA_ARCH_LIST": "8.0;8.6"}, shell=True)
+try:
+    import gsplat
+except ImportError:
+    def install_cuda_toolkit():
+        # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
+        CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run"
+        CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
+        subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
+        subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
+        subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
+
+        os.environ["CUDA_HOME"] = "/usr/local/cuda"
+        os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
+        os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
+            os.environ["CUDA_HOME"],
+            "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
+        )
+        # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
+        os.environ["TORCH_CUDA_ARCH_LIST"] = "9.0+PTX"
+
+        print("Successfully installed CUDA toolkit at: ", os.environ["CUDA_HOME"])
+
+        subprocess.call('rm /usr/bin/gcc', shell=True)
+        subprocess.call('rm /usr/bin/g++', shell=True)
+
+        subprocess.call('ln -s /usr/bin/gcc-11 /usr/bin/gcc', shell=True)
+        subprocess.call('ln -s /usr/bin/g++-11 /usr/bin/g++', shell=True)
+
+        subprocess.call('gcc --version', shell=True)
+        subprocess.call('g++ --version', shell=True)
+
+    install_cuda_toolkit()
+
+    os.environ["TORCH_CUDA_ARCH_LIST"] = "9.0+PTX"
+    os.environ["CUDA_HOME"] = "/usr/local/cuda"
+    os.environ["PATH"] = "/usr/local/cuda/bin/:" + os.environ["PATH"]
+
+    subprocess.run('pip install git+https://github.com/nerfstudio-project/gsplat.git@32f2a54d21c7ecb135320bb02b136b7407ae5712',
+                   env={'CUDA_HOME': "/usr/local/cuda", "TORCH_CUDA_ARCH_LIST": "9.0+PTX", "PATH": "/usr/local/cuda/bin/:" + os.environ["PATH"]}, shell=True)
+
+from fastapi import FastAPI
+from fastapi.staticfiles import StaticFiles
 import gradio as gr
 import base64
 import io
@@ -59,6 +64,7 @@ import json
 import time
 import tempfile
 import shutil
+import threading
 
 from huggingface_hub import hf_hub_download
 
@@ -78,7 +84,6 @@ from transformers import T5TokenizerFast, UMT5EncoderModel
 from diffusers import FlowMatchEulerDiscreteScheduler
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 
 class MyFlowMatchEulerDiscreteScheduler(FlowMatchEulerDiscreteScheduler):
     def index_for_timestep(self, timestep, schedule_timesteps=None):
@@ -152,11 +157,11 @@ class GenerationSystem(nn.Module):
 
         self.add_feedback_for_transformer()
 
-        # if ckpt_path is not None:
-        #     state_dict = torch.load(ckpt_path, map_location="cpu", weights_only=False)
-        #     self.transformer.load_state_dict(state_dict["transformer"])
-        #     self.recon_decoder.load_state_dict(state_dict["recon_decoder"])
-        #     print(f"Loaded {ckpt_path}.")
+        if ckpt_path is not None:
+            state_dict = torch.load(ckpt_path, map_location="cpu", weights_only=False)
+            self.transformer.load_state_dict(state_dict["transformer"])
+            self.recon_decoder.load_state_dict(state_dict["recon_decoder"])
+            print(f"Loaded {ckpt_path}.")
 
         from quant import FluxFp8GeMMProcessor
 
@@ -164,6 +169,7 @@ class GenerationSystem(nn.Module):
 
         del self.vae.post_quant_conv, self.vae.decoder
         self.vae.to(self.device if not self.offload_vae else "cpu")
+        self.vae.to(torch.bfloat16)
 
         self.transformer.to(self.device)
 
@@ -243,11 +249,12 @@ class GenerationSystem(nn.Module):
             'feat': feats
         }
 
-    @GPU
     @torch.no_grad()
-    @torch.amp.autocast(dtype=torch.bfloat16, device_type="cuda")
-    def generate(self, cameras, n_frame, image=None, text="", image_index=0, image_height=480, image_width=704, video_output_path=None):
+    def generate(self, cameras, n_frame, image=None, text="", image_index=0, image_height=480, image_width=704, video_output_path=None):
+
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+        self.vae.to(self.device)
         self.text_encoder.to(self.device if not self.offload_t5 else "cpu")
         self.transformer.to(self.device)
         self.recon_decoder.to(self.device)
@@ -255,7 +262,7 @@ class GenerationSystem(nn.Module):
         self.latents_mean = self.latents_mean.to(self.device)
         self.latents_std = self.latents_std.to(self.device)
 
-        with torch.no_grad():
+        with torch.amp.autocast(dtype=torch.bfloat16, device_type="cuda"):
             batch_size = 1
 
             cameras = cameras.to(self.device).unsqueeze(0)
@@ -358,6 +365,7 @@ class GenerationSystem(nn.Module):
 
         return scene_params, ref_w2c, T_norm
 
+@GPU
 def process_generation_request(data, generation_system, cache_dir):
     """
     Process the generation request with the same logic as Flask version
@@ -430,9 +438,9 @@ def process_generation_request(data, generation_system, cache_dir):
         with open(os.path.join(cache_dir, f'{file_id}.json'), 'w') as f:
             json.dump(data, f)
 
-        splat_path = os.path.join(cache_dir, f'{file_id}.ply')
+        splat_path = os.path.join(cache_dir, f'{file_id}.spz')
 
-        export_ply_for_gaussians(splat_path, scene_params, opacity_threshold=0.001, T_norm=T_norm)
+        export_gaussians(splat_path, scene_params, opacity_threshold=0.001, T_norm=T_norm)
 
         if not os.path.exists(splat_path):
             return {'error': f'{splat_path} not found'}
@@ -453,43 +461,10 @@ def process_generation_request(data, generation_system, cache_dir):
     except Exception as e:
         return {'error': f'Processing error: {str(e)}'}
 
-def gradio_generate(json_input, generation_system, cache_dir):
-    """
-    Gradio interface function that processes JSON input and returns JSON output
-    """
-    try:
-        # Parse JSON input
-        if isinstance(json_input, str):
-            data = json.loads(json_input)
-        else:
-            data = json_input
-
-        # Process the request
-        result = process_generation_request(data, generation_system, cache_dir)
-
-        # Return JSON response
-        return json.dumps(result, indent=2)
-
-    except Exception as e:
-        error_response = {'error': f'JSON processing error: {str(e)}'}
-        return json.dumps(error_response, indent=2)
-
-def download_file(file_id, cache_dir):
-    """
-    Download generated PLY file
-    """
-    file_path = os.path.join(cache_dir, f'{file_id}.ply')
-
-    if not os.path.exists(file_path):
-        return None
-
-    return file_path
-
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('--port', type=int, default=7860)
     parser.add_argument("--ckpt", default=None)
-    parser.add_argument("--gpu", type=int, default=0)
     parser.add_argument("--cache_dir", type=str, default=None)
     parser.add_argument("--offload_t5", type=bool, default=False)
     parser.add_argument("--max_concurrent", type=int, default=1, help="Maximum concurrent generation tasks")
@@ -514,15 +489,14 @@ if __name__ == "__main__":
     os.makedirs(cache_dir, exist_ok=True)
 
     # Initialize GenerationSystem
-    device = f"cuda:{args.gpu}" # if torch.cuda.is_available() else "cpu"
+    device = torch.device("cpu")
    generation_system = GenerationSystem(ckpt_path=ckpt_path, device=device)
 
     # Create Gradio interface
     with gr.Blocks(title="FlashWorld Backend") as demo:
-        gr.Markdown("# FlashWorld Generation Backend")
-        gr.Markdown("This backend processes JSON requests for 3D scene generation.")
+        gr.Markdown("FlashWorld Generation Backend — API only. This service powers the FlashWorld Web Demo and is intended for programmatic/API access. The UI is intentionally hidden.")
 
-        with gr.Row():
+        with gr.Row(visible=False):
            with gr.Column():
                json_input = gr.Textbox(
                    label="JSON Input",
@@ -541,27 +515,83 @@ if __name__ == "__main__":
                )
 
         # File download section
-        gr.Markdown("## File Download")
-        with gr.Row():
+        gr.Markdown("## File Download", visible=False)
+        with gr.Row(visible=False):
             file_id_input = gr.Textbox(
                 label="File ID",
                 placeholder="Enter file ID to download..."
             )
-            download_btn = gr.Button("Download PLY File")
+            download_btn = gr.Button("Download SPZ File")
             download_output = gr.File(label="Downloaded File")
+
+
+        def gradio_generate(json_input):
+            """
+            Gradio interface function that processes JSON input and returns JSON output
+            """
+            try:
+                # Parse JSON input
+                if isinstance(json_input, str):
+                    data = json.loads(json_input)
+                else:
+                    data = json_input
+
+                # Process the request
+                result = process_generation_request(data, generation_system, cache_dir)
+
+                # Return JSON response
+                return json.dumps(result, indent=2)
+
+            except Exception as e:
+                error_response = {'error': f'JSON processing error: {str(e)}'}
+                return json.dumps(error_response, indent=2)
+
+        def download_file(file_id):
+            """
+            Download generated SPZ file
+            """
+            file_path = os.path.join(cache_dir, f'{file_id}.spz')
+
+            if not os.path.exists(file_path):
+                return None
+
+            return file_path
+
+        def gradio_delete(file_id):
+            """
+            Delete generated artifacts by file_id (.spz/.json/.mp4)
+            """
+            deleted = False
+            try:
+                for ext in (".spz", ".json", ".mp4"):
+                    p = os.path.join(cache_dir, f"{file_id}{ext}")
+                    if os.path.exists(p):
+                        try:
+                            os.remove(p)
+                            deleted = True
+                        except Exception:
+                            pass
+                return {"success": True, "deleted": deleted}
+            except Exception as e:
+                return {"success": False, "error": str(e)}
 
         # Event handlers
         generate_btn.click(
-            fn=lambda json_input: gradio_generate(json_input, generation_system, cache_dir),
+            fn=gradio_generate,
             inputs=[json_input],
             outputs=[json_output]
         )
 
         download_btn.click(
-            fn=lambda file_id: download_file(file_id, cache_dir),
+            fn=download_file,
             inputs=[file_id_input],
             outputs=[download_output]
        )
+
+        # Hidden API hook for deletion to expose /gradio_api/call/gradio_delete
+        _hidden_delete_in = gr.Textbox(visible=False)
+        _hidden_delete_btn = gr.Button(visible=False)
+        _hidden_delete_btn.click(fn=gradio_delete, inputs=[_hidden_delete_in], outputs=[])
 
         # Example JSON format
         gr.Markdown("""
@@ -592,10 +622,29 @@ if __name__ == "__main__":
            "image_index": 0
        }
        ```
-        """)
-
-    # Launch the interface
-    demo.launch(
-        ssr_mode=False,
-        allowed_paths=[cache_dir]
-    )
+        """, visible=False)
+
+    # Background periodic cleanup thread (no FastAPI app lifecycle)
+    def _cleanup_loop(directory: str, max_age_seconds: int = 15 * 60, interval_seconds: int = 300):
+        while True:
+            try:
+                now = time.time()
+                for name in os.listdir(directory):
+                    path = os.path.join(directory, name)
+                    try:
+                        if os.path.isfile(path):
+                            mtime = os.path.getmtime(path)
+                            if (now - mtime) > max_age_seconds:
+                                try:
+                                    os.remove(path)
+                                except Exception:
+                                    pass
+                    except Exception:
+                        pass
+            except Exception:
+                pass
+            time.sleep(interval_seconds)
+
+    threading.Thread(target=_cleanup_loop, args=(cache_dir,), daemon=True).start()
+
+    demo.launch(allowed_paths=[cache_dir])
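
On the wire, each entry of `cameras` is a dict with `quaternion`, `position`, and pixel-space intrinsics; `process_generation_request` packs it into an 11-dimensional float vector per frame, with the intrinsics normalized by the target width and height. A small sketch of that packing (values are illustrative; the quaternion component order follows whatever the client sends):

```python
import numpy as np

image_width, image_height = 704, 480  # from the request's "resolution"

# Illustrative camera entry, matching the example JSON above.
camera = {"quaternion": [0, 0, 0, 1], "position": [0, 0, 5],
          "fx": 500, "fy": 500, "cx": 240, "cy": 240}

# Same 11-value layout used in process_generation_request:
# quaternion (4), position (3), fx/W, fy/H, cx/W, cy/H.
vec = np.array(
    camera["quaternion"]
    + camera["position"]
    + [camera["fx"] / image_width, camera["fy"] / image_height,
       camera["cx"] / image_width, camera["cy"] / image_height],
    dtype=np.float32,
)
assert vec.shape == (11,)
```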
index.html CHANGED
@@ -67,7 +67,7 @@
67
  .content-container {
68
  display: flex;
69
  flex: 1;
70
- overflow: hidden;
71
  }
72
 
73
  .left-panel {
@@ -76,6 +76,7 @@
76
  border-right: 1px solid rgba(255, 255, 255, 0.1);
77
  padding: 20px;
78
  overflow-y: auto;
 
79
  flex-shrink: 0;
80
  }
81
 
@@ -86,6 +87,7 @@
86
  display: flex;
87
  justify-content: center;
88
  align-items: center;
 
89
  }
90
 
91
  .right-panel {
@@ -95,6 +97,7 @@
95
  padding: 20px;
96
  overflow-y: auto;
97
  flex-shrink: 0;
 
98
  }
99
 
100
  .guidance {
@@ -222,6 +225,7 @@
222
  font-size: 12px;
223
  cursor: default;
224
  user-select: none;
 
225
  }
226
  .info-tip .tooltip {
227
  display: none;
@@ -229,16 +233,17 @@
229
  left: 0;
230
  top: calc(100% + 8px); /* show below the icon */
231
  transform: none;
232
- background: rgba(0,0,0,0.9);
233
  color: #e5e7eb;
234
- border: 1px solid rgba(255,255,255,0.15);
235
  border-radius: 8px;
236
  padding: 10px 12px;
237
  font-size: 12px;
238
- width: 360px; /* wider tooltip */
239
  white-space: normal;
240
- z-index: 2000; /* above GUI and other elements */
241
- box-shadow: 0 4px 12px rgba(0,0,0,0.4);
 
242
  }
243
  .info-tip:hover .tooltip {
244
  display: block;
@@ -430,8 +435,8 @@
430
  <script type="importmap">
431
  {
432
  "imports": {
433
- "three": "https://cdnjs.cloudflare.com/ajax/libs/three.js/0.174.0/three.module.js",
434
- "@sparkjsdev/spark": "https://sparkjs.dev/releases/spark/0.1.6/spark.module.js",
435
  "lil-gui": "https://cdn.jsdelivr.net/npm/[email protected]/+esm"
436
  }
437
  }
@@ -469,6 +474,7 @@
469
  <div class="step">
470
  <h3>1. Configure</h3>
471
  <p>Set FOV and Resolution and Click "Fix Configurations"</p>
 
472
  </div>
473
 
474
 
@@ -640,6 +646,31 @@
640
  function updateStatus(message, cameraCount = null) {
641
  const cameraText = cameraCount !== null ? `Cameras: ${cameraCount}` : `Cameras: ${cameraParams.length}`;
642
  statusBar.textContent = `${message} | ${cameraText} | Status: ${fixGenerationFOV ? 'Ready to record' : 'Configure settings'}`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
643
  }
644
 
645
  // Show/hide loading
@@ -685,7 +716,151 @@
685
  if (progressText) progressText.textContent = text;
686
  }
687
 
688
- // Gradio handles concurrency automatically, no need for queue polling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
689
 
690
  // Hide download progress
691
  function hideDownloadProgress() {
@@ -741,8 +916,9 @@
741
 
742
  // GUI Options - declare early
743
  const guiOptions = {
744
- // Gradio后端地址,默认为本页面ip:7860
745
  BackendAddress: `${window.location.protocol}//${window.location.hostname}:7860`,
 
746
  FOV: 60,
747
  LoadFromJson: () => {
748
  const jsonInput = document.querySelector("#json-input");
@@ -805,11 +981,6 @@
805
  generateCameraTrajectory(guiOptions.templateType);
806
  },
807
  saveTrajectoryToJson: () => {
808
- if (cameraParams.length === 0) {
809
- updateStatus('No cameras to save.', cameraParams.length);
810
- console.warn('No cameras to save');
811
- return;
812
- }
813
 
814
  // Build JSON payload compatible with loader
815
  const [nStr, hStr, wStr] = guiOptions.Resolution.split('x');
@@ -913,14 +1084,15 @@
913
  console.log('Interpolated cameras:', interpolatedCameras.length);
914
  updateStatus('Sending request to backend...', cameraParams.length);
915
 
916
- // Gradio后端:使用Gradio API
 
917
  const requestData = {
918
  image_prompt: inputImageBase64 ? inputImageBase64 : "",
919
  text_prompt: guiOptions.inputTextPrompt,
920
  image_index: 0,
921
  resolution: [
922
- parseInt(guiOptions.Resolution.split('x')[0]),
923
- parseInt(guiOptions.Resolution.split('x')[1]),
924
  parseInt(guiOptions.Resolution.split('x')[2])
925
  ],
926
  cameras: interpolatedCameras.map(cam => ({
@@ -937,191 +1109,125 @@
       }))
     };

-    // Request generation from the Gradio backend
-    fetch(guiOptions.BackendAddress + '/gradio_api/call/gradio_generate', {
+    fetchWithAuth(requestUrl, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
       mode: 'cors',
-      body: JSON.stringify({
-        data: [JSON.stringify(requestData)]
-      })
+      body: JSON.stringify({ data: [JSON.stringify(requestData)] })
     })
     .then(response => response.json())
     .then(data => {
-      console.log('Gradio response:', data);
-
-      // Gradio always returns an event_id; the generation result must be fetched separately
-      if (data.event_id) {
-        console.log('Got EVENT_ID from generation call:', data.event_id);
-
-        // Use the EVENT_ID to fetch the generation result (SSE format)
-        return fetch(guiOptions.BackendAddress + `/gradio_api/call/gradio_generate/${data.event_id}`)
-          .then(response => {
-            if (!response.ok) {
-              throw new Error(`HTTP error! status: ${response.status}`);
-            }
-            return response.text();
-          })
-          .then(sseText => {
-            console.log('SSE response:', sseText);
-
-            // Parse the SSE-formatted response
-            const lines = sseText.split('\n');
-            let eventType = null;
-            let dataContent = null;
-
-            for (const line of lines) {
-              if (line.startsWith('event: ')) {
-                eventType = line.substring(7);
-              } else if (line.startsWith('data: ')) {
-                dataContent = line.substring(6);
-              }
-            }
-
-            console.log('Event type:', eventType, 'Data:', dataContent);
-
-            if (eventType === 'complete' && dataContent) {
-              // Parse the JSON payload
-              const resultData = JSON.parse(dataContent);
-              console.log('Generation result:', resultData);
-
-              // Parse the generation result
-              if (resultData && resultData.length > 0) {
-                const responseData = JSON.parse(resultData[0]);
-                console.log('Gradio generation successful:', responseData);
-
-                if (responseData.success && responseData.download_url) {
-                  console.log('Generation time:', responseData.generation_time, 'seconds');
-                  console.log('File size:', responseData.file_size, 'bytes');
-
-                  // Show generation info
-                  showGenerationInfo(responseData.generation_time, responseData.file_size);
-                  showDownloadProgress();
-                  updateStatus('Downloading generated scene...', cameraParams.length);
-
-                  // Now download the file; this also takes two steps: get the download EVENT_ID, then fetch the file
-                  return fetch(guiOptions.BackendAddress + '/gradio_api/call/download_file', {
-                    method: 'POST',
-                    headers: { 'Content-Type': 'application/json' },
-                    body: JSON.stringify({
-                      data: [responseData.file_id]
-                    })
-                  })
-                  .then(response => response.json())
-                  .then(downloadEventData => {
-                    console.log('Download EVENT_ID:', downloadEventData.event_id);
-
-                    // Use the download EVENT_ID to fetch the file info (SSE format)
-                    return fetch(guiOptions.BackendAddress + `/gradio_api/call/download_file/${downloadEventData.event_id}`)
-                      .then(response => {
-                        if (!response.ok) {
-                          throw new Error(`HTTP error! status: ${response.status}`);
-                        }
-                        return response.text();
-                      })
-                      .then(sseText => {
-                        console.log('Download SSE response:', sseText);
-
-                        // Parse the SSE-formatted response
-                        const lines = sseText.split('\n');
-                        let eventType = null;
-                        let dataContent = null;
-
-                        for (const line of lines) {
-                          if (line.startsWith('event: ')) {
-                            eventType = line.substring(7);
-                          } else if (line.startsWith('data: ')) {
-                            dataContent = line.substring(6);
-                          }
-                        }
-
-                        console.log('Download event type:', eventType, 'Data:', dataContent);
-
-                        if (eventType === 'complete' && dataContent) {
-                          // Parse the file info
-                          const fileData = JSON.parse(dataContent);
-                          console.log('File data:', fileData);
-
-                          if (fileData && fileData.length > 0 && fileData[0].url) {
-                            const fileUrl = fileData[0].url;
-                            console.log('File URL:', fileUrl);
-
-                            // Download the actual file from the returned URL
-                            return fetch(fileUrl)
-                              .then(response => {
-                                if (!response.ok) {
-                                  throw new Error(`HTTP error! status: ${response.status}`);
-                                }
-
-                                const contentLength = response.headers.get('content-length');
-                                const total = parseInt(contentLength, 10);
-                                let loaded = 0;
-
-                                const reader = response.body.getReader();
-                                const chunks = [];
-
-                                function pump() {
-                                  return reader.read().then(({ done, value }) => {
-                                    if (done) {
-                                      return new Blob(chunks);
-                                    }
-
-                                    chunks.push(value);
-                                    loaded += value.length;
-
-                                    if (total) {
-                                      const percentage = (loaded / total) * 100;
-                                      updateProgressBar(percentage);
-                                    }
-
-                                    return pump();
-                                  });
-                                }
-
-                                return pump().then(blob => {
-                                  const url = URL.createObjectURL(blob);
-                                  return { url };
-                                });
-                              });
-                          } else {
-                            throw new Error('Invalid file data format from Gradio');
-                          }
-                        } else {
-                          throw new Error('Gradio download SSE response not complete or missing data');
-                        }
-                      });
-                  });
-                } else {
-                  throw new Error('Gradio generation failed: ' + (responseData.error || 'Unknown error'));
-                }
-              } else {
-                throw new Error('Invalid Gradio generation result format');
-              }
-            } else {
-              throw new Error('Gradio SSE response not complete or missing data');
-            }
-          });
-      } else {
-        throw new Error('Invalid Gradio response format - no event_id');
-      }
+      // Gradio always returns an event_id; fetch the generation result over SSE
+      if (!data || !data.event_id) {
+        throw new Error('Invalid Gradio response format - no event_id');
+      }
+      return fetchWithAuth(guiOptions.BackendAddress + `/gradio_api/call/gradio_generate/${data.event_id}`)
+        .then(resp => {
+          if (!resp.ok) throw new Error(`HTTP error! status: ${resp.status}`);
+          return resp.text();
+        })
+        .then(sseText => {
+          const lines = sseText.split('\n');
+          let eventType = null;
+          let dataContent = null;
+          for (const line of lines) {
+            if (line.startsWith('event: ')) eventType = line.substring(7);
+            else if (line.startsWith('data: ')) dataContent = line.substring(6);
+          }
+          if (eventType !== 'complete' || !dataContent) {
+            throw new Error('Gradio SSE response not complete or missing data');
+          }
+          const resultData = JSON.parse(dataContent);
+          if (!resultData || resultData.length === 0) {
+            throw new Error('Invalid Gradio generation result format');
+          }
+          const responseData = JSON.parse(resultData[0]);
+          if (!responseData.success) {
+            throw new Error('Gradio generation failed: ' + (responseData.error || 'Unknown error'));
+          }
+
+          // Show generation info
+          showGenerationInfo(responseData.generation_time, responseData.file_size);
+          showDownloadProgress();
+          updateStatus('Downloading generated scene...', cameraParams.length);
+
+          // Download the file: call download_file to get a download event_id, read the URL over SSE, then fetch the file itself
+          return fetchWithAuth(guiOptions.BackendAddress + '/gradio_api/call/download_file', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ data: [responseData.file_id] })
+          })
+          .then(r => r.json())
+          .then(downloadEvent => {
+            return fetchWithAuth(guiOptions.BackendAddress + `/gradio_api/call/download_file/${downloadEvent.event_id}`)
+              .then(r => {
+                if (!r.ok) throw new Error(`HTTP error! status: ${r.status}`);
+                return r.text();
+              })
+              .then(downloadSseText => {
+                const lines = downloadSseText.split('\n');
+                let eventType = null;
+                let dataContent = null;
+                for (const line of lines) {
+                  if (line.startsWith('event: ')) eventType = line.substring(7);
+                  else if (line.startsWith('data: ')) dataContent = line.substring(6);
+                }
+                if (eventType !== 'complete' || !dataContent) {
+                  throw new Error('Gradio download SSE response not complete or missing data');
+                }
+                const fileData = JSON.parse(dataContent);
+                if (!fileData || fileData.length === 0 || !fileData[0].url) {
+                  throw new Error('Invalid file data format from Gradio');
+                }
+                return fileData[0].url;
+              });
+          })
+          .then(fileUrl => {
+            return fetchWithAuth(fileUrl).then(response => {
+              if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);
+              const contentLength = response.headers.get('content-length');
+              const total = parseInt(contentLength || '0', 10);
+              let loaded = 0;
+              const reader = response.body.getReader();
+              const chunks = [];
+              function pump() {
+                return reader.read().then(({ done, value }) => {
+                  if (done) return new Blob(chunks);
+                  chunks.push(value);
+                  loaded += value.length;
+                  if (total) updateProgressBar((loaded / total) * 100);
+                  return pump();
+                });
+              }
+              return pump().then(blob => {
+                const url = URL.createObjectURL(blob);
+                return { url, __deleteAfterDownloadFileId: (typeof responseData !== 'undefined' ? responseData.file_id : null) };
+              });
+            });
+          });
+        });
     })
     .then(data => {
-      if (data.url) {
+      if (data && data.url) {
         updateStatus('Loading 3D scene...', cameraParams.length);
-
-        // Remove the instruction splat when generation is complete
         if (instructionSplat) {
           scene.remove(instructionSplat);
           console.log('Instruction splat removed');
         }
-
         const GeneratedSplat = new SplatMesh({ url: data.url });
         scene.add(GeneratedSplat);
-        currentGeneratedSplat = GeneratedSplat; // keep a reference to the newly generated scene
+        currentGeneratedSplat = GeneratedSplat;
         console.log('3D scene loaded successfully!');
         updateStatus('Scene generated successfully!', cameraParams.length);
         hideDownloadProgress();
         showLoading(false);
+
+        // Notify the backend to delete the file (if a file_id is present)
+        if (data.__deleteAfterDownloadFileId) {
+          fetchWithAuth(guiOptions.BackendAddress + '/delete/' + data.__deleteAfterDownloadFileId, { method: 'POST' })
+            .then(() => console.log('Delete notify sent'))
+            .catch(err => console.warn('Delete notify failed', err));
+        }
       }
     })
     .catch(error => {
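The hunk above keeps Gradio's two-step call protocol: a POST to /gradio_api/call/gradio_generate returns an event_id, and a follow-up GET on /gradio_api/call/gradio_generate/<event_id> streams an SSE body whose final data: line carries the JSON result. A Python sketch of that protocol (endpoint names and payload shape come from the JS above; everything else is illustrative):

# Sketch: the two-step /gradio_api/call protocol used by the page.
import json
import requests

def gradio_call(backend, api_name, payload, token=""):
    headers = {"Authorization": f"Bearer {token}"} if token.strip() else {}
    # Step 1: enqueue the call; Gradio responds with an event_id.
    r = requests.post(
        f"{backend}/gradio_api/call/{api_name}",
        headers={**headers, "Content-Type": "application/json"},
        json={"data": payload},
    )
    event_id = r.json()["event_id"]
    # Step 2: read the SSE stream and keep the last 'event:'/'data:' pair.
    r = requests.get(f"{backend}/gradio_api/call/{api_name}/{event_id}", headers=headers)
    r.raise_for_status()
    event_type, data_line = None, None
    for line in r.text.splitlines():
        if line.startswith("event: "):
            event_type = line[len("event: "):]
        elif line.startswith("data: "):
            data_line = line[len("data: "):]
    if event_type != "complete" or data_line is None:
        raise RuntimeError("SSE response not complete or missing data")
    return json.loads(data_line)

# Usage mirroring the page, where request_data matches the requestData object above:
# result = gradio_call(backend, "gradio_generate", [json.dumps(request_data)])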
@@ -1499,7 +1605,8 @@

   // Step 1: Configure Generation Settings
   const step1Folder = gui.addFolder('1. Configure Settings');
-  step1Folder.add(guiOptions, "BackendAddress").name("Gradio Backend Address");
+  step1Folder.add(guiOptions, "BackendAddress").name("Backend Address");
+  step1Folder.add(guiOptions, "HF_TOKEN").name("HF Token");

   // FOV and Resolution controllers, enabled initially
   const fovController = step1Folder.add(guiOptions, "FOV", 0, 120, 1).name("FOV").onChange((value) => {
@@ -1546,6 +1653,9 @@
   const loadTrajectoryController = trajectoryFolder.add(guiOptions, "LoadTrajectoryFromJson").name("Load from JSON");
   const saveTrajectoryController = trajectoryFolder.add(guiOptions, "saveTrajectoryToJson").name("Save Trajectory");

+  // Initial state: disable the save button (not enough cameras yet)
+  saveTrajectoryController.disable();
+
   // Clear cameras button
   const clearAllCamerasController = trajectoryFolder.add(guiOptions, "clearAllCameras").name("Clear All Cameras");

1612
 
1613
  // Store controllers globally so they can be accessed from guiOptions
1614
  window.fixGenerationFOVController = fixGenerationFOVController;
 
1615
 
1616
  // Step 3: Add Scene Prompts
1617
  const step3Folder = gui.addFolder('3. Add Scene Prompts');
@@ -2025,6 +2136,7 @@
2025
  if (loadTrajectoryOnly) {
2026
  updateStatus(`Trajectory loaded: ${cameras.length} cameras`, cameraParams.length);
2027
  } else {
 
2028
  }
2029
  } catch (error) {
2030
  console.error("JSON data processing error:", error);
 

   .content-container {
     display: flex;
     flex: 1;
+    overflow: visible; /* Allow tooltips to extend beyond container */
   }

   .left-panel {

     border-right: 1px solid rgba(255, 255, 255, 0.1);
     padding: 20px;
     overflow-y: auto;
+    overflow-x: visible; /* Allow tooltips to extend beyond panel */
     flex-shrink: 0;
   }

     display: flex;
     justify-content: center;
     align-items: center;
+    z-index: 1; /* Lower z-index to allow tooltips to appear above */
   }

   .right-panel {

     padding: 20px;
     overflow-y: auto;
     flex-shrink: 0;
+    z-index: 1; /* Lower z-index to allow tooltips to appear above */
   }

   .guidance {

     font-size: 12px;
     cursor: default;
     user-select: none;
+    z-index: 100000; /* Ensure the tip itself is above everything */
   }
   .info-tip .tooltip {
     display: none;

     left: 0;
     top: calc(100% + 8px); /* show below the icon */
     transform: none;
+    background: rgba(0,0,0,0.95);
     color: #e5e7eb;
+    border: 1px solid rgba(255,255,255,0.2);
     border-radius: 8px;
     padding: 10px 12px;
     font-size: 12px;
+    width: 480px;
     white-space: normal;
+    z-index: 999999; /* Even higher z-index to ensure it's above everything */
+    box-shadow: 0 8px 24px rgba(0,0,0,0.6);
+    text-align: left;
   }
   .info-tip:hover .tooltip {
     display: block;

   <script type="importmap">
   {
     "imports": {
+      "three": "https://cdnjs.cloudflare.com/ajax/libs/three.js/0.178.0/three.module.js",
+      "@sparkjsdev/spark": "https://sparkjs.dev/releases/spark/0.1.9/spark.module.js",
       "lil-gui": "https://cdn.jsdelivr.net/npm/[email protected]/+esm"
     }
   }
 
474
  <div class="step">
475
  <h3>1. Configure</h3>
476
  <p>Set FOV and Resolution and Click "Fix Configurations"</p>
477
+ <p><strong>Important: You also need to specify your Hugging Face Access Token with READ permission to use the online free ZeroGPU service.</strong></p>
478
  </div>
479
 
480
 
 
646
  function updateStatus(message, cameraCount = null) {
647
  const cameraText = cameraCount !== null ? `Cameras: ${cameraCount}` : `Cameras: ${cameraParams.length}`;
648
  statusBar.textContent = `${message} | ${cameraText} | Status: ${fixGenerationFOV ? 'Ready to record' : 'Configure settings'}`;
649
+
650
+ // Update save trajectory button state
651
+ updateSaveTrajectoryButton();
652
+ }
653
+
654
+ // Update save trajectory button state based on camera count
655
+ function updateSaveTrajectoryButton() {
656
+ if (window.saveTrajectoryController) {
657
+ if (cameraParams.length >= 2) {
658
+ window.saveTrajectoryController.enable();
659
+ } else {
660
+ window.saveTrajectoryController.disable();
661
+ }
662
+ }
663
+ }
664
+
665
+ // Auth-aware fetch helper that injects Authorization header when HF_TOKEN is set
666
+ function fetchWithAuth(url, options = {}) {
667
+ const mergedOptions = { ...options };
668
+ const headers = new Headers(options && options.headers ? options.headers : undefined);
669
+ if (guiOptions && guiOptions.HF_TOKEN && String(guiOptions.HF_TOKEN).trim().length > 0) {
670
+ headers.set('Authorization', `Bearer ${guiOptions.HF_TOKEN}`);
671
+ }
672
+ mergedOptions.headers = headers;
673
+ return fetch(url, mergedOptions);
674
  }
675
 
676
  // Show/hide loading
 
716
  if (progressText) progressText.textContent = text;
717
  }
718
 
719
+ // ==============
720
+ // Queue handling
721
+ // ==============
722
+ let queuePollTimer = null;
723
+ let currentTaskId = null;
724
+ let initialQueuePosition = null;
725
+ let latestGenerationTime = null;
726
+ let lastDownloadPct = 0;
727
+ let lastDownloadUpdateTs = 0;
728
+
729
+ function showQueueWaiting(position, runningCount, queuedCount) {
730
+ // Use only the progress bar to show queue progress (from initial position to 0)
731
+ showDownloadProgress();
732
+ if (initialQueuePosition === null) {
733
+ // Initialize from first seen position; ensure >= 1 so 0 -> 100%
734
+ const initPos = (typeof position === 'number') ? position : 0;
735
+ initialQueuePosition = Math.max(initPos, 1);
736
+ }
737
+ const percent = initialQueuePosition && initialQueuePosition > 0
738
+ ? Math.max(0, Math.min(100, ((initialQueuePosition - (position || 0)) / initialQueuePosition) * 100))
739
+ : 0;
740
+ updateProgressBar(percent);
741
+ const totalWaiting = (position || 0) + (queuedCount || 0);
742
+ if (position !== null && position !== undefined) {
743
+ const pctText = `${Math.round(percent)}%`;
744
+ if (totalWaiting > 0) {
745
+ setProgressLabel(`Queued ${position}/${totalWaiting} (${pctText})`);
746
+ } else {
747
+ setProgressLabel(`Queued ${position} (${pctText})`);
748
+ }
749
+ } else {
750
+ setProgressLabel('Queued');
751
+ }
752
+ }
753
+
754
+ async function pollTaskUntilReady(taskId) {
755
+ currentTaskId = taskId;
756
+ initialQueuePosition = null;
757
+ if (queuePollTimer) {
758
+ clearInterval(queuePollTimer);
759
+ queuePollTimer = null;
760
+ }
761
+ const queueStartTs = Date.now();
762
+
763
+ const pollOnce = async () => {
764
+ try {
765
+ const resp = await fetchWithAuth(`${guiOptions.BackendAddress}/task/${taskId}`);
766
+ if (!resp.ok) return;
767
+ const info = await resp.json();
768
+ if (!info || !info.success) return;
769
+
770
+ const pos = info.queue && typeof info.queue.position === 'number' ? info.queue.position : 0;
771
+ const running = info.queue ? info.queue.running_count : 0;
772
+ const queued = info.queue ? info.queue.queued_count : 0;
773
+ if (info.status === 'queued' || info.status === 'running') {
774
+ // Only progress bar; set stage label
775
+ if (info.status === 'queued') {
776
+ showQueueWaiting(pos, running, queued);
777
+ } else {
778
+ // Transitioned to running: finalize queue progress visually
779
+ updateProgressBar(100);
780
+ showDownloadProgress();
781
+ setProgressLabel('Generating...');
782
+ }
783
+ }
784
+
785
+ if (info.status === 'completed' && info.download_url) {
786
+ clearInterval(queuePollTimer);
787
+ queuePollTimer = null;
788
+ latestGenerationTime = typeof info.generation_time === 'number' ? info.generation_time : null;
789
+ // Proceed to download the generated file like the normal path
790
+ updateStatus('Downloading generated scene...', cameraParams.length);
791
+ const response = await fetchWithAuth(guiOptions.BackendAddress + info.download_url);
792
+ if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);
793
+ const contentLength = response.headers.get('content-length');
794
+ const total = parseInt(contentLength || '0', 10);
795
+ // Show generation info immediately once we know it and total size from headers
796
+ showGenerationInfo(latestGenerationTime || 0, total);
797
+ let loaded = 0;
798
+ const reader = response.body.getReader();
799
+ const chunks = [];
800
+ updateProgressBar(0);
801
+ setProgressLabel('Downloading 0%');
802
+ lastDownloadPct = 0;
803
+ lastDownloadUpdateTs = 0;
804
+ while (true) {
805
+ const { done, value } = await reader.read();
806
+ if (done) break;
807
+ chunks.push(value);
808
+ loaded += value.length;
809
+ if (total) {
810
+ const pct = Math.min(100, (loaded / total) * 100);
811
+ const now = Date.now();
812
+ const rounded = Math.round(pct);
813
+ // Throttle and enforce monotonic increase
814
+ if (rounded > Math.round(lastDownloadPct) || (now - lastDownloadUpdateTs) > 200) {
815
+ lastDownloadPct = Math.max(lastDownloadPct, pct);
816
+ updateProgressBar(lastDownloadPct);
817
+ setProgressLabel(`Downloading ${Math.round(lastDownloadPct)}%`);
818
+ lastDownloadUpdateTs = now;
819
+ }
820
+ }
821
+ }
822
+
823
+ if (instructionSplat) {
824
+ scene.remove(instructionSplat);
825
+ console.log('Instruction splat removed');
826
+ instructionSplat = null;
827
+ }
828
+
829
+ const blob = new Blob(chunks);
830
+ const url = URL.createObjectURL(blob);
831
+ // Continue to load the splat
832
+ updateStatus('Loading generated scene...', cameraParams.length);
833
+
834
+ const GeneratedSplat = new SplatMesh({ url });
835
+ scene.add(GeneratedSplat);
836
+ currentGeneratedSplat = GeneratedSplat;
837
+ updateStatus('Scene generated successfully!', cameraParams.length);
838
+ // Show generation time and total file size (MB)
839
+ showGenerationInfo(latestGenerationTime || 0, total || blob.size);
840
+ // Notify backend to delete the server file after client has downloaded it
841
+ try {
842
+ if (info.file_id) {
843
+ const resp = await fetchWithAuth(`${guiOptions.BackendAddress}/delete/${info.file_id}`, { method: 'POST' });
844
+ if (!resp.ok) console.warn('Delete notify failed');
845
+ }
846
+ } catch (e) {
847
+ console.warn('Delete notify error', e);
848
+ }
849
+ hideDownloadProgress();
850
+ showLoading(false);
851
+ } else if (info.status === 'failed') {
852
+ clearInterval(queuePollTimer);
853
+ queuePollTimer = null;
854
+ throw new Error(info.error || 'Generation failed');
855
+ }
856
+ } catch (e) {
857
+ console.debug('Polling error:', e);
858
+ }
859
+ };
860
+
861
+ await pollOnce();
862
+ queuePollTimer = setInterval(pollOnce, 2000);
863
+ }
864
 
865
  // Hide download progress
866
  function hideDownloadProgress() {
 
916
 
917
  // GUI Options - declare early
918
  const guiOptions = {
919
+ // 后端地址,默认为本页面ip
920
  BackendAddress: `${window.location.protocol}//${window.location.hostname}:7860`,
921
+ HF_TOKEN: "",
922
  FOV: 60,
923
  LoadFromJson: () => {
924
  const jsonInput = document.querySelector("#json-input");
 
981
  generateCameraTrajectory(guiOptions.templateType);
982
  },
983
  saveTrajectoryToJson: () => {
 
 
 
 
 
984
 
985
  // Build JSON payload compatible with loader
986
  const [nStr, hStr, wStr] = guiOptions.Resolution.split('x');
 
1084
  console.log('Interpolated cameras:', interpolatedCameras.length);
1085
  updateStatus('Sending request to backend...', cameraParams.length);
1086
 
1087
+ // 调用 Gradio 后端:POST 到 /gradio_api/call/gradio_generate,然后通过 SSE 获取结果
1088
+ const requestUrl = guiOptions.BackendAddress + '/gradio_api/call/gradio_generate';
1089
  const requestData = {
1090
  image_prompt: inputImageBase64 ? inputImageBase64 : "",
1091
  text_prompt: guiOptions.inputTextPrompt,
1092
  image_index: 0,
1093
  resolution: [
1094
+ parseInt(guiOptions.Resolution.split('x')[0]),
1095
+ parseInt(guiOptions.Resolution.split('x')[1]),
1096
  parseInt(guiOptions.Resolution.split('x')[2])
1097
  ],
1098
  cameras: interpolatedCameras.map(cam => ({
 
1109
  }))
1110
  };
1111
 
1112
+ fetchWithAuth(requestUrl, {
 
1113
  method: 'POST',
1114
  headers: { 'Content-Type': 'application/json' },
1115
  mode: 'cors',
1116
+ body: JSON.stringify({ data: [JSON.stringify(requestData)] })
 
 
1117
  })
1118
  .then(response => response.json())
1119
  .then(data => {
1120
+ // Gradio 总是返回 event_id,需要使用 SSE 获取生成结果
1121
+ if (!data || !data.event_id) {
1122
+ throw new Error('Invalid Gradio response format - no event_id');
1123
+ }
1124
+ return fetchWithAuth(guiOptions.BackendAddress + `/gradio_api/call/gradio_generate/${data.event_id}`)
1125
+ .then(resp => {
1126
+ if (!resp.ok) throw new Error(`HTTP error! status: ${resp.status}`);
1127
+ return resp.text();
1128
+ })
1129
+ .then(sseText => {
1130
+ const lines = sseText.split('\n');
1131
+ let eventType = null;
1132
+ let dataContent = null;
1133
+ for (const line of lines) {
1134
+ if (line.startsWith('event: ')) eventType = line.substring(7);
1135
+ else if (line.startsWith('data: ')) dataContent = line.substring(6);
1136
+ }
1137
+ if (eventType !== 'complete' || !dataContent) {
1138
+ throw new Error('Gradio SSE response not complete or missing data');
1139
+ }
1140
+ const resultData = JSON.parse(dataContent);
1141
+ if (!resultData || resultData.length === 0) {
1142
+ throw new Error('Invalid Gradio generation result format');
1143
+ }
1144
+ const responseData = JSON.parse(resultData[0]);
1145
+ if (!responseData.success) {
1146
+ throw new Error('Gradio generation failed: ' + (responseData.error || 'Unknown error'));
1147
+ }
1148
+
1149
+ // 显示生成信息
1150
+ showGenerationInfo(responseData.generation_time, responseData.file_size);
1151
+ showDownloadProgress();
1152
+ updateStatus('Downloading generated scene...', cameraParams.length);
1153
+
1154
+ // ��载文件:调用 download_file 获取下载 event_id,然后通过 SSE 拿到 URL,再实际下载
1155
+ return fetchWithAuth(guiOptions.BackendAddress + '/gradio_api/call/download_file', {
1156
+ method: 'POST',
1157
+ headers: { 'Content-Type': 'application/json' },
1158
+ body: JSON.stringify({ data: [responseData.file_id] })
1159
  })
1160
+ .then(r => r.json())
1161
+ .then(downloadEvent => {
1162
+ return fetchWithAuth(guiOptions.BackendAddress + `/gradio_api/call/download_file/${downloadEvent.event_id}`)
1163
+ .then(r => {
1164
+ if (!r.ok) throw new Error(`HTTP error! status: ${r.status}`);
1165
+ return r.text();
1166
+ })
1167
+ .then(downloadSseText => {
1168
+ const lines = downloadSseText.split('\n');
1169
+ let eventType = null;
1170
+ let dataContent = null;
1171
+ for (const line of lines) {
1172
+ if (line.startsWith('event: ')) eventType = line.substring(7);
1173
+ else if (line.startsWith('data: ')) dataContent = line.substring(6);
1174
+ }
1175
+ if (eventType !== 'complete' || !dataContent) {
1176
+ throw new Error('Gradio download SSE response not complete or missing data');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1177
  }
1178
+ const fileData = JSON.parse(dataContent);
1179
+ if (!fileData || fileData.length === 0 || !fileData[0].url) {
1180
+ throw new Error('Invalid file data format from Gradio');
1181
+ }
1182
+ return fileData[0].url;
1183
+ });
1184
+ });
1185
+ })
1186
+ .then(fileUrl => {
1187
+ return fetchWithAuth(fileUrl).then(response => {
1188
+ if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);
1189
+ const contentLength = response.headers.get('content-length');
1190
+ const total = parseInt(contentLength || '0', 10);
1191
+ let loaded = 0;
1192
+ const reader = response.body.getReader();
1193
+ const chunks = [];
1194
+ function pump() {
1195
+ return reader.read().then(({ done, value }) => {
1196
+ if (done) return new Blob(chunks);
1197
+ chunks.push(value);
1198
+ loaded += value.length;
1199
+ if (total) updateProgressBar((loaded / total) * 100);
1200
+ return pump();
1201
+ });
1202
  }
1203
+ return pump().then(blob => {
1204
+ const url = URL.createObjectURL(blob);
1205
+ return { url, __deleteAfterDownloadFileId: (typeof responseData !== 'undefined' ? responseData.file_id : null) };
1206
+ });
1207
  });
1208
+ });
 
 
1209
  })
1210
  .then(data => {
1211
+ if (data && data.url) {
1212
  updateStatus('Loading 3D scene...', cameraParams.length);
 
 
1213
  if (instructionSplat) {
1214
  scene.remove(instructionSplat);
1215
  console.log('Instruction splat removed');
1216
  }
 
1217
  const GeneratedSplat = new SplatMesh({ url: data.url });
1218
  scene.add(GeneratedSplat);
1219
+ currentGeneratedSplat = GeneratedSplat;
1220
  console.log('3D scene loaded successfully!');
1221
  updateStatus('Scene generated successfully!', cameraParams.length);
1222
  hideDownloadProgress();
1223
  showLoading(false);
1224
+
1225
+ // 通知后端删除文件(如果有 file_id)
1226
+ if (data.__deleteAfterDownloadFileId) {
1227
+ fetchWithAuth(guiOptions.BackendAddress + '/delete/' + data.__deleteAfterDownloadFileId, { method: 'POST' })
1228
+ .then(() => console.log('Delete notify sent'))
1229
+ .catch(err => console.warn('Delete notify failed', err));
1230
+ }
1231
  }
1232
  })
1233
  .catch(error => {
 
1605
 
1606
  // Step 1: Configure Generation Settings
1607
  const step1Folder = gui.addFolder('1. Configure Settings');
1608
+ step1Folder.add(guiOptions, "BackendAddress").name("Backend Address");
1609
+ step1Folder.add(guiOptions, "HF_TOKEN").name("HF Token");
1610
 
1611
  // FOV和Resolution控制器,初始时启用
1612
  const fovController = step1Folder.add(guiOptions, "FOV", 0, 120, 1).name("FOV").onChange((value) => {
 
1653
  const loadTrajectoryController = trajectoryFolder.add(guiOptions, "LoadTrajectoryFromJson").name("Load from JSON");
1654
  const saveTrajectoryController = trajectoryFolder.add(guiOptions, "saveTrajectoryToJson").name("Save Trajectory");
1655
 
1656
+ // 初始状态:禁用保存按钮(相机数量不够)
1657
+ saveTrajectoryController.disable();
1658
+
1659
  // 清理相机按钮
1660
  const clearAllCamerasController = trajectoryFolder.add(guiOptions, "clearAllCameras").name("Clear All Cameras");
1661
 
 
1722
 
1723
  // Store controllers globally so they can be accessed from guiOptions
1724
  window.fixGenerationFOVController = fixGenerationFOVController;
1725
+ window.saveTrajectoryController = saveTrajectoryController;
1726
 
1727
  // Step 3: Add Scene Prompts
1728
  const step3Folder = gui.addFolder('3. Add Scene Prompts');
 
2136
  if (loadTrajectoryOnly) {
2137
  updateStatus(`Trajectory loaded: ${cameras.length} cameras`, cameraParams.length);
2138
  } else {
2139
+ updateStatus(`JSON loaded: ${cameras.length} cameras`, cameraParams.length);
2140
  }
2141
  } catch (error) {
2142
  console.error("JSON data processing error:", error);
models/render.py CHANGED
@@ -6,8 +6,6 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F

-from gsplat import rasterization
-
 # torch.backends.cuda.preferred_linalg_library(backend="magma")

 """"
@@ -17,6 +15,9 @@ class GaussianRendererWithCheckpoint(torch.autograd.Function):
     @staticmethod
     def render(xyz, feature, scale, rotation, opacity, test_c2w, test_intr,
                W, H, sh_degree, near_plane, far_plane, backgrounds):
+
+        from gsplat import rasterization
+
         test_w2c = test_c2w.float().inverse().unsqueeze(0) # (1, 4, 4)
         test_intr_i = torch.zeros(3, 3).to(test_intr.device)
         test_intr_i[0, 0] = test_intr[0]
@@ -29,6 +30,7 @@ class GaussianRendererWithCheckpoint(torch.autograd.Function):
             test_w2c, test_intr_i, W, H, sh_degree=sh_degree,
             near_plane=near_plane, far_plane=far_plane,
             render_mode="RGB+D",
+            tile_size=16,
             backgrounds=backgrounds[None],
             rasterize_mode='classic') # (1, H, W, 4)
         # rendering[..., 3:] = rendering[..., 3:] + far_plane * (1 - alpha)
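Moving from gsplat import rasterization inside render defers loading gsplat's CUDA extension until the first render call, so importing models.render no longer requires a working CUDA toolchain at import time (the hunk also pins tile_size=16 in the rasterization call). A minimal sketch of the deferred-import pattern, with hypothetical names:

# Sketch: import the CUDA-backed rasterizer on first use, not at module import.
def render_frame(*args, **kwargs):
    from gsplat import rasterization  # loaded only when rendering actually happens
    return rasterization(*args, **kwargs)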
packages.txt CHANGED
@@ -1,2 +1,4 @@
 libglm-dev
-ffmpeg
+ffmpeg
+gcc-11
+g++-11
pre-requirements.txt CHANGED
@@ -14,4 +14,5 @@ ftfy==6.3.1
 flask==3.1.2
 gradio==5.49.1
 gsplat==1.5.2
-accelerate==1.10.1
+accelerate==1.10.1
+nanobind==2.9.2
quant.py CHANGED
@@ -138,7 +138,7 @@ class FP8DynamicLinear(torch.nn.Module):
         super().__init__()
         self.weight = torch.nn.Parameter(weight, requires_grad=False)
         self.weight_scale = torch.nn.Parameter(weight_scale, requires_grad=False)
-        self.bias = bias
+        self.bias = torch.nn.Parameter(bias.to(dtype), requires_grad=False)
         self.native_fp8_support = native_fp8_support
         self.dtype = dtype

@@ -186,7 +186,6 @@ def FluxFp8GeMMProcessor(model: torch.nn.Module):
                 weight_scale=weight_scale,
                 bias=bias,
                 native_fp8_support=native_fp8_support,
-                dtype=linear.weight.dtype
             )
             replace_module(model, name, quant_linear)
             del linear.weight
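Wrapping bias in torch.nn.Parameter(bias.to(dtype), requires_grad=False) registers it on the module, so .to(device)/.cuda() and state_dict() handle it together with weight; a bare tensor attribute would be skipped. A small standalone illustration (hypothetical modules, not the repo's class):

# Sketch: plain tensor attributes are invisible to Module bookkeeping,
# registered Parameters are not.
import torch

class WithPlainAttr(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.bias = torch.zeros(4)  # plain attribute: not in state_dict, not moved by .to()

class WithParameter(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.bias = torch.nn.Parameter(torch.zeros(4, dtype=torch.bfloat16),
                                       requires_grad=False)

print("bias" in WithPlainAttr().state_dict())   # False
print("bias" in WithParameter().state_dict())   # True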
requirements.txt CHANGED
@@ -1 +1,2 @@
-git+https://github.com/huggingface/diffusers.git@447e8322f76efea55d4769cd67c372edbf0715b8
+git+https://github.com/huggingface/diffusers.git@447e8322f76efea55d4769cd67c372edbf0715b8
+git+https://github.com/nianticlabs/spz.git@a4fc69e7948c7152e807e6501d73ddc9c149ce37
utils.py CHANGED
@@ -125,7 +125,7 @@ class TimestepEmbedding(nn.Module):
         else:
            return timestep_embedding(t, self.dim, self.max_period, self.time_factor) * self.weight.unsqueeze(0)

-@torch.compile(mode="max-autotune-no-cudagraphs", dynamic=True)
+# @torch.compile(mode="max-autotune-no-cudagraphs", dynamic=True)
 def timestep_embedding(t, dim, max_period=10000, time_factor: float = 1000.0):
     """
     Create sinusoidal timestep embeddings.
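For reference, timestep_embedding is the standard sinusoidal embedding; commenting out the @torch.compile decorator avoids compile-at-first-call overhead for the dynamic shapes passed here. A conventional sketch of such a function (the repo's exact body is not shown in this diff; even dim assumed):

# Sketch: standard sinusoidal timestep embedding.
import math
import torch

def sinusoidal_embedding(t, dim, max_period=10000, time_factor=1000.0):
    # t: (N,) timesteps; returns (N, dim) embeddings (dim assumed even).
    t = time_factor * t
    half = dim // 2
    freqs = torch.exp(
        -math.log(max_period)
        * torch.arange(half, dtype=torch.float32, device=t.device) / half
    )
    args = t[:, None].float() * freqs[None]
    return torch.cat([torch.cos(args), torch.sin(args)], dim=-1)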
@@ -341,7 +341,7 @@ def matrix_to_square(mat):
     elif l==4:
         return torch.cat([mat, torch.tensor([0,0,0,1]).repeat(mat.shape[0],mat.shape[1],1,1).to(mat.device)],dim=2)

-def export_ply_for_gaussians(path, gaussians, opacity_threshold=0.00, T_norm=None):
+def export_gaussians(path, gaussians, opacity_threshold=0.00, T_norm=None):

     sh_degree = int(math.sqrt((gaussians.shape[-1] - sum([3, 1, 3, 4])) / 3 - 1))
@@ -380,28 +380,54 @@
     scales = scales.detach() #.cpu().numpy()
     rotations = rotations.detach() #.cpu().numpy()

-    l = ['x', 'y', 'z']
-    # All channels except the 3 DC
-    for i in range(f_dc.shape[1]):
-        l.append('f_dc_{}'.format(i))
-    l.append('opacity')
-    for i in range(scales.shape[1]):
-        l.append('scale_{}'.format(i))
-    for i in range(rotations.shape[1]):
-        l.append('rot_{}'.format(i))
-
-    dtype_full = [(attribute, 'f4') for attribute in l]
-
-    # Optimal approach: build the recarray directly with numpy
-    attributes = torch.cat((xyzs, f_dc, opacities, scales, rotations), dim=1).cpu().numpy()
-
-    # Create the recarray directly, avoiding loops and type conversion
-    elements = np.rec.fromarrays([attributes[:, i] for i in range(attributes.shape[1])], names=l, formats=['f4'] * len(l))
-    el = PlyElement.describe(elements, 'vertex')
-
-    print(path)
-
-    PlyData([el]).write(path)
-
+    """spz
+    Data Layout
+    The Python bindings maintain the same data layout as the C++ library:
+
+    Positions: [x1, y1, z1, x2, y2, z2, ...]
+    Scales: [sx1, sy1, sz1, sx2, sy2, sz2, ...] (log-scale)
+    Rotations: [x1, y1, z1, w1, x2, y2, z2, w2, ...] (quaternions)
+    Alphas: [a1, a2, a3, ...] (before sigmoid activation)
+    Colors: [r1, g1, b1, r2, g2, b2, ...] (base RGB)
+    Spherical Harmonics: Coefficient-major order, e.g., for degree 1: [sh1n1_r, sh1n1_g, sh1n1_b, sh10_r, sh10_g, sh10_b, sh1p1_r, sh1p1_g, sh1p1_b, ...]
+    """
+
+    import spz
+
+    cloud = spz.GaussianCloud()
+    cloud.sh_degree = sh_degree
+
+    cloud.positions = xyzs.flatten().cpu().numpy()
+    cloud.scales = scales.flatten().cpu().numpy()
+    cloud.rotations = rotations[:, [3, 0, 1, 2]].flatten().cpu().numpy()
+    cloud.alphas = opacities.flatten().cpu().numpy()
+    cloud.colors = f_dc[..., :3].flatten().cpu().numpy()
+    cloud.sh = f_dc[..., 3:].flatten().cpu().numpy()
+
+    spz.save_spz(cloud, spz.PackOptions(), path)
+
+    # l = ['x', 'y', 'z']
+    # # All channels except the 3 DC
+    # for i in range(f_dc.shape[1]):
+    #     l.append('f_dc_{}'.format(i))
+    # l.append('opacity')
+    # for i in range(scales.shape[1]):
+    #     l.append('scale_{}'.format(i))
+    # for i in range(rotations.shape[1]):
+    #     l.append('rot_{}'.format(i))
+
+    # dtype_full = [(attribute, 'f4') for attribute in l]

+    # # Optimal approach: build the recarray directly with numpy
+    # attributes = torch.cat((xyzs, f_dc, opacities, scales, rotations), dim=1).cpu().numpy()

+    # # Create the recarray directly, avoiding loops and type conversion
+    # elements = np.rec.fromarrays([attributes[:, i] for i in range(attributes.shape[1])], names=l, formats=['f4'] * len(l))
+    # el = PlyElement.describe(elements, 'vertex')

+    # print(path)

+    # PlyData([el]).write(path)

 # plydata = PlyData([el])
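The spz bindings used by export_gaussians can be exercised standalone; a sketch that saves a random cloud using only the calls appearing in the hunk above (degree-0 SH, so cloud.sh stays empty; the assumption that the bindings accept flat float32 numpy arrays mirrors the .flatten().cpu().numpy() assignments in the commit):

# Sketch: build and save a minimal spz GaussianCloud (illustrative data).
import numpy as np
import spz  # installed from the nianticlabs/spz revision pinned in requirements.txt

n = 1000
cloud = spz.GaussianCloud()
cloud.sh_degree = 0

cloud.positions = np.random.randn(n, 3).astype(np.float32).flatten()
cloud.scales = np.full((n, 3), -4.0, dtype=np.float32).flatten()  # log-scale
# Identity quaternion in the x,y,z,w layout quoted in the docstring above.
cloud.rotations = np.tile(np.array([0, 0, 0, 1], dtype=np.float32), (n, 1)).flatten()
cloud.alphas = np.zeros(n, dtype=np.float32)                      # pre-sigmoid
cloud.colors = np.random.rand(n, 3).astype(np.float32).flatten()  # base RGB
cloud.sh = np.zeros(0, dtype=np.float32)                          # no higher-order SH

spz.save_spz(cloud, spz.PackOptions(), "scene.spz")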