ml-sharp / app.py
ronedgecomb's picture
Initial commit
a1b6914 verified
"""SHARP Gradio demo (minimal, responsive UI).
This Space:
- Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image.
- Exports a canonical `.ply` file for download.
- Optionally renders a camera trajectory `.mp4` (CUDA / ZeroGPU only).
Precompiled examples
Place precompiled examples under `assets/examples/`.
Recommended structure (matching stem):
assets/examples/<name>.jpg|png|webp
assets/examples/<name>.mp4
assets/examples/<name>.ply
Optional manifest (assets/examples/manifest.json):
[
{"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
...
]
"""
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Final
import gradio as gr
from model_utils import TrajectoryType, predict_and_maybe_render_gpu
# -----------------------------------------------------------------------------
# Paths & constants
# -----------------------------------------------------------------------------
# Directory that contains this file; every other path below is anchored to it.
APP_DIR: Final[Path] = Path(__file__).resolve().parent
# Created at import time further down in this module.
# NOTE(review): presumably where generated outputs are written — confirm in model_utils.
OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
ASSETS_DIR: Final[Path] = APP_DIR / "assets"
# Location scanned by discover_examples() for precompiled example bundles.
EXAMPLES_DIR: Final[Path] = ASSETS_DIR / "examples"
# Image suffixes globbed (as "*<ext>") when examples are inferred without a manifest.
IMAGE_EXTS: Final[tuple[str, ...]] = (".png", ".jpg", ".jpeg", ".webp")
# Passed as max_size to demo.queue() in build_demo().
DEFAULT_QUEUE_MAX_SIZE: Final[int] = 32
# Gradio "Soft" theme with an indigo / blue / slate palette; handed to
# demo.launch() in the __main__ guard at the bottom of this file.
THEME: Final = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
)
# Custom stylesheet passed to demo.launch(css=CSS). The id selectors
# (#run-image, #run-video, #examples-image, #examples-video) match the
# elem_id values assigned to the media components in build_demo().
CSS: Final[str] = """
/* Keep layout stable when scrollbars appear/disappear */
html { scrollbar-gutter: stable; }
/* Use normal document flow (no fixed-height viewport shell) */
html, body { height: auto; }
body { overflow: auto; }
/* Comfortable max width; still fills small screens */
.gradio-container {
max-width: 1400px;
margin: 0 auto;
padding: 0.75rem 1rem 1rem;
box-sizing: border-box;
}
/* Make media components responsive without stretching */
#run-image, #run-video,
#examples-image, #examples-video {
width: 100%;
}
/* Keep aspect ratio and prevent runaway vertical growth on tall viewports */
#run-image img, #examples-image img {
width: 100%;
height: auto;
max-height: 70vh;
object-fit: contain;
}
#run-video video, #examples-video video {
width: 100%;
height: auto;
max-height: 70vh;
object-fit: contain;
}
/* On very small screens, reduce max media height a bit */
@media (max-width: 640px) {
#run-image img, #examples-image img,
#run-video video, #examples-video video {
max-height: 55vh;
}
}
/* Reduce extra whitespace in markdown blocks */
.gr-markdown > :first-child { margin-top: 0 !important; }
.gr-markdown > :last-child { margin-bottom: 0 !important; }
"""
# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------
def _ensure_dir(path: Path) -> Path:
path.mkdir(parents=True, exist_ok=True)
return path
@dataclass(frozen=True, slots=True)
class ExampleSpec:
    """A precompiled example bundle (image + optional mp4 + optional ply).

    Instances are immutable (``frozen=True``). They are built by
    ``discover_examples`` and looked up again by ``load_example_assets``.
    """

    # Display name; used as the example label in the Examples tab.
    label: str
    # Path of the example's input image (resolved by discover_examples).
    image: Path
    # Pre-rendered trajectory video, or None when no MP4 exists for the bundle.
    video: Path | None
    # Exported Gaussian scene, or None when no PLY exists for the bundle.
    ply: Path | None
def _normalize_key(path: str) -> str:
"""Normalize a path-like string for stable dictionary keys."""
try:
return str(Path(path).resolve())
except Exception:
return path
def _load_manifest(manifest_path: Path) -> list[dict]:
"""Load manifest.json if present; return an empty list on errors."""
try:
data = json.loads(manifest_path.read_text(encoding="utf-8"))
if not isinstance(data, list):
raise ValueError("manifest.json must contain a JSON list.")
return [x for x in data if isinstance(x, dict)]
except FileNotFoundError:
return []
except Exception as e:
# Manifest errors should not crash the app.
print(f"[examples] Failed to parse manifest.json: {type(e).__name__}: {e}")
return []
def discover_examples(examples_dir: Path) -> list[ExampleSpec]:
    """Collect the example bundles stored in ``examples_dir``.

    The directory is created if it does not exist. Two discovery modes are
    supported:

    * Manifest mode - used whenever ``manifest.json`` yields at least one
      entry. Each entry needs an ``image`` that exists on disk, otherwise it
      is skipped; ``video`` and ``ply`` are kept only when the named file
      exists. A blank or missing ``label`` becomes ``"Example"``.
    * Fallback mode - every file matching one of ``IMAGE_EXTS`` becomes a
      bundle, paired with ``<stem>.mp4`` / ``<stem>.ply`` when present.
      Results are ordered by extension (in ``IMAGE_EXTS`` order) and sorted
      within each extension.

    Returns:
        The discovered bundles; possibly an empty list.
    """
    _ensure_dir(examples_dir)

    def _manifest_file(entry: dict, key: str) -> Path | None:
        """Resolve the file named by ``entry[key]``; None if unset or absent."""
        relative = entry.get(key)
        if not relative:
            return None
        candidate = (examples_dir / str(relative)).resolve()
        return candidate if candidate.exists() else None

    def _sibling(stem: str, suffix: str) -> Path | None:
        """Return the resolved ``<stem><suffix>`` next to the image, if any."""
        candidate = examples_dir / f"{stem}{suffix}"
        return candidate.resolve() if candidate.exists() else None

    entries = _load_manifest(examples_dir / "manifest.json")
    if entries:
        bundles: list[ExampleSpec] = []
        for entry in entries:
            image = _manifest_file(entry, "image")
            if image is None:
                # An entry without a usable image cannot be shown at all.
                continue
            bundles.append(
                ExampleSpec(
                    label=str(entry.get("label") or "Example").strip() or "Example",
                    image=image,
                    video=_manifest_file(entry, "video"),
                    ply=_manifest_file(entry, "ply"),
                )
            )
        return bundles

    # Fallback: infer bundles by filename stem
    found = [
        match
        for suffix in IMAGE_EXTS
        for match in sorted(examples_dir.glob(f"*{suffix}"))
    ]
    return [
        ExampleSpec(
            label=img.stem.replace("_", " ").strip() or img.stem,
            image=img.resolve(),
            video=_sibling(img.stem, ".mp4"),
            ply=_sibling(img.stem, ".ply"),
        )
        for img in found
    ]
# Make sure the outputs directory exists as soon as the module is imported.
_ensure_dir(OUTPUTS_DIR)
# Example bundles are discovered once, at import time.
EXAMPLE_SPECS: Final[list[ExampleSpec]] = discover_examples(EXAMPLES_DIR)
# Primary lookup: normalized (resolved) image path -> bundle.
EXAMPLE_INDEX_BY_PATH: Final[dict[str, ExampleSpec]] = {
    _normalize_key(str(s.image)): s for s in EXAMPLE_SPECS
}
# Secondary lookup: bare image file name -> bundle. load_example_assets()
# falls back to it when the path lookup misses; for duplicate file names the
# later bundle in EXAMPLE_SPECS wins.
EXAMPLE_INDEX_BY_NAME: Final[dict[str, ExampleSpec]] = {
    s.image.name: s for s in EXAMPLE_SPECS
}
def load_example_assets(
image_path: str | None,
) -> tuple[str | None, str | None, str | None, str]:
"""Return (image, video, ply_path, status) for the selected example image."""
if not image_path:
return None, None, None, "No example selected."
spec = EXAMPLE_INDEX_BY_PATH.get(_normalize_key(image_path))
if spec is None:
spec = EXAMPLE_INDEX_BY_NAME.get(Path(image_path).name)
if spec is None:
return image_path, None, None, "No matching example bundle found."
video = str(spec.video) if spec.video is not None else None
ply_path = str(spec.ply) if spec.ply is not None else None
missing: list[str] = []
if video is None:
missing.append("MP4")
if ply_path is None:
missing.append("PLY")
msg = f"Loaded example: **{spec.label}**."
if missing:
msg += f" Missing: {', '.join(missing)}."
return str(spec.image), video, ply_path, msg
def _validate_image(image_path: str | None) -> None:
if not image_path:
raise gr.Error("Upload an image first.")
def run_sharp(
    image_path: str | None,
    trajectory_type: TrajectoryType,
    output_long_side: int | None,
    num_frames: int,
    fps: int,
    render_video: bool,
) -> tuple[str | None, str | None, str]:
    """Run SHARP inference and return (video_path, ply_path, status_markdown).

    Args:
        image_path: Path of the uploaded input image.
        trajectory_type: Camera trajectory forwarded to the renderer.
        output_long_side: Requested long side of the output. ``0``, a
            negative value or ``None`` (nothing selected) all mean
            "match input".
        num_frames: Number of frames forwarded to the renderer.
        fps: Frame rate forwarded to the renderer.
        render_video: Whether an MP4 should be rendered in addition to the PLY.

    Returns:
        ``(video_path, ply_path, status_markdown)``; ``video_path`` is
        ``None`` when no video was produced.

    Raises:
        gr.Error: If no image was supplied, if an input cannot be coerced,
            or if inference/rendering fails for any reason.
    """
    _validate_image(image_path)

    try:
        # Coerce inside the ``try`` so that a malformed or missing value is
        # reported as a user-facing gr.Error, like every other input problem,
        # instead of escaping as a raw TypeError/ValueError.
        long_side = 0 if output_long_side is None else int(output_long_side)
        out_long_side: int | None = long_side if long_side > 0 else None

        video_path, ply_path = predict_and_maybe_render_gpu(
            image_path,
            trajectory_type=trajectory_type,
            num_frames=int(num_frames),
            fps=int(fps),
            output_long_side=out_long_side,
            render_video=bool(render_video),
        )

        lines: list[str] = [f"**PLY:** `{ply_path.name}` (ready to download)"]
        if render_video:
            if video_path is None:
                # Rendering was requested but the backend returned no video.
                lines.append("**Video:** not rendered (CUDA unavailable).")
            else:
                lines.append(f"**Video:** `{video_path.name}`")
        else:
            lines.append("**Video:** disabled.")

        return (
            str(video_path) if video_path is not None else None,
            str(ply_path),
            "\n".join(lines),
        )
    except gr.Error:
        # Already user-facing; pass it through untouched.
        raise
    except Exception as e:
        raise gr.Error(f"SHARP failed: {type(e).__name__}: {e}") from e
# -----------------------------------------------------------------------------
# UI
# -----------------------------------------------------------------------------
def build_demo() -> gr.Blocks:
    """Build the Gradio Blocks app with its Run, Examples and About tabs.

    * Run - image upload plus trajectory / resolution / frame controls; the
      "Generate" button calls ``run_sharp`` and fills the video player, the
      PLY download button and the status text.
    * Examples - only populated when ``EXAMPLE_SPECS`` is non-empty. Clicking
      an example runs ``load_example_assets`` to preview the precompiled
      outputs and also loads the image into the Run tab's input.
    * About - citation for the SHARP paper.

    Returns:
        The assembled ``gr.Blocks`` with queueing enabled (queue size
        ``DEFAULT_QUEUE_MAX_SIZE``, default concurrency limit 1).
    """
    # NOTE(review): container nesting was reconstructed from a flattened
    # source; confirm the placement of the download row, status text, action
    # row and the trailing examples hint against the intended layout.
    with gr.Blocks(
        title="SHARP • Single-Image 3D Gaussian Prediction",
        elem_id="sharp-root",
        fill_height=True,
    ) as demo:
        gr.Markdown("## SHARP\nSingle-image **3D Gaussian scene** prediction.")

        # Run tab components are referenced by Examples tab, so keep them in outer scope.
        with gr.Column(elem_id="tabs-shell"):
            with gr.Tabs():
                with gr.Tab("Run", id="run"):
                    with gr.Column(elem_id="run-panel"):
                        with gr.Row(equal_height=True, elem_id="run-media-row"):
                            with gr.Column(
                                scale=5, min_width=360, elem_id="run-left-col"
                            ):
                                image_in = gr.Image(
                                    label="Input image",
                                    type="filepath",
                                    sources=["upload"],
                                    elem_id="run-image",
                                )

                                with gr.Row():
                                    trajectory = gr.Dropdown(
                                        label="Trajectory",
                                        choices=[
                                            "swipe",
                                            "shake",
                                            "rotate",
                                            "rotate_forward",
                                        ],
                                        value="rotate_forward",
                                    )
                                    output_res = gr.Dropdown(
                                        label="Output long side",
                                        info="0 = match input",
                                        choices=[
                                            ("Match input", 0),
                                            ("512", 512),
                                            ("768", 768),
                                            ("1024", 1024),
                                            ("1280", 1280),
                                            ("1536", 1536),
                                        ],
                                        value=0,
                                    )

                                with gr.Row():
                                    frames = gr.Slider(
                                        label="Frames",
                                        minimum=24,
                                        maximum=120,
                                        step=1,
                                        value=60,
                                    )
                                    fps_in = gr.Slider(
                                        label="FPS",
                                        minimum=8,
                                        maximum=60,
                                        step=1,
                                        value=30,
                                    )

                                render_toggle = gr.Checkbox(
                                    label="Render MP4 (CUDA / ZeroGPU only)",
                                    value=True,
                                )

                            with gr.Column(
                                scale=5, min_width=360, elem_id="run-right-col"
                            ):
                                video_out = gr.Video(
                                    label="Trajectory video (MP4)",
                                    elem_id="run-video",
                                )
                                with gr.Row(elem_id="run-download-row"):
                                    ply_download = gr.DownloadButton(
                                        label="Download PLY (.ply)",
                                        value=None,
                                        visible=True,
                                        elem_id="run-ply-download",
                                    )
                                status_md = gr.Markdown("", elem_id="run-status")

                        with gr.Row(elem_id="run-actions-row"):
                            run_btn = gr.Button("Generate", variant="primary")
                            clear_btn = gr.ClearButton(
                                [image_in, video_out, ply_download, status_md],
                                value="Clear",
                            )

                        # Ensure clearing also clears any previous download target.
                        clear_btn.click(
                            fn=lambda: None,
                            outputs=[ply_download],
                            queue=False,
                        )

                        # Input order must match run_sharp's positional parameters.
                        run_btn.click(
                            fn=run_sharp,
                            inputs=[
                                image_in,
                                trajectory,
                                output_res,
                                frames,
                                fps_in,
                                render_toggle,
                            ],
                            outputs=[video_out, ply_download, status_md],
                            api_visibility="public",
                        )

                with gr.Tab("Examples", id="examples"):
                    with gr.Column(elem_id="examples-panel"):
                        if EXAMPLE_SPECS:
                            gr.Markdown(
                                "Click an example to preview precompiled outputs. "
                                "The example image will also be loaded into the Run tab."
                            )

                            # Define preview outputs first (unrendered), so we can reference them from gr.Examples.
                            ex_img = gr.Image(
                                label="Example image",
                                type="filepath",
                                interactive=False,
                                render=False,
                                height=360,
                                elem_id="examples-image",
                            )
                            ex_vid = gr.Video(
                                label="Pre-rendered MP4",
                                render=False,
                                height=360,
                                elem_id="examples-video",
                            )
                            ex_ply = gr.DownloadButton(
                                label="Download PLY (.ply)",
                                value=None,
                                visible=True,
                                render=False,
                                elem_id="examples-ply-download",
                            )
                            ex_status = gr.Markdown(
                                render=False, elem_id="examples-status"
                            )

                            with gr.Row(equal_height=True):
                                with gr.Column(scale=4, min_width=320):
                                    # The clicked image is written to the Run
                                    # tab's input and also passed to
                                    # load_example_assets for the preview.
                                    gr.Examples(
                                        examples=[
                                            [str(s.image)] for s in EXAMPLE_SPECS
                                        ],
                                        example_labels=[s.label for s in EXAMPLE_SPECS],
                                        inputs=[image_in],
                                        outputs=[ex_img, ex_vid, ex_ply, ex_status],
                                        fn=load_example_assets,
                                        cache_examples=False,
                                        run_on_click=True,
                                        examples_per_page=10,
                                        label=None,
                                    )

                                with gr.Column(scale=6, min_width=360):
                                    # Place the previously created previews here.
                                    ex_img.render()
                                    ex_vid.render()
                                    ex_ply.render()
                                    ex_status.render()

                            gr.Markdown(
                                "Add example bundles under `assets/examples/` "
                                "(image + mp4 + ply) or provide a `manifest.json`."
                            )
                        else:
                            gr.Markdown(
                                "No precompiled examples found.\n\n"
                                "Add files under `assets/examples/`:\n"
                                "- `example.jpg` (or png/webp)\n"
                                "- `example.mp4`\n"
                                "- `example.ply`\n\n"
                                "Optionally add `assets/examples/manifest.json` to define labels and filenames."
                            )

                with gr.Tab("About", id="about"):
                    with gr.Column(elem_id="about-panel"):
                        # BibTeX separates authors with " and "; every name
                        # in the list must be delimited that way.
                        gr.Markdown(
                            """
*Sharp Monocular View Synthesis in Less Than a Second* (Apple, 2025)
```bibtex
@inproceedings{Sharp2025:arxiv,
title = {Sharp Monocular View Synthesis in Less Than a Second},
author = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Ama\\"{e}l Delaunoy and Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun},
journal = {arXiv preprint arXiv:2512.10685},
year = {2025},
url = {https://arxiv.org/abs/2512.10685},
}
```
""".strip()
                        )

    # Bounded queue; events run one at a time unless they override the limit.
    demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1)
    return demo
# Build the app at import time so that a module-level ``demo`` object exists.
# NOTE(review): presumably so the hosting runtime can import it — confirm.
demo = build_demo()

if __name__ == "__main__":
    # Theme and CSS are supplied at launch time rather than to gr.Blocks().
    demo.launch(theme=THEME, css=CSS)