Daniellesry committed
Commit d823b65 · 1 Parent(s): 4ef344d
Files changed (1)
  1. app.py +251 -254
app.py CHANGED
@@ -396,293 +396,290 @@ def process_video(
 
- def main():
-
-     #* gradio creation and initialization
-
-     css = """
-     #video-display-container {
-         max-height: 100vh;
-     }
-     #video-display-input {
-         max-height: 80vh;
-     }
-     #video-display-output {
-         max-height: 80vh;
-     }
-     #download {
-         height: 62px;
-     }
-     .title {
-         text-align: center;
-     }
-     .description {
-         text-align: center;
-     }
-     .gradio-examples {
-         max-height: 400px;
-         overflow-y: auto;
-     }
-     .gradio-examples .examples-container {
-         display: grid;
-         grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
-         gap: 10px;
-         padding: 10px;
-     }
-     .gradio-container .gradio-examples .pagination,
-     .gradio-container .gradio-examples .pagination button,
-     div[data-testid="examples"] .pagination,
-     div[data-testid="examples"] .pagination button {
-         font-size: 28px !important;
-         font-weight: bold !important;
-         padding: 15px 20px !important;
-         min-width: 60px !important;
-         height: 60px !important;
-         border-radius: 10px !important;
-         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
-         color: white !important;
-         border: none !important;
-         cursor: pointer !important;
-         margin: 8px !important;
-         display: inline-block !important;
-         box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
-         transition: all 0.3s ease !important;
-     }
-
-     div[data-testid="examples"] .pagination button:not(.active),
-     .gradio-container .gradio-examples .pagination button:not(.active) {
-         font-size: 32px !important;
-         font-weight: bold !important;
-         padding: 15px 20px !important;
-         min-width: 60px !important;
-         height: 60px !important;
-         background: linear-gradient(135deg, #8a9cf0 0%, #9a6bb2 100%) !important;
-         opacity: 0.8 !important;
-     }
-
-     div[data-testid="examples"] .pagination button:hover,
-     .gradio-container .gradio-examples .pagination button:hover {
-         background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%) !important;
-         transform: translateY(-2px) !important;
-         box-shadow: 0 6px 12px rgba(0,0,0,0.3) !important;
-         opacity: 1 !important;
-     }
-
-     div[data-testid="examples"] .pagination button.active,
-     .gradio-container .gradio-examples .pagination button.active {
-         background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%) !important;
-         box-shadow: 0 4px 8px rgba(17,153,142,0.4) !important;
-         opacity: 1 !important;
-     }
-
-     button[class*="pagination"],
-     button[class*="page"] {
-         font-size: 28px !important;
-         font-weight: bold !important;
-         padding: 15px 20px !important;
-         min-width: 60px !important;
-         height: 60px !important;
-         border-radius: 10px !important;
-         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
-         color: white !important;
-         border: none !important;
-         cursor: pointer !important;
-         margin: 8px !important;
-         box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
-         transition: all 0.3s ease !important;
-     }
     """
 
-
-     head_html = """
-     <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
-     <link rel="shortcut icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
-     <link rel="icon" type="image/png" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
-     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     """
 
-
-     # description = """Official demo for **DKT **."""
 
-     # with gr.Blocks(css=css, title="DKT - Diffusion Knows Transparency", favicon_path="favicon.ico") as demo:
 
-     height = 480
-     width = 832
-     window_size = 21
-     with gr.Blocks(css=css, title="DKT", head=head_html) as demo:
-         # gr.Markdown(title, elem_classes=["title"])
-         """
-         <a title="Website" href="https://stable-x.github.io/StableNormal/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
-             <img src="https://www.obukhov.ai/img/badges/badge-website.svg">
-         </a>
-         <a title="arXiv" href="https://arxiv.org/abs/2406.16864" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
-             <img src="https://www.obukhov.ai/img/badges/badge-pdf.svg">
-         </a>
-         <a title="Social" href="https://x.com/ychngji6" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
-             <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
-         </a>
-         """
-
-         gr.Markdown(
-             """
-             # Diffusion Knows Transparency: Repurposing Video Diffusion for Transparent Object Depth and Normal Estimation
-             <p align="center">
-             <a title="Github" href="https://github.com/Daniellli/DKT" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
-                 <img src="https://img.shields.io/github/stars/Daniellli/DKT?style=social" alt="badge-github-stars">
-             </a>
-             """
-         )
-         # gr.Markdown(description, elem_classes=["description"])
-         # gr.Markdown("### Video Processing Demo", elem_classes=["description"])
 
-         with gr.Row():
-             with gr.Column():
-                 input_video = gr.Video(label="Input Video", elem_id='video-display-input')
-
-                 model_size = gr.Radio(
-                     choices=["1.3B", "14B"],
-                     value="1.3B",
-                     label="Model Size"
-                 )
 
-                 with gr.Accordion("Advanced Parameters", open=False):
-                     num_inference_steps = gr.Slider(
-                         minimum=1, maximum=50, value=5, step=1,
-                         label="Number of Inference Steps"
-                     )
-                     overlap = gr.Slider(
-                         minimum=1, maximum=20, value=3, step=1,
-                         label="Overlap"
-                     )
-
-                 submit = gr.Button(value="Compute Depth", variant="primary")
 
-             with gr.Column():
-                 output_video = gr.Video(
-                     label="Depth Outputs",
-                     elem_id='video-display-output',
-                     autoplay=True
-                 )
-                 vis_video = gr.Video(
-                     label="Visualization Video",
-                     visible=False,
-                     autoplay=True
-                 )
-
-         with gr.Row():
-             gr.Markdown("### 3D Point Cloud Visualization", elem_classes=["title"])
-
-         with gr.Row(equal_height=True):
-             with gr.Column(scale=1):
-                 output_point_map0 = LitModel3D(
-                     label="Point Cloud Key Frame 1",
-                     clear_color=[1.0, 1.0, 1.0, 1.0],
-                     interactive=False,
-                     # height=400,
-                 )
-             with gr.Column(scale=1):
-                 output_point_map1 = LitModel3D(
-                     label="Point Cloud Key Frame 2",
-                     clear_color=[1.0, 1.0, 1.0, 1.0],
-                     interactive=False
-                 )
 
-         with gr.Row(equal_height=True):
-             with gr.Column(scale=1):
-                 output_point_map2 = LitModel3D(
-                     label="Point Cloud Key Frame 3",
-                     clear_color=[1.0, 1.0, 1.0, 1.0],
-                     interactive=False
-                 )
-             with gr.Column(scale=1):
-                 output_point_map3 = LitModel3D(
-                     label="Point Cloud Key Frame 4",
-                     clear_color=[1.0, 1.0, 1.0, 1.0],
-                     interactive=False
-                 )
-
-         def on_submit(video_file, model_size, num_inference_steps, overlap):
-             if video_file is None:
-                 return None, None, None, None, None, None, "Please upload a video file"
-
-             try:
-                 output_path, glb_files = process_video(
-                     video_file, model_size, height, width, num_inference_steps, window_size, overlap
-                 )
-
-                 if output_path is None:
-                     return None, None, None, None, None, None, glb_files
-
-                 model3d_outputs = [None] * 4
-                 if glb_files:
-                     for i, glb_file in enumerate(glb_files[:4]):
-                         if os.path.exists(glb_file):
-                             model3d_outputs[i] = glb_file
-
-                 return output_path, None, *model3d_outputs
-
-             except Exception as e:
-                 return None, None, None, None, None, None, f"Error: {str(e)}"
-
-         submit.click(
-             on_submit,
-             inputs=[
-                 input_video, model_size, num_inference_steps, overlap
-             ],
             outputs=[
                 output_video, vis_video,
                 output_point_map0, output_point_map1, output_point_map2, output_point_map3
-             ]
         )
-
-         example_files = glob.glob('examples/*')
-         if example_files:
-             example_inputs = []
-             for file_path in example_files:
-                 example_inputs.append([file_path, "1.3B", 5, 3])
-
-             examples = gr.Examples(
-                 examples=example_inputs,
-                 inputs=[input_video, model_size, num_inference_steps, overlap],
-                 outputs=[
-                     output_video, vis_video,
-                     output_point_map0, output_point_map1, output_point_map2, output_point_map3
-                 ],
-                 fn=on_submit,
-                 examples_per_page=6
-             )
-
     #* main code, model and moge model initialization
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     load_model_1_3b(device=device)
     load_moge_model(device=device)
     torch.cuda.empty_cache()
 
-     demo.queue().launch(share = True,server_name="0.0.0.0", server_port=7860)
 
- if __name__ == '__main__':
-     main()
 
+ #* gradio creation and initialization
+
+ css = """
+ #video-display-container {
+     max-height: 100vh;
+ }
+ #video-display-input {
+     max-height: 80vh;
+ }
+ #video-display-output {
+     max-height: 80vh;
+ }
+ #download {
+     height: 62px;
+ }
+ .title {
+     text-align: center;
+ }
+ .description {
+     text-align: center;
+ }
+ .gradio-examples {
+     max-height: 400px;
+     overflow-y: auto;
+ }
+ .gradio-examples .examples-container {
+     display: grid;
+     grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+     gap: 10px;
+     padding: 10px;
+ }
+ .gradio-container .gradio-examples .pagination,
+ .gradio-container .gradio-examples .pagination button,
+ div[data-testid="examples"] .pagination,
+ div[data-testid="examples"] .pagination button {
+     font-size: 28px !important;
+     font-weight: bold !important;
+     padding: 15px 20px !important;
+     min-width: 60px !important;
+     height: 60px !important;
+     border-radius: 10px !important;
+     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+     color: white !important;
+     border: none !important;
+     cursor: pointer !important;
+     margin: 8px !important;
+     display: inline-block !important;
+     box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
+     transition: all 0.3s ease !important;
+ }
+
+ div[data-testid="examples"] .pagination button:not(.active),
+ .gradio-container .gradio-examples .pagination button:not(.active) {
+     font-size: 32px !important;
+     font-weight: bold !important;
+     padding: 15px 20px !important;
+     min-width: 60px !important;
+     height: 60px !important;
+     background: linear-gradient(135deg, #8a9cf0 0%, #9a6bb2 100%) !important;
+     opacity: 0.8 !important;
+ }
+
+ div[data-testid="examples"] .pagination button:hover,
+ .gradio-container .gradio-examples .pagination button:hover {
+     background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%) !important;
+     transform: translateY(-2px) !important;
+     box-shadow: 0 6px 12px rgba(0,0,0,0.3) !important;
+     opacity: 1 !important;
+ }
+
+ div[data-testid="examples"] .pagination button.active,
+ .gradio-container .gradio-examples .pagination button.active {
+     background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%) !important;
+     box-shadow: 0 4px 8px rgba(17,153,142,0.4) !important;
+     opacity: 1 !important;
+ }
+
+ button[class*="pagination"],
+ button[class*="page"] {
+     font-size: 28px !important;
+     font-weight: bold !important;
+     padding: 15px 20px !important;
+     min-width: 60px !important;
+     height: 60px !important;
+     border-radius: 10px !important;
+     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+     color: white !important;
+     border: none !important;
+     cursor: pointer !important;
+     margin: 8px !important;
+     box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
+     transition: all 0.3s ease !important;
+ }
+ """
+
+ head_html = """
+ <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
+ <link rel="shortcut icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
+ <link rel="icon" type="image/png" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ """
+
+ # description = """Official demo for **DKT **."""
+
+ # with gr.Blocks(css=css, title="DKT - Diffusion Knows Transparency", favicon_path="favicon.ico") as demo:
+
+ height = 480
+ width = 832
+ window_size = 21
+ with gr.Blocks(css=css, title="DKT", head=head_html) as demo:
+     # gr.Markdown(title, elem_classes=["title"])
     """
 
+     <a title="Website" href="https://stable-x.github.io/StableNormal/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://www.obukhov.ai/img/badges/badge-website.svg">
+     </a>
+     <a title="arXiv" href="https://arxiv.org/abs/2406.16864" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://www.obukhov.ai/img/badges/badge-pdf.svg">
+     </a>
+     <a title="Social" href="https://x.com/ychngji6" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
+     </a>
+
     """
 
+     gr.Markdown(
+         """
+         # Diffusion Knows Transparency: Repurposing Video Diffusion for Transparent Object Depth and Normal Estimation
+         <p align="center">
+         <a title="Github" href="https://github.com/Daniellli/DKT" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+             <img src="https://img.shields.io/github/stars/Daniellli/DKT?style=social" alt="badge-github-stars">
+         </a>
+         """
+     )
+     # gr.Markdown(description, elem_classes=["description"])
+     # gr.Markdown("### Video Processing Demo", elem_classes=["description"])
 
+     with gr.Row():
+         with gr.Column():
+             input_video = gr.Video(label="Input Video", elem_id='video-display-input')
+
+             model_size = gr.Radio(
+                 choices=["1.3B", "14B"],
+                 value="1.3B",
+                 label="Model Size"
+             )
 
+             with gr.Accordion("Advanced Parameters", open=False):
+                 num_inference_steps = gr.Slider(
+                     minimum=1, maximum=50, value=5, step=1,
+                     label="Number of Inference Steps"
+                 )
+                 overlap = gr.Slider(
+                     minimum=1, maximum=20, value=3, step=1,
+                     label="Overlap"
+                 )
+
+             submit = gr.Button(value="Compute Depth", variant="primary")
+
+         with gr.Column():
+             output_video = gr.Video(
+                 label="Depth Outputs",
+                 elem_id='video-display-output',
+                 autoplay=True
+             )
+             vis_video = gr.Video(
+                 label="Visualization Video",
+                 visible=False,
+                 autoplay=True
+             )
 
+     with gr.Row():
+         gr.Markdown("### 3D Point Cloud Visualization", elem_classes=["title"])
+
+     with gr.Row(equal_height=True):
+         with gr.Column(scale=1):
+             output_point_map0 = LitModel3D(
+                 label="Point Cloud Key Frame 1",
+                 clear_color=[1.0, 1.0, 1.0, 1.0],
+                 interactive=False,
+                 # height=400,
+             )
+         with gr.Column(scale=1):
+             output_point_map1 = LitModel3D(
+                 label="Point Cloud Key Frame 2",
+                 clear_color=[1.0, 1.0, 1.0, 1.0],
+                 interactive=False
+             )
 
+     with gr.Row(equal_height=True):
+         with gr.Column(scale=1):
+             output_point_map2 = LitModel3D(
+                 label="Point Cloud Key Frame 3",
+                 clear_color=[1.0, 1.0, 1.0, 1.0],
+                 interactive=False
+             )
+         with gr.Column(scale=1):
+             output_point_map3 = LitModel3D(
+                 label="Point Cloud Key Frame 4",
+                 clear_color=[1.0, 1.0, 1.0, 1.0],
+                 interactive=False
+             )
+     def on_submit(video_file, model_size, num_inference_steps, overlap):
+         if video_file is None:
+             # Six components are wired to submit.click below, so every return
+             # path must yield exactly six values; gr.Error surfaces the message
+             # in the UI without needing a seventh return slot.
+             raise gr.Error("Please upload a video file")
+
+         try:
+             output_path, glb_files = process_video(
+                 video_file, model_size, height, width, num_inference_steps, window_size, overlap
+             )
+
+             if output_path is None:
+                 # On failure, process_video appears to report its message via
+                 # the second return value; log it and keep the six-value arity.
+                 logger.error(glb_files)
+                 return None, None, None, None, None, None
+
+             model3d_outputs = [None] * 4
+             if glb_files:
+                 for i, glb_file in enumerate(glb_files[:4]):
+                     if os.path.exists(glb_file):
+                         model3d_outputs[i] = glb_file
+
+             return output_path, None, *model3d_outputs
+
+         except Exception as e:
+             logger.error(e)
+             return None, None, None, None, None, None
+
+     submit.click(
+         on_submit,
+         inputs=[
+             input_video, model_size, num_inference_steps, overlap
+         ],
+         outputs=[
+             output_video, vis_video,
+             output_point_map0, output_point_map1, output_point_map2, output_point_map3
+         ]
+     )
+
+     example_files = glob.glob('examples/*')
+     logger.info(f'there are {len(example_files)} demo files')
+     if example_files:
+         example_inputs = []
+         for file_path in example_files:
+             example_inputs.append([file_path, "1.3B", 5, 3])
+
+         examples = gr.Examples(
+             examples=example_inputs,
+             inputs=[input_video, model_size, num_inference_steps, overlap],
             outputs=[
                 output_video, vis_video,
                 output_point_map0, output_point_map1, output_point_map2, output_point_map3
+             ],
+             fn=on_submit,
+             examples_per_page=6
         )
 
+ if __name__ == '__main__':
+
     #* main code, model and moge model initialization
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     logger.info(f"device = {device}")
     load_model_1_3b(device=device)
     load_moge_model(device=device)
     torch.cuda.empty_cache()
 
+     demo.queue().launch(share=False, server_name="0.0.0.0", server_port=7860)
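
A note on the handler contract this hunk depends on: Gradio matches the tuple returned by a click handler positionally against the components in `outputs=[...]`, so every return path in `on_submit` must yield exactly six values, one per wired component. Below is a minimal, self-contained sketch of that contract, not part of the commit: the stub names are hypothetical, and the built-in `gr.Model3D` stands in for the Space's `LitModel3D` custom component.

```python
import gradio as gr

def on_submit_stub(video_file):
    # One return value per wired output component: (output_video, vis_video,
    # four point-cloud viewers) -> exactly six values on every path.
    if video_file is None:
        # gr.Error shows a message in the UI without consuming an output slot.
        raise gr.Error("Please upload a video file")
    return video_file, None, None, None, None, None

with gr.Blocks() as stub_demo:
    input_video = gr.Video(label="Input Video")
    submit = gr.Button("Compute Depth")
    output_video = gr.Video(label="Depth Outputs")
    vis_video = gr.Video(label="Visualization Video", visible=False)
    point_maps = [gr.Model3D(label=f"Point Cloud Key Frame {i + 1}") for i in range(4)]
    submit.click(
        on_submit_stub,
        inputs=[input_video],
        outputs=[output_video, vis_video, *point_maps],
    )

if __name__ == "__main__":
    stub_demo.launch()
```

Raising `gr.Error` (or calling `gr.Warning`) reports a problem in the UI without reserving a return slot for the message, which is why the seven-value returns in the pre-change code had no component to receive the trailing string.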