Daniellesry committed
Commit d823b65 · 1 Parent(s): 4ef344d
Files changed (1)
  1. app.py +251 -254
app.py CHANGED
@@ -396,293 +396,290 @@ def process_video(
 
- def main():
-
-     #* gradio creation and initialization
-
-     css = """
-     #video-display-container {
-         max-height: 100vh;
-     }
-     #video-display-input {
-         max-height: 80vh;
-     }
-     #video-display-output {
-         max-height: 80vh;
-     }
-     #download {
-         height: 62px;
-     }
-     .title {
-         text-align: center;
-     }
-     .description {
-         text-align: center;
-     }
-     .gradio-examples {
-         max-height: 400px;
-         overflow-y: auto;
-     }
-     .gradio-examples .examples-container {
-         display: grid;
-         grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
-         gap: 10px;
-         padding: 10px;
-     }
-     .gradio-container .gradio-examples .pagination,
-     .gradio-container .gradio-examples .pagination button,
-     div[data-testid="examples"] .pagination,
-     div[data-testid="examples"] .pagination button {
-         font-size: 28px !important;
-         font-weight: bold !important;
-         padding: 15px 20px !important;
-         min-width: 60px !important;
-         height: 60px !important;
-         border-radius: 10px !important;
-         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
-         color: white !important;
-         border: none !important;
-         cursor: pointer !important;
-         margin: 8px !important;
-         display: inline-block !important;
-         box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
-         transition: all 0.3s ease !important;
-     }
-
-     div[data-testid="examples"] .pagination button:not(.active),
-     .gradio-container .gradio-examples .pagination button:not(.active) {
-         font-size: 32px !important;
-         font-weight: bold !important;
-         padding: 15px 20px !important;
-         min-width: 60px !important;
-         height: 60px !important;
-         background: linear-gradient(135deg, #8a9cf0 0%, #9a6bb2 100%) !important;
-         opacity: 0.8 !important;
-     }
-
-     div[data-testid="examples"] .pagination button:hover,
-     .gradio-container .gradio-examples .pagination button:hover {
-         background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%) !important;
-         transform: translateY(-2px) !important;
-         box-shadow: 0 6px 12px rgba(0,0,0,0.3) !important;
-         opacity: 1 !important;
-     }
-
-     div[data-testid="examples"] .pagination button.active,
-     .gradio-container .gradio-examples .pagination button.active {
-         background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%) !important;
-         box-shadow: 0 4px 8px rgba(17,153,142,0.4) !important;
-         opacity: 1 !important;
-     }
-
-     button[class*="pagination"],
-     button[class*="page"] {
-         font-size: 28px !important;
-         font-weight: bold !important;
-         padding: 15px 20px !important;
-         min-width: 60px !important;
-         height: 60px !important;
-         border-radius: 10px !important;
-         background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
-         color: white !important;
-         border: none !important;
-         cursor: pointer !important;
-         margin: 8px !important;
-         box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
-         transition: all 0.3s ease !important;
-     }
     """
 
-
-     head_html = """
-     <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
-     <link rel="shortcut icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
-     <link rel="icon" type="image/png" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
-     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     """
 
-
-     # description = """Official demo for **DKT **."""
 
-     # with gr.Blocks(css=css, title="DKT - Diffusion Knows Transparency", favicon_path="favicon.ico") as demo:
 
-     height = 480
-     width = 832
-     window_size = 21
-     with gr.Blocks(css=css, title="DKT", head=head_html) as demo:
-         # gr.Markdown(title, elem_classes=["title"])
-         """
-         <a title="Website" href="https://stable-x.github.io/StableNormal/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
-             <img src="https://www.obukhov.ai/img/badges/badge-website.svg">
-         </a>
-         <a title="arXiv" href="https://arxiv.org/abs/2406.16864" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
-             <img src="https://www.obukhov.ai/img/badges/badge-pdf.svg">
-         </a>
-         <a title="Social" href="https://x.com/ychngji6" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
-             <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
-         </a>
-         """
-
-         gr.Markdown(
-             """
-             # Diffusion Knows Transparency: Repurposing Video Diffusion for Transparent Object Depth and Normal Estimation
-             <p align="center">
-             <a title="Github" href="https://github.com/Daniellli/DKT" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
-                 <img src="https://img.shields.io/github/stars/Daniellli/DKT?style=social" alt="badge-github-stars">
-             </a>
-             """
-         )
-         # gr.Markdown(description, elem_classes=["description"])
-         # gr.Markdown("### Video Processing Demo", elem_classes=["description"])
 
-         with gr.Row():
-             with gr.Column():
-                 input_video = gr.Video(label="Input Video", elem_id='video-display-input')
-
-                 model_size = gr.Radio(
-                     choices=["1.3B", "14B"],
-                     value="1.3B",
-                     label="Model Size"
-                 )
 
-                 with gr.Accordion("Advanced Parameters", open=False):
-                     num_inference_steps = gr.Slider(
-                         minimum=1, maximum=50, value=5, step=1,
-                         label="Number of Inference Steps"
-                     )
-                     overlap = gr.Slider(
-                         minimum=1, maximum=20, value=3, step=1,
-                         label="Overlap"
-                     )
-
-                 submit = gr.Button(value="Compute Depth", variant="primary")
 
-             with gr.Column():
-                 output_video = gr.Video(
-                     label="Depth Outputs",
-                     elem_id='video-display-output',
-                     autoplay=True
-                 )
-                 vis_video = gr.Video(
-                     label="Visualization Video",
-                     visible=False,
-                     autoplay=True
-                 )
-
-         with gr.Row():
-             gr.Markdown("### 3D Point Cloud Visualization", elem_classes=["title"])
-
-         with gr.Row(equal_height=True):
-             with gr.Column(scale=1):
-                 output_point_map0 = LitModel3D(
-                     label="Point Cloud Key Frame 1",
-                     clear_color=[1.0, 1.0, 1.0, 1.0],
-                     interactive=False,
-                     # height=400,
-                 )
-             with gr.Column(scale=1):
-                 output_point_map1 = LitModel3D(
-                     label="Point Cloud Key Frame 2",
-                     clear_color=[1.0, 1.0, 1.0, 1.0],
-                     interactive=False
-                 )
 
-         with gr.Row(equal_height=True):
-             with gr.Column(scale=1):
-                 output_point_map2 = LitModel3D(
-                     label="Point Cloud Key Frame 3",
-                     clear_color=[1.0, 1.0, 1.0, 1.0],
-                     interactive=False
-                 )
-             with gr.Column(scale=1):
-                 output_point_map3 = LitModel3D(
-                     label="Point Cloud Key Frame 4",
-                     clear_color=[1.0, 1.0, 1.0, 1.0],
-                     interactive=False
-                 )
-
-         def on_submit(video_file, model_size, num_inference_steps, overlap):
-             if video_file is None:
-                 return None, None, None, None, None, None, "Please upload a video file"
-
-             try:
-                 output_path, glb_files = process_video(
-                     video_file, model_size, height, width, num_inference_steps, window_size, overlap
-                 )
-
-                 if output_path is None:
-                     return None, None, None, None, None, None, glb_files
-
-                 model3d_outputs = [None] * 4
-                 if glb_files:
-                     for i, glb_file in enumerate(glb_files[:4]):
-                         if os.path.exists(glb_file):
-                             model3d_outputs[i] = glb_file
-
-                 return output_path, None, *model3d_outputs
-
-             except Exception as e:
-                 return None, None, None, None, None, None, f"Error: {str(e)}"
-
-         submit.click(
-             on_submit,
-             inputs=[
-                 input_video, model_size, num_inference_steps, overlap
-             ],
             outputs=[
                 output_video, vis_video,
                 output_point_map0, output_point_map1, output_point_map2, output_point_map3
-             ]
         )
-
-         example_files = glob.glob('examples/*')
-         if example_files:
-             example_inputs = []
-             for file_path in example_files:
-                 example_inputs.append([file_path, "1.3B", 5, 3])
-
-             examples = gr.Examples(
-                 examples=example_inputs,
-                 inputs=[input_video, model_size, num_inference_steps, overlap],
-                 outputs=[
-                     output_video, vis_video,
-                     output_point_map0, output_point_map1, output_point_map2, output_point_map3
-                 ],
-                 fn=on_submit,
-                 examples_per_page=6
-             )
-
     #* main code, model and moge model initialization
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     load_model_1_3b(device=device)
     load_moge_model(device=device)
     torch.cuda.empty_cache()
 
-     demo.queue().launch(share = True,server_name="0.0.0.0", server_port=7860)
 
- if __name__ == '__main__':
-     main()
 
+ #* gradio creation and initialization
+
+ css = """
+ #video-display-container {
+     max-height: 100vh;
+ }
+ #video-display-input {
+     max-height: 80vh;
+ }
+ #video-display-output {
+     max-height: 80vh;
+ }
+ #download {
+     height: 62px;
+ }
+ .title {
+     text-align: center;
+ }
+ .description {
+     text-align: center;
+ }
+ .gradio-examples {
+     max-height: 400px;
+     overflow-y: auto;
+ }
+ .gradio-examples .examples-container {
+     display: grid;
+     grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+     gap: 10px;
+     padding: 10px;
+ }
+ .gradio-container .gradio-examples .pagination,
+ .gradio-container .gradio-examples .pagination button,
+ div[data-testid="examples"] .pagination,
+ div[data-testid="examples"] .pagination button {
+     font-size: 28px !important;
+     font-weight: bold !important;
+     padding: 15px 20px !important;
+     min-width: 60px !important;
+     height: 60px !important;
+     border-radius: 10px !important;
+     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+     color: white !important;
+     border: none !important;
+     cursor: pointer !important;
+     margin: 8px !important;
+     display: inline-block !important;
+     box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
+     transition: all 0.3s ease !important;
+ }
+
+ div[data-testid="examples"] .pagination button:not(.active),
+ .gradio-container .gradio-examples .pagination button:not(.active) {
+     font-size: 32px !important;
+     font-weight: bold !important;
+     padding: 15px 20px !important;
+     min-width: 60px !important;
+     height: 60px !important;
+     background: linear-gradient(135deg, #8a9cf0 0%, #9a6bb2 100%) !important;
+     opacity: 0.8 !important;
+ }
+
+ div[data-testid="examples"] .pagination button:hover,
+ .gradio-container .gradio-examples .pagination button:hover {
+     background: linear-gradient(135deg, #5a6fd8 0%, #6a4190 100%) !important;
+     transform: translateY(-2px) !important;
+     box-shadow: 0 6px 12px rgba(0,0,0,0.3) !important;
+     opacity: 1 !important;
+ }
+
+ div[data-testid="examples"] .pagination button.active,
+ .gradio-container .gradio-examples .pagination button.active {
+     background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%) !important;
+     box-shadow: 0 4px 8px rgba(17,153,142,0.4) !important;
+     opacity: 1 !important;
+ }
+
+ button[class*="pagination"],
+ button[class*="page"] {
+     font-size: 28px !important;
+     font-weight: bold !important;
+     padding: 15px 20px !important;
+     min-width: 60px !important;
+     height: 60px !important;
+     border-radius: 10px !important;
+     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+     color: white !important;
+     border: none !important;
+     cursor: pointer !important;
+     margin: 8px !important;
+     box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
+     transition: all 0.3s ease !important;
+ }
+ """
+
+ head_html = """
+ <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
+ <link rel="shortcut icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
+ <link rel="icon" type="image/png" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'%3E%3Ctext y='.9em' font-size='90'%3E🦾%3C/text%3E%3C/svg%3E">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ """
+
+ # description = """Official demo for **DKT **."""
+
+ # with gr.Blocks(css=css, title="DKT - Diffusion Knows Transparency", favicon_path="favicon.ico") as demo:
+
+ height = 480
+ width = 832
+ window_size = 21
+ with gr.Blocks(css=css, title="DKT", head=head_html) as demo:
+     # gr.Markdown(title, elem_classes=["title"])
     """
 
+     <a title="Website" href="https://stable-x.github.io/StableNormal/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://www.obukhov.ai/img/badges/badge-website.svg">
+     </a>
+     <a title="arXiv" href="https://arxiv.org/abs/2406.16864" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://www.obukhov.ai/img/badges/badge-pdf.svg">
+     </a>
+     <a title="Social" href="https://x.com/ychngji6" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+         <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
+     </a>
+
     """
 
+     gr.Markdown(
+         """
+         # Diffusion Knows Transparency: Repurposing Video Diffusion for Transparent Object Depth and Normal Estimation
+         <p align="center">
+         <a title="Github" href="https://github.com/Daniellli/DKT" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+             <img src="https://img.shields.io/github/stars/Daniellli/DKT?style=social" alt="badge-github-stars">
+         </a>
+         """
+     )
+     # gr.Markdown(description, elem_classes=["description"])
+     # gr.Markdown("### Video Processing Demo", elem_classes=["description"])
 
+     with gr.Row():
+         with gr.Column():
+             input_video = gr.Video(label="Input Video", elem_id='video-display-input')
+
+             model_size = gr.Radio(
+                 choices=["1.3B", "14B"],
+                 value="1.3B",
+                 label="Model Size"
+             )
 
+             with gr.Accordion("Advanced Parameters", open=False):
+                 num_inference_steps = gr.Slider(
+                     minimum=1, maximum=50, value=5, step=1,
+                     label="Number of Inference Steps"
+                 )
+                 overlap = gr.Slider(
+                     minimum=1, maximum=20, value=3, step=1,
+                     label="Overlap"
+                 )
+
+             submit = gr.Button(value="Compute Depth", variant="primary")
+
+         with gr.Column():
+             output_video = gr.Video(
+                 label="Depth Outputs",
+                 elem_id='video-display-output',
+                 autoplay=True
+             )
+             vis_video = gr.Video(
+                 label="Visualization Video",
+                 visible=False,
+                 autoplay=True
+             )
 
+     with gr.Row():
+         gr.Markdown("### 3D Point Cloud Visualization", elem_classes=["title"])
+
+     with gr.Row(equal_height=True):
+         with gr.Column(scale=1):
+             output_point_map0 = LitModel3D(
+                 label="Point Cloud Key Frame 1",
+                 clear_color=[1.0, 1.0, 1.0, 1.0],
+                 interactive=False,
+                 # height=400,
+             )
+         with gr.Column(scale=1):
+             output_point_map1 = LitModel3D(
+                 label="Point Cloud Key Frame 2",
+                 clear_color=[1.0, 1.0, 1.0, 1.0],
+                 interactive=False
+             )
 
+     with gr.Row(equal_height=True):
+         with gr.Column(scale=1):
+             output_point_map2 = LitModel3D(
+                 label="Point Cloud Key Frame 3",
+                 clear_color=[1.0, 1.0, 1.0, 1.0],
+                 interactive=False
+             )
+         with gr.Column(scale=1):
+             output_point_map3 = LitModel3D(
+                 label="Point Cloud Key Frame 4",
+                 clear_color=[1.0, 1.0, 1.0, 1.0],
+                 interactive=False
+             )
+     def on_submit(video_file, model_size, num_inference_steps, overlap):
+         if video_file is None:
+             # Six components are wired to submit.click below, so every return
+             # path must yield exactly six values; gr.Error surfaces the message
+             # in the UI without needing a seventh return slot.
+             raise gr.Error("Please upload a video file")
+
+         try:
+             output_path, glb_files = process_video(
+                 video_file, model_size, height, width, num_inference_steps, window_size, overlap
+             )
+
+             if output_path is None:
+                 # On failure, process_video appears to report its message via
+                 # the second return value; log it and keep the six-value arity.
+                 logger.error(glb_files)
+                 return None, None, None, None, None, None
+
+             model3d_outputs = [None] * 4
+             if glb_files:
+                 for i, glb_file in enumerate(glb_files[:4]):
+                     if os.path.exists(glb_file):
+                         model3d_outputs[i] = glb_file
+
+             return output_path, None, *model3d_outputs
+
+         except Exception as e:
+             logger.error(e)
+             return None, None, None, None, None, None
+
+     submit.click(
+         on_submit,
+         inputs=[
+             input_video, model_size, num_inference_steps, overlap
+         ],
+         outputs=[
+             output_video, vis_video,
+             output_point_map0, output_point_map1, output_point_map2, output_point_map3
+         ]
+     )
+
+     example_files = glob.glob('examples/*')
+     logger.info(f'there are {len(example_files)} demo files')
+     if example_files:
+         example_inputs = []
+         for file_path in example_files:
+             example_inputs.append([file_path, "1.3B", 5, 3])
+
+         examples = gr.Examples(
+             examples=example_inputs,
+             inputs=[input_video, model_size, num_inference_steps, overlap],
             outputs=[
                 output_video, vis_video,
                 output_point_map0, output_point_map1, output_point_map2, output_point_map3
+             ],
+             fn=on_submit,
+             examples_per_page=6
         )
 
+ if __name__ == '__main__':
+
     #* main code, model and moge model initialization
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     logger.info(f"device = {device}")
     load_model_1_3b(device=device)
     load_moge_model(device=device)
     torch.cuda.empty_cache()
 
+     demo.queue().launch(share=False, server_name="0.0.0.0", server_port=7860)
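
A note on the handler contract this hunk depends on: Gradio matches the tuple returned by a click handler positionally against the components in `outputs=[...]`, so every return path in `on_submit` must yield exactly six values, one per wired component. Below is a minimal, self-contained sketch of that contract, not part of the commit: the stub names are hypothetical, and the built-in `gr.Model3D` stands in for the Space's `LitModel3D` custom component.

```python
import gradio as gr

def on_submit_stub(video_file):
    # One return value per wired output component: (output_video, vis_video,
    # four point-cloud viewers) -> exactly six values on every path.
    if video_file is None:
        # gr.Error shows a message in the UI without consuming an output slot.
        raise gr.Error("Please upload a video file")
    return video_file, None, None, None, None, None

with gr.Blocks() as stub_demo:
    input_video = gr.Video(label="Input Video")
    submit = gr.Button("Compute Depth")
    output_video = gr.Video(label="Depth Outputs")
    vis_video = gr.Video(label="Visualization Video", visible=False)
    point_maps = [gr.Model3D(label=f"Point Cloud Key Frame {i + 1}") for i in range(4)]
    submit.click(
        on_submit_stub,
        inputs=[input_video],
        outputs=[output_video, vis_video, *point_maps],
    )

if __name__ == "__main__":
    stub_demo.launch()
```

Raising `gr.Error` (or calling `gr.Warning`) reports a problem in the UI without reserving a return slot for the message, which is why the seven-value returns in the pre-change code had no component to receive the trailing string.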