Mountchicken committed on
Commit
ced4fcf
Β·
verified Β·
1 Parent(s): a171f59

Upload 35 files

Browse files
Files changed (36) hide show
  1. .gitattributes +9 -0
  2. tutorials/.DS_Store +0 -0
  3. tutorials/detection_example/.DS_Store +0 -0
  4. tutorials/detection_example/_full_notebook.ipynb +0 -0
  5. tutorials/detection_example/detection_example.py +70 -0
  6. tutorials/detection_example/gui_grounding_example.py +60 -0
  7. tutorials/detection_example/layout_grouding_examle.py +59 -0
  8. tutorials/detection_example/referring_example.py +64 -0
  9. tutorials/detection_example/test_images/boys.jpg +3 -0
  10. tutorials/detection_example/test_images/cafe.jpg +3 -0
  11. tutorials/detection_example/test_images/gui.png +3 -0
  12. tutorials/detection_example/test_images/layout.jpg +3 -0
  13. tutorials/keypointing_example/.DS_Store +0 -0
  14. tutorials/keypointing_example/_full_tutorial.ipynb +0 -0
  15. tutorials/keypointing_example/animal_keypointing_example.py +63 -0
  16. tutorials/keypointing_example/person_keypointing_example.py +64 -0
  17. tutorials/keypointing_example/test_images/animal.png +3 -0
  18. tutorials/keypointing_example/test_images/person.png +3 -0
  19. tutorials/ocr_example/.DS_Store +0 -0
  20. tutorials/ocr_example/_full_tutorial.ipynb +0 -0
  21. tutorials/ocr_example/ocr_polygon_example.py +69 -0
  22. tutorials/ocr_example/ocr_textline_box_example.py +68 -0
  23. tutorials/ocr_example/ocr_word_box_example.py +68 -0
  24. tutorials/ocr_example/test_images/ocr.png +3 -0
  25. tutorials/other_example/batch_inference.py +86 -0
  26. tutorials/pointing_example.py +58 -0
  27. tutorials/pointing_example/.DS_Store +0 -0
  28. tutorials/pointing_example/_full_tutorial.ipynb +0 -0
  29. tutorials/pointing_example/affordance_pointing_example.py +69 -0
  30. tutorials/pointing_example/gui_pointing_example.py +68 -0
  31. tutorials/pointing_example/object_pointing_example.py +67 -0
  32. tutorials/pointing_example/test_images/boxes.jpg +3 -0
  33. tutorials/pointing_example/test_images/cup.png +0 -0
  34. tutorials/visual_prompting_example/_full_tutorial.ipynb +0 -0
  35. tutorials/visual_prompting_example/test_images/pigeons.jpeg +3 -0
  36. tutorials/visual_prompting_example/visual_prompt_example.py +72 -0
.gitattributes CHANGED
@@ -43,3 +43,12 @@ assets/logo.png filter=lfs diff=lfs merge=lfs -text
43
  assets/object_pointing_visualize.jpg filter=lfs diff=lfs merge=lfs -text
44
  assets/pigeons_visualize.jpg filter=lfs diff=lfs merge=lfs -text
45
  assets/teaser.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
43
  assets/object_pointing_visualize.jpg filter=lfs diff=lfs merge=lfs -text
44
  assets/pigeons_visualize.jpg filter=lfs diff=lfs merge=lfs -text
45
  assets/teaser.png filter=lfs diff=lfs merge=lfs -text
46
+ tutorials/detection_example/test_images/boys.jpg filter=lfs diff=lfs merge=lfs -text
47
+ tutorials/detection_example/test_images/cafe.jpg filter=lfs diff=lfs merge=lfs -text
48
+ tutorials/detection_example/test_images/gui.png filter=lfs diff=lfs merge=lfs -text
49
+ tutorials/detection_example/test_images/layout.jpg filter=lfs diff=lfs merge=lfs -text
50
+ tutorials/keypointing_example/test_images/animal.png filter=lfs diff=lfs merge=lfs -text
51
+ tutorials/keypointing_example/test_images/person.png filter=lfs diff=lfs merge=lfs -text
52
+ tutorials/ocr_example/test_images/ocr.png filter=lfs diff=lfs merge=lfs -text
53
+ tutorials/pointing_example/test_images/boxes.jpg filter=lfs diff=lfs merge=lfs -text
54
+ tutorials/visual_prompting_example/test_images/pigeons.jpeg filter=lfs diff=lfs merge=lfs -text
tutorials/.DS_Store ADDED
Binary file (8.2 kB). View file
 
tutorials/detection_example/.DS_Store ADDED
Binary file (6.15 kB). View file
 
tutorials/detection_example/_full_notebook.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tutorials/detection_example/detection_example.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Basic object detection example using Rex Omni.

Loads a test image, runs open-vocabulary object detection for a fixed
list of text categories, and saves a visualization of the predictions.
"""

import torch
from PIL import Image
from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run single-image object detection and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=4096,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/detection_example/test_images/cafe.jpg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Object detection categories (free-form text labels).
    # NOTE(review): "robot-shope light" looks like a typo (robot-shaped?) —
    # left unchanged because the string is sent to the model as-is.
    categories = [
        "man",
        "woman",
        "yellow flower",
        "sofa",
        "robot-shope light",
        "blanket",
        "microwave",
        "laptop",
        "cup",
        "white chair",
        "lamp",
    ]

    results = rex_model.inference(images=image, task="detection", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=5,
            show_labels=True,
        )
        # Save visualization
        output_path = "tutorials/detection_example/test_images/cafe_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/detection_example/gui_grounding_example.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
GUI grounding example using Rex Omni.

Locates the UI element described by a natural-language query in a
screenshot and saves a visualization of the predicted region.
"""

import torch
from PIL import Image
from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run GUI grounding on a screenshot and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=4096,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/detection_example/test_images/gui.png"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Natural-language description of the UI element to locate
    categories = ["more information of song 'Photograph'"]

    results = rex_model.inference(
        images=image, task="gui_grounding", categories=categories
    )

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=5,
            show_labels=True,
        )
        # Save visualization
        output_path = "tutorials/detection_example/test_images/gui_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/detection_example/layout_grouding_examle.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Document layout grounding example using Rex Omni.

Detects document-layout regions (headers, paragraphs, figures, ...) in a
page image and saves a visualization of the predictions.
"""

import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run layout-region detection on a document image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=4096,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/detection_example/test_images/layout.jpg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Layout region categories to detect
    categories = ["header", "headline", "paragraph", "page number", "figure", "section"]

    results = rex_model.inference(images=image, task="detection", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=5,
            show_labels=True,
        )
        # Save visualization
        output_path = "tutorials/detection_example/test_images/layout_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/detection_example/referring_example.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Referring expression detection example using Rex Omni.

Detects objects described by free-form referring expressions (e.g.
"boy playing piano") and saves a visualization of the predictions.
"""

import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run referring-expression detection and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=4096,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/detection_example/test_images/boys.jpg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Referring expressions describing the targets to detect
    categories = [
        "boys holding microphone",
        "boy playing piano",
        "the four guitars on the wall",
        "the guitar in someone's hand",
    ]

    results = rex_model.inference(images=image, task="detection", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=5,
            show_labels=True,
        )
        # Save visualization
        output_path = "tutorials/detection_example/test_images/boys_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/detection_example/test_images/boys.jpg ADDED

Git LFS Details

  • SHA256: 48553869139c5a2c6715e55018499738cb2909b9101d67ad0650359c34e48770
  • Pointer size: 131 Bytes
  • Size of remote file: 226 kB
tutorials/detection_example/test_images/cafe.jpg ADDED

Git LFS Details

  • SHA256: 190c2d80267af5eaf3a2e5096fdc9be2159d2da750bd2e9b5b552959935d9ddb
  • Pointer size: 131 Bytes
  • Size of remote file: 228 kB
tutorials/detection_example/test_images/gui.png ADDED

Git LFS Details

  • SHA256: 1c9ac72fa336563dad1e0ac29b8c79a11fdd0af443293eb8ded5c13658b62884
  • Pointer size: 131 Bytes
  • Size of remote file: 460 kB
tutorials/detection_example/test_images/layout.jpg ADDED

Git LFS Details

  • SHA256: eebbbba76614f82da657edf792f7ebc20e34e4fdf688094fe87a8ea56f1c0bd0
  • Pointer size: 131 Bytes
  • Size of remote file: 256 kB
tutorials/keypointing_example/.DS_Store ADDED
Binary file (6.15 kB). View file
 
tutorials/keypointing_example/_full_tutorial.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tutorials/keypointing_example/animal_keypointing_example.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Animal keypointing example using Rex Omni
"""

from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Detect animal keypoints on a sample image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Deterministic decoding settings (greedy-style: temperature 0, top_k 1).
    decoding_kwargs = dict(
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        **decoding_kwargs,
    )

    # Load image
    image_path = "tutorials/keypointing_example/test_images/animal.png"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # Animal keypointing
    print("🐱 Performing animal keypointing...")
    results = rex_model.inference(
        images=image, task="keypoint", keypoint_type="animal", categories=["cat"]
    )

    # Bail out early if the model call did not succeed.
    result = results[0]
    if not result["success"]:
        print(f"❌ Inference failed: {result['error']}")
        return

    # Render predicted keypoints onto the image.
    vis_image = RexOmniVisualize(
        image=image,
        predictions=result["extracted_predictions"],
        font_size=6,
        draw_width=6,
        show_labels=True,
    )

    # Save visualization
    output_path = (
        "tutorials/keypointing_example/test_images/animal_keypointing_visualize.jpg"
    )
    vis_image.save(output_path)
    print(f"✅ Animal keypointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
tutorials/keypointing_example/person_keypointing_example.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Person keypointing example using Rex Omni
"""

from PIL import Image


from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Detect person keypoints on a sample image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Build the wrapper; temperature 0 + top_k 1 keeps runs reproducible.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    source_path = "tutorials/keypointing_example/test_images/person.png"  # Replace with your image path
    image = Image.open(source_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # Person keypointing
    print("👤 Performing person keypointing...")
    result = rex_model.inference(
        images=image, task="keypoint", keypoint_type="person", categories=["person"]
    )[0]

    # Stop here if inference reported a failure.
    if not result["success"]:
        print(f"❌ Inference failed: {result['error']}")
        return

    # Draw the predicted skeleton onto the image.
    rendered = RexOmniVisualize(
        image=image,
        predictions=result["extracted_predictions"],
        font_size=6,
        draw_width=6,
        show_labels=True,
    )

    # Save visualization
    output_path = (
        "tutorials/keypointing_example/test_images/person_keypointing_visualize.jpg"
    )
    rendered.save(output_path)
    print(f"✅ Person keypointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
tutorials/keypointing_example/test_images/animal.png ADDED

Git LFS Details

  • SHA256: 03f0272be742c0ee6e7d429aed35a9eaf5dccc30ac716e328341d5d6165f12e1
  • Pointer size: 131 Bytes
  • Size of remote file: 272 kB
tutorials/keypointing_example/test_images/person.png ADDED

Git LFS Details

  • SHA256: 269f5a3414a9fa94d811c19a1b1a0c7260c52552f1d37789b7331a9ff37ec008
  • Pointer size: 131 Bytes
  • Size of remote file: 165 kB
tutorials/ocr_example/.DS_Store ADDED
Binary file (6.15 kB). View file
 
tutorials/ocr_example/_full_tutorial.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tutorials/ocr_example/ocr_polygon_example.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
OCR text line-level detection example using Rex Omni (polygon format)
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run line-level OCR (polygon output) and save the visualization."""
    # Model path - replace with your actual model path.
    # Fixed: previously pointed at an internal development path
    # (/comp_robot/...) that no user can access; use the Hub id like the
    # other tutorials.
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = (
        "tutorials/ocr_example/test_images/ocr.png"  # Replace with your image path
    )
    image = Image.open(image_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # OCR text line-level detection in polygon format
    categories = ["text line"]

    print("🔍 Performing text line-level OCR detection (polygon format)...")
    results = rex_model.inference(
        images=image, task="ocr_polygon", categories=categories
    )

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=15,
            draw_width=5,
            show_labels=True,
        )

        # Save visualization
        output_path = "tutorials/ocr_example/test_images/ocr_polygon_visualize.jpg"
        vis_image.save(output_path)
        print(f"✅ Polygon OCR visualization saved to: {output_path}")
    else:
        print(f"❌ Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/ocr_example/ocr_textline_box_example.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
OCR text line-level detection example using Rex Omni (box format)
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run line-level OCR (box output) and save the visualization."""
    # Model path - replace with your actual model path.
    # Fixed: previously pointed at an internal development path
    # (/comp_robot/...) that no user can access; use the Hub id like the
    # other tutorials.
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = (
        "tutorials/ocr_example/test_images/ocr.png"  # Replace with your image path
    )
    image = Image.open(image_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # OCR text line-level detection in box format
    categories = ["text line"]

    print("🔍 Performing text line-level OCR detection...")
    results = rex_model.inference(images=image, task="ocr_box", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=15,
            draw_width=3,
            show_labels=True,
        )

        # Save visualization
        output_path = "tutorials/ocr_example/test_images/ocr_textline_box_visualize.jpg"
        vis_image.save(output_path)
        print(f"✅ Text line-level OCR visualization saved to: {output_path}")

    else:
        print(f"❌ Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/ocr_example/ocr_word_box_example.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
OCR word-level detection example using Rex Omni (box format)
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run word-level OCR (box output) and save the visualization."""
    # Model path - replace with your actual model path.
    # Fixed: previously pointed at an internal development path
    # (/comp_robot/...) that no user can access; use the Hub id like the
    # other tutorials.
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = (
        "tutorials/ocr_example/test_images/ocr.png"  # Replace with your image path
    )
    image = Image.open(image_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # OCR word-level detection in box format
    categories = ["word"]

    print("🔍 Performing word-level OCR detection...")
    results = rex_model.inference(images=image, task="ocr_box", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=5,
            show_labels=True,
        )

        # Save visualization
        output_path = "tutorials/ocr_example/test_images/ocr_word_box_visualize.jpg"
        vis_image.save(output_path)
        print(f"✅ Word-level OCR visualization saved to: {output_path}")

    else:
        print(f"❌ Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/ocr_example/test_images/ocr.png ADDED

Git LFS Details

  • SHA256: 840960ea5f5dad0d0608b602ada59ece9b9eef032e6d56df11bbaaf6a31bd35e
  • Pointer size: 131 Bytes
  • Size of remote file: 288 kB
tutorials/other_example/batch_inference.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Batch Inference example using Rex-Omni

Runs detection on several images in one `inference` call; each image gets
its own task and category list, and results come back in the same order.
"""

import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run batched detection over two images and save one visualization each."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=4096,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load images
    image_paths = [
        "tutorials/detection_example/test_images/cafe.jpg",
        "tutorials/detection_example/test_images/boys.jpg",
    ]
    images = [Image.open(image_path).convert("RGB") for image_path in image_paths]

    # One category list per image, aligned with `images` by position
    categories = [
        [
            "man",
            "woman",
            "yellow flower",
            "sofa",
            "robot-shope light",
            "blanket",
            "microwave",
            "laptop",
            "cup",
            "white chair",
            "lamp",
        ],
        [
            "boys holding microphone",
            "boy playing piano",
            "the four guitars on the wall",
            "the guitar in someone's hand",
        ],
    ]

    results = rex_model.inference(
        images=images, task=["detection", "detection"], categories=categories
    )

    # Visualize each result; enumerate replaces the manual batch_idx counter
    for batch_idx, (result, image) in enumerate(zip(results, images)):
        if result["success"]:
            predictions = result["extracted_predictions"]
            vis_image = RexOmniVisualize(
                image=image,
                predictions=predictions,
                font_size=20,
                draw_width=5,
                show_labels=True,
            )
            # Save visualization
            output_path = f"tutorials/other_example/batch_inference_{batch_idx}.jpg"
            vis_image.save(output_path)
            print(f"Visualization saved to: {output_path}")

        else:
            print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/pointing_example.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Object pointing example using Rex Omni.

Points at the requested objects in an image and saves a visualization.
"""

import torch
from PIL import Image
from rex_omni import RexOmniWrapper, visualize_predictions


def main():
    """Run object pointing on a sample image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    # NOTE(review): this path (and the `visualize_predictions` import) differ
    # from every other tutorial, which uses tutorials/.../test_images and
    # RexOmniVisualize — confirm this script is still current.
    image_path = "examples/test_images/pigeon.jpeg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Categories to point at
    categories = ["pigeons"]

    results = rex_model.inference(images=image, task="pointing", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = visualize_predictions(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=10,
            show_labels=True,
        )
        # Save visualization
        output_path = "examples/test_images/pigeon_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/pointing_example/.DS_Store ADDED
Binary file (6.15 kB). View file
 
tutorials/pointing_example/_full_tutorial.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tutorials/pointing_example/affordance_pointing_example.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Affordance pointing example using Rex Omni
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Point at the graspable part of an object and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Deterministic decoding settings (greedy-style: temperature 0, top_k 1).
    decoding_kwargs = dict(
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        **decoding_kwargs,
    )

    # Load image
    image_path = (
        "tutorials/pointing_example/test_images/cup.png"  # Replace with your image path
    )
    image = Image.open(image_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # Affordance pointing - where to hold the cup
    categories = ["where I can hold the green cup"]

    print("🤏 Performing affordance pointing...")
    results = rex_model.inference(images=image, task="pointing", categories=categories)

    # Bail out early if the model call did not succeed.
    result = results[0]
    if not result["success"]:
        print(f"❌ Inference failed: {result['error']}")
        return

    # Render the predicted points onto the image.
    vis_image = RexOmniVisualize(
        image=image,
        predictions=result["extracted_predictions"],
        font_size=20,
        draw_width=10,
        show_labels=True,
    )

    # Save visualization
    output_path = (
        "tutorials/pointing_example/test_images/affordance_pointing_visualize.jpg"
    )
    vis_image.save(output_path)
    print(f"✅ Affordance pointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
tutorials/pointing_example/gui_pointing_example.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
GUI pointing example using Rex Omni
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Point at a described UI element in a screenshot and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Build the wrapper; temperature 0 + top_k 1 keeps runs reproducible.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    screenshot_path = "tutorials/detection_example/test_images/gui.png"  # Replace with your image path
    image = Image.open(screenshot_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # GUI pointing - find specific UI element
    categories = ["element 'pause current song'"]

    print("🖱️ Performing GUI pointing...")
    result = rex_model.inference(images=image, task="pointing", categories=categories)[0]

    # Stop here if inference reported a failure.
    if not result["success"]:
        print(f"❌ Inference failed: {result['error']}")
        return

    # Draw the predicted point(s) onto the screenshot.
    rendered = RexOmniVisualize(
        image=image,
        predictions=result["extracted_predictions"],
        font_size=50,
        draw_width=15,
        show_labels=True,
    )

    # Save visualization
    output_path = (
        "tutorials/pointing_example/test_images/gui_pointing_visualize.jpg"
    )
    rendered.save(output_path)
    print(f"✅ GUI pointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
tutorials/pointing_example/object_pointing_example.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Object pointing example using Rex Omni.

Loads a photo, asks the model to point at each instance of the requested
object categories, and saves a visualization of the predicted points.
"""

from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run object pointing on a sample image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("πŸš€ Initializing Rex Omni model...")

    # Create wrapper with custom parameters. Near-greedy decoding
    # (temperature=0.0, top_k=1) keeps the output deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/pointing_example/test_images/boxes.jpg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")
    print("βœ… Image loaded successfully!")
    print(f"πŸ“ Image size: {image.size}")

    # Object pointing - categories may be free-form phrases
    categories = ["open boxes", "closed boxes"]

    print("🎯 Performing object pointing...")
    results = rex_model.inference(images=image, task="pointing", categories=categories)

    # Process results: one image in -> one result dict out
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=30,
            draw_width=10,
            show_labels=True,
        )

        # Save visualization
        output_path = (
            "tutorials/pointing_example/test_images/object_pointing_visualize.jpg"
        )
        vis_image.save(output_path)
        print(f"βœ… Object pointing visualization saved to: {output_path}")
    else:
        print(f"❌ Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/pointing_example/test_images/boxes.jpg ADDED

Git LFS Details

  • SHA256: 753771d345263b58c16dafb5b6c4e730d21d211361b57933cb496ada3f2c311f
  • Pointer size: 131 Bytes
  • Size of remote file: 350 kB
tutorials/pointing_example/test_images/cup.png ADDED
tutorials/visual_prompting_example/_full_tutorial.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tutorials/visual_prompting_example/test_images/pigeons.jpeg ADDED

Git LFS Details

  • SHA256: b4b2db805cfec8704516645df54d01c0dd21c4447250889ac655575622556689
  • Pointer size: 132 Bytes
  • Size of remote file: 1.14 MB
tutorials/visual_prompting_example/visual_prompt_example.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Visual prompting example using Rex Omni.

Provides exemplar boxes drawn on the image as visual prompts and asks the
model to find all other instances of the same object, then saves a
visualization of the predictions.
"""

from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run visual prompting on a sample image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("πŸš€ Initializing Rex Omni model...")

    # Create wrapper with custom parameters. Near-greedy decoding
    # (temperature=0.0, top_k=1) keeps the output deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/visual_prompting_example/test_images/pigeons.jpeg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")
    print("βœ… Image loaded successfully!")
    print(f"πŸ“ Image size: {image.size}")

    # Exemplar boxes in pixel coordinates: [x1, y1, x2, y2]
    visual_prompts = [
        [644, 1210, 842, 1361],  # Box 1
        [1180, 1066, 1227, 1160],  # Box 2
    ]

    print("🎯 Performing visual prompting...")
    results = rex_model.inference(
        images=image,
        task="visual_prompting",
        visual_prompt_boxes=visual_prompts,
    )

    # Process results: one image in -> one result dict out
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=30,
            draw_width=10,
            show_labels=True,
        )

        # Save visualization
        output_path = (
            "tutorials/visual_prompting_example/test_images/pigeons_visualize.jpg"
        )
        vis_image.save(output_path)
        print(f"βœ… Visual prompting visualization saved to: {output_path}")
    else:
        print(f"❌ Inference failed: {result['error']}")


if __name__ == "__main__":
    main()