Spaces:
Running
on
Zero
Running
on
Zero
Upload 35 files
Browse files- .gitattributes +9 -0
- tutorials/.DS_Store +0 -0
- tutorials/detection_example/.DS_Store +0 -0
- tutorials/detection_example/_full_notebook.ipynb +0 -0
- tutorials/detection_example/detection_example.py +70 -0
- tutorials/detection_example/gui_grounding_example.py +60 -0
- tutorials/detection_example/layout_grouding_examle.py +59 -0
- tutorials/detection_example/referring_example.py +64 -0
- tutorials/detection_example/test_images/boys.jpg +3 -0
- tutorials/detection_example/test_images/cafe.jpg +3 -0
- tutorials/detection_example/test_images/gui.png +3 -0
- tutorials/detection_example/test_images/layout.jpg +3 -0
- tutorials/keypointing_example/.DS_Store +0 -0
- tutorials/keypointing_example/_full_tutorial.ipynb +0 -0
- tutorials/keypointing_example/animal_keypointing_example.py +63 -0
- tutorials/keypointing_example/person_keypointing_example.py +64 -0
- tutorials/keypointing_example/test_images/animal.png +3 -0
- tutorials/keypointing_example/test_images/person.png +3 -0
- tutorials/ocr_example/.DS_Store +0 -0
- tutorials/ocr_example/_full_tutorial.ipynb +0 -0
- tutorials/ocr_example/ocr_polygon_example.py +69 -0
- tutorials/ocr_example/ocr_textline_box_example.py +68 -0
- tutorials/ocr_example/ocr_word_box_example.py +68 -0
- tutorials/ocr_example/test_images/ocr.png +3 -0
- tutorials/other_example/batch_inference.py +86 -0
- tutorials/pointing_example.py +58 -0
- tutorials/pointing_example/.DS_Store +0 -0
- tutorials/pointing_example/_full_tutorial.ipynb +0 -0
- tutorials/pointing_example/affordance_pointing_example.py +69 -0
- tutorials/pointing_example/gui_pointing_example.py +68 -0
- tutorials/pointing_example/object_pointing_example.py +67 -0
- tutorials/pointing_example/test_images/boxes.jpg +3 -0
- tutorials/pointing_example/test_images/cup.png +0 -0
- tutorials/visual_prompting_example/_full_tutorial.ipynb +0 -0
- tutorials/visual_prompting_example/test_images/pigeons.jpeg +3 -0
- tutorials/visual_prompting_example/visual_prompt_example.py +72 -0
.gitattributes
CHANGED
|
@@ -43,3 +43,12 @@ assets/logo.png filter=lfs diff=lfs merge=lfs -text
|
|
| 43 |
assets/object_pointing_visualize.jpg filter=lfs diff=lfs merge=lfs -text
|
| 44 |
assets/pigeons_visualize.jpg filter=lfs diff=lfs merge=lfs -text
|
| 45 |
assets/teaser.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
assets/object_pointing_visualize.jpg filter=lfs diff=lfs merge=lfs -text
|
| 44 |
assets/pigeons_visualize.jpg filter=lfs diff=lfs merge=lfs -text
|
| 45 |
assets/teaser.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
tutorials/detection_example/test_images/boys.jpg filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
tutorials/detection_example/test_images/cafe.jpg filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
tutorials/detection_example/test_images/gui.png filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
tutorials/detection_example/test_images/layout.jpg filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
tutorials/keypointing_example/test_images/animal.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
tutorials/keypointing_example/test_images/person.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
tutorials/ocr_example/test_images/ocr.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
tutorials/pointing_example/test_images/boxes.jpg filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
tutorials/visual_prompting_example/test_images/pigeons.jpeg filter=lfs diff=lfs merge=lfs -text
|
tutorials/.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
tutorials/detection_example/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
tutorials/detection_example/_full_notebook.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tutorials/detection_example/detection_example.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
Basic object detection example using Rex Omni
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
from PIL import Image
|
| 10 |
+
from rex_omni import RexOmniVisualize, RexOmniWrapper
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main():
|
| 14 |
+
# Model path - replace with your actual model path
|
| 15 |
+
model_path = "IDEA-Research/Rex-Omni"
|
| 16 |
+
|
| 17 |
+
# Create wrapper with custom parameters
|
| 18 |
+
rex_model = RexOmniWrapper(
|
| 19 |
+
model_path=model_path,
|
| 20 |
+
backend="transformers", # or "vllm" for faster inference
|
| 21 |
+
max_tokens=4096,
|
| 22 |
+
temperature=0.0,
|
| 23 |
+
top_p=0.05,
|
| 24 |
+
top_k=1,
|
| 25 |
+
repetition_penalty=1.05,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
# Load imag
|
| 29 |
+
image_path = "tutorials/detection_example/test_images/cafe.jpg" # Replace with your image path
|
| 30 |
+
image = Image.open(image_path).convert("RGB")
|
| 31 |
+
|
| 32 |
+
# Object detection
|
| 33 |
+
categories = [
|
| 34 |
+
"man",
|
| 35 |
+
"woman",
|
| 36 |
+
"yellow flower",
|
| 37 |
+
"sofa",
|
| 38 |
+
"robot-shope light",
|
| 39 |
+
"blanket",
|
| 40 |
+
"microwave",
|
| 41 |
+
"laptop",
|
| 42 |
+
"cup",
|
| 43 |
+
"white chair",
|
| 44 |
+
"lamp",
|
| 45 |
+
]
|
| 46 |
+
|
| 47 |
+
results = rex_model.inference(images=image, task="detection", categories=categories)
|
| 48 |
+
|
| 49 |
+
# Print results
|
| 50 |
+
result = results[0]
|
| 51 |
+
if result["success"]:
|
| 52 |
+
predictions = result["extracted_predictions"]
|
| 53 |
+
vis_image = RexOmniVisualize(
|
| 54 |
+
image=image,
|
| 55 |
+
predictions=predictions,
|
| 56 |
+
font_size=20,
|
| 57 |
+
draw_width=5,
|
| 58 |
+
show_labels=True,
|
| 59 |
+
)
|
| 60 |
+
# Save visualization
|
| 61 |
+
output_path = "tutorials/detection_example/test_images/cafe_visualize.jpg"
|
| 62 |
+
vis_image.save(output_path)
|
| 63 |
+
print(f"Visualization saved to: {output_path}")
|
| 64 |
+
|
| 65 |
+
else:
|
| 66 |
+
print(f"Inference failed: {result['error']}")
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
if __name__ == "__main__":
|
| 70 |
+
main()
|
tutorials/detection_example/gui_grounding_example.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
Basic object detection example using Rex Omni
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
from PIL import Image
|
| 10 |
+
from rex_omni import RexOmniVisualize, RexOmniWrapper
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main():
|
| 14 |
+
# Model path - replace with your actual model path
|
| 15 |
+
model_path = "IDEA-Research/Rex-Omni"
|
| 16 |
+
|
| 17 |
+
# Create wrapper with custom parameters
|
| 18 |
+
rex_model = RexOmniWrapper(
|
| 19 |
+
model_path=model_path,
|
| 20 |
+
backend="transformers", # or "vllm" for faster inference
|
| 21 |
+
max_tokens=4096,
|
| 22 |
+
temperature=0.0,
|
| 23 |
+
top_p=0.05,
|
| 24 |
+
top_k=1,
|
| 25 |
+
repetition_penalty=1.05,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
# Load image
|
| 29 |
+
image_path = "tutorials/detection_example/test_images/gui.png" # Replace with your image path
|
| 30 |
+
image = Image.open(image_path).convert("RGB")
|
| 31 |
+
|
| 32 |
+
# Object detection
|
| 33 |
+
categories = ["more information of song 'Photograph'"]
|
| 34 |
+
|
| 35 |
+
results = rex_model.inference(
|
| 36 |
+
images=image, task="gui_grounding", categories=categories
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
# Print results
|
| 40 |
+
result = results[0]
|
| 41 |
+
if result["success"]:
|
| 42 |
+
predictions = result["extracted_predictions"]
|
| 43 |
+
vis_image = RexOmniVisualize(
|
| 44 |
+
image=image,
|
| 45 |
+
predictions=predictions,
|
| 46 |
+
font_size=20,
|
| 47 |
+
draw_width=5,
|
| 48 |
+
show_labels=True,
|
| 49 |
+
)
|
| 50 |
+
# Save visualization
|
| 51 |
+
output_path = "tutorials/detection_example/test_images/gui_visualize.jpg"
|
| 52 |
+
vis_image.save(output_path)
|
| 53 |
+
print(f"Visualization saved to: {output_path}")
|
| 54 |
+
|
| 55 |
+
else:
|
| 56 |
+
print(f"Inference failed: {result['error']}")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
if __name__ == "__main__":
|
| 60 |
+
main()
|
tutorials/detection_example/layout_grouding_examle.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
Basic object detection example using Rex Omni
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
from PIL import Image
|
| 10 |
+
|
| 11 |
+
from rex_omni import RexOmniVisualize, RexOmniWrapper
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def main():
|
| 15 |
+
# Model path - replace with your actual model path
|
| 16 |
+
model_path = "IDEA-Research/Rex-Omni"
|
| 17 |
+
|
| 18 |
+
# Create wrapper with custom parameters
|
| 19 |
+
rex_model = RexOmniWrapper(
|
| 20 |
+
model_path=model_path,
|
| 21 |
+
backend="transformers", # or "vllm" for faster inference
|
| 22 |
+
max_tokens=4096,
|
| 23 |
+
temperature=0.0,
|
| 24 |
+
top_p=0.05,
|
| 25 |
+
top_k=1,
|
| 26 |
+
repetition_penalty=1.05,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
# Load image
|
| 30 |
+
image_path = "tutorials/detection_example/test_images/layout.jpg" # Replace with your image path
|
| 31 |
+
image = Image.open(image_path).convert("RGB")
|
| 32 |
+
|
| 33 |
+
# Object detection
|
| 34 |
+
categories = ["header", "headline", "paragraph", "page number", "figure", "section"]
|
| 35 |
+
|
| 36 |
+
results = rex_model.inference(images=image, task="detection", categories=categories)
|
| 37 |
+
|
| 38 |
+
# Print results
|
| 39 |
+
result = results[0]
|
| 40 |
+
if result["success"]:
|
| 41 |
+
predictions = result["extracted_predictions"]
|
| 42 |
+
vis_image = RexOmniVisualize(
|
| 43 |
+
image=image,
|
| 44 |
+
predictions=predictions,
|
| 45 |
+
font_size=20,
|
| 46 |
+
draw_width=5,
|
| 47 |
+
show_labels=True,
|
| 48 |
+
)
|
| 49 |
+
# Save visualization
|
| 50 |
+
output_path = "tutorials/detection_example/test_images/layout_visualize.jpg"
|
| 51 |
+
vis_image.save(output_path)
|
| 52 |
+
print(f"Visualization saved to: {output_path}")
|
| 53 |
+
|
| 54 |
+
else:
|
| 55 |
+
print(f"Inference failed: {result['error']}")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
if __name__ == "__main__":
|
| 59 |
+
main()
|
tutorials/detection_example/referring_example.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
Basic object detection example using Rex Omni
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
from PIL import Image
|
| 10 |
+
|
| 11 |
+
from rex_omni import RexOmniVisualize, RexOmniWrapper
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def main():
|
| 15 |
+
# Model path - replace with your actual model path
|
| 16 |
+
model_path = "IDEA-Research/Rex-Omni"
|
| 17 |
+
|
| 18 |
+
# Create wrapper with custom parameters
|
| 19 |
+
rex_model = RexOmniWrapper(
|
| 20 |
+
model_path=model_path,
|
| 21 |
+
backend="transformers", # or "vllm" for faster inference
|
| 22 |
+
max_tokens=4096,
|
| 23 |
+
temperature=0.0,
|
| 24 |
+
top_p=0.05,
|
| 25 |
+
top_k=1,
|
| 26 |
+
repetition_penalty=1.05,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
# Load image
|
| 30 |
+
image_path = "tutorials/detection_example/test_images/boys.jpg" # Replace with your image path
|
| 31 |
+
image = Image.open(image_path).convert("RGB")
|
| 32 |
+
|
| 33 |
+
# Object detection
|
| 34 |
+
categories = [
|
| 35 |
+
"boys holding microphone",
|
| 36 |
+
"boy playing piano",
|
| 37 |
+
"the four guitars on the wall",
|
| 38 |
+
"the guitar in someone's hand",
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
results = rex_model.inference(images=image, task="detection", categories=categories)
|
| 42 |
+
|
| 43 |
+
# Print results
|
| 44 |
+
result = results[0]
|
| 45 |
+
if result["success"]:
|
| 46 |
+
predictions = result["extracted_predictions"]
|
| 47 |
+
vis_image = RexOmniVisualize(
|
| 48 |
+
image=image,
|
| 49 |
+
predictions=predictions,
|
| 50 |
+
font_size=20,
|
| 51 |
+
draw_width=5,
|
| 52 |
+
show_labels=True,
|
| 53 |
+
)
|
| 54 |
+
# Save visualization
|
| 55 |
+
output_path = "tutorials/detection_example/test_images/boys_visualize.jpg"
|
| 56 |
+
vis_image.save(output_path)
|
| 57 |
+
print(f"Visualization saved to: {output_path}")
|
| 58 |
+
|
| 59 |
+
else:
|
| 60 |
+
print(f"Inference failed: {result['error']}")
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
if __name__ == "__main__":
|
| 64 |
+
main()
|
tutorials/detection_example/test_images/boys.jpg
ADDED
|
Git LFS Details
|
tutorials/detection_example/test_images/cafe.jpg
ADDED
|
Git LFS Details
|
tutorials/detection_example/test_images/gui.png
ADDED
|
Git LFS Details
|
tutorials/detection_example/test_images/layout.jpg
ADDED
|
Git LFS Details
|
tutorials/keypointing_example/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
tutorials/keypointing_example/_full_tutorial.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tutorials/keypointing_example/animal_keypointing_example.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Animal keypointing example using Rex Omni
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from PIL import Image
|
| 6 |
+
|
| 7 |
+
from rex_omni import RexOmniVisualize, RexOmniWrapper
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def main():
|
| 11 |
+
# Model path - replace with your actual model path
|
| 12 |
+
model_path = "IDEA-Research/Rex-Omni"
|
| 13 |
+
|
| 14 |
+
print("π Initializing Rex Omni model...")
|
| 15 |
+
|
| 16 |
+
# Create wrapper with custom parameters
|
| 17 |
+
rex_model = RexOmniWrapper(
|
| 18 |
+
model_path=model_path,
|
| 19 |
+
backend="transformers", # Choose "transformers" or "vllm"
|
| 20 |
+
max_tokens=2048,
|
| 21 |
+
temperature=0.0,
|
| 22 |
+
top_p=0.05,
|
| 23 |
+
top_k=1,
|
| 24 |
+
repetition_penalty=1.05,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
# Load image
|
| 28 |
+
image_path = "tutorials/keypointing_example/test_images/animal.png" # Replace with your image path
|
| 29 |
+
image = Image.open(image_path).convert("RGB")
|
| 30 |
+
print(f"β
Image loaded successfully!")
|
| 31 |
+
print(f"π Image size: {image.size}")
|
| 32 |
+
|
| 33 |
+
# Animal keypointing
|
| 34 |
+
print("π± Performing animal keypointing...")
|
| 35 |
+
results = rex_model.inference(
|
| 36 |
+
images=image, task="keypoint", keypoint_type="animal", categories=["cat"]
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
# Process results
|
| 40 |
+
result = results[0]
|
| 41 |
+
if result["success"]:
|
| 42 |
+
predictions = result["extracted_predictions"]
|
| 43 |
+
vis_image = RexOmniVisualize(
|
| 44 |
+
image=image,
|
| 45 |
+
predictions=predictions,
|
| 46 |
+
font_size=6,
|
| 47 |
+
draw_width=6,
|
| 48 |
+
show_labels=True,
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
# Save visualization
|
| 52 |
+
output_path = (
|
| 53 |
+
"tutorials/keypointing_example/test_images/animal_keypointing_visualize.jpg"
|
| 54 |
+
)
|
| 55 |
+
vis_image.save(output_path)
|
| 56 |
+
print(f"β
Animal keypointing visualization saved to: {output_path}")
|
| 57 |
+
|
| 58 |
+
else:
|
| 59 |
+
print(f"β Inference failed: {result['error']}")
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
if __name__ == "__main__":
|
| 63 |
+
main()
|
tutorials/keypointing_example/person_keypointing_example.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Person keypointing example using Rex Omni
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from PIL import Image
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
from rex_omni import RexOmniVisualize, RexOmniWrapper
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def main():
|
| 12 |
+
# Model path - replace with your actual model path
|
| 13 |
+
model_path = "IDEA-Research/Rex-Omni"
|
| 14 |
+
|
| 15 |
+
print("π Initializing Rex Omni model...")
|
| 16 |
+
|
| 17 |
+
# Create wrapper with custom parameters
|
| 18 |
+
rex_model = RexOmniWrapper(
|
| 19 |
+
model_path=model_path,
|
| 20 |
+
backend="transformers", # Choose "transformers" or "vllm"
|
| 21 |
+
max_tokens=2048,
|
| 22 |
+
temperature=0.0,
|
| 23 |
+
top_p=0.05,
|
| 24 |
+
top_k=1,
|
| 25 |
+
repetition_penalty=1.05,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
# Load image
|
| 29 |
+
image_path = "tutorials/keypointing_example/test_images/person.png" # Replace with your image path
|
| 30 |
+
image = Image.open(image_path).convert("RGB")
|
| 31 |
+
print(f"β
Image loaded successfully!")
|
| 32 |
+
print(f"π Image size: {image.size}")
|
| 33 |
+
|
| 34 |
+
# Person keypointing
|
| 35 |
+
print("π€ Performing person keypointing...")
|
| 36 |
+
results = rex_model.inference(
|
| 37 |
+
images=image, task="keypoint", keypoint_type="person", categories=["person"]
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
# Process results
|
| 41 |
+
result = results[0]
|
| 42 |
+
if result["success"]:
|
| 43 |
+
predictions = result["extracted_predictions"]
|
| 44 |
+
vis_image = RexOmniVisualize(
|
| 45 |
+
image=image,
|
| 46 |
+
predictions=predictions,
|
| 47 |
+
font_size=6,
|
| 48 |
+
draw_width=6,
|
| 49 |
+
show_labels=True,
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
# Save visualization
|
| 53 |
+
output_path = (
|
| 54 |
+
"tutorials/keypointing_example/test_images/person_keypointing_visualize.jpg"
|
| 55 |
+
)
|
| 56 |
+
vis_image.save(output_path)
|
| 57 |
+
print(f"β
Person keypointing visualization saved to: {output_path}")
|
| 58 |
+
|
| 59 |
+
else:
|
| 60 |
+
print(f"β Inference failed: {result['error']}")
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
if __name__ == "__main__":
|
| 64 |
+
main()
|
tutorials/keypointing_example/test_images/animal.png
ADDED
|
Git LFS Details
|
tutorials/keypointing_example/test_images/person.png
ADDED
|
Git LFS Details
|
tutorials/ocr_example/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
tutorials/ocr_example/_full_tutorial.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tutorials/ocr_example/ocr_polygon_example.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
OCR text line-level detection example using Rex Omni (polygon format)
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
import torch
|
| 10 |
+
from PIL import Image
|
| 11 |
+
|
| 12 |
+
from rex_omni import RexOmniVisualize, RexOmniWrapper
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
|
| 16 |
+
# Model path - replace with your actual model path
|
| 17 |
+
model_path = "/comp_robot/jiangqing/projects/2023/research/R1/QwenSFTOfficial/open_source/IDEA-Research/Rex-Omni"
|
| 18 |
+
|
| 19 |
+
print("π Initializing Rex Omni model...")
|
| 20 |
+
|
| 21 |
+
# Create wrapper with custom parameters
|
| 22 |
+
rex_model = RexOmniWrapper(
|
| 23 |
+
model_path=model_path,
|
| 24 |
+
backend="transformers", # Choose "transformers" or "vllm"
|
| 25 |
+
max_tokens=2048,
|
| 26 |
+
temperature=0.0,
|
| 27 |
+
top_p=0.05,
|
| 28 |
+
top_k=1,
|
| 29 |
+
repetition_penalty=1.05,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
# Load image
|
| 33 |
+
image_path = (
|
| 34 |
+
"tutorials/ocr_example/test_images/ocr.png" # Replace with your image path
|
| 35 |
+
)
|
| 36 |
+
image = Image.open(image_path).convert("RGB")
|
| 37 |
+
print(f"β
Image loaded successfully!")
|
| 38 |
+
print(f"π Image size: {image.size}")
|
| 39 |
+
|
| 40 |
+
# OCR text line-level detection in polygon format
|
| 41 |
+
categories = ["text line"]
|
| 42 |
+
|
| 43 |
+
print("π Performing text line-level OCR detection (polygon format)...")
|
| 44 |
+
results = rex_model.inference(
|
| 45 |
+
images=image, task="ocr_polygon", categories=categories
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
# Process results
|
| 49 |
+
result = results[0]
|
| 50 |
+
if result["success"]:
|
| 51 |
+
predictions = result["extracted_predictions"]
|
| 52 |
+
vis_image = RexOmniVisualize(
|
| 53 |
+
image=image,
|
| 54 |
+
predictions=predictions,
|
| 55 |
+
font_size=15,
|
| 56 |
+
draw_width=5,
|
| 57 |
+
show_labels=True,
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
# Save visualization
|
| 61 |
+
output_path = "tutorials/ocr_example/test_images/ocr_polygon_visualize.jpg"
|
| 62 |
+
vis_image.save(output_path)
|
| 63 |
+
print(f"β
Polygon OCR visualization saved to: {output_path}")
|
| 64 |
+
else:
|
| 65 |
+
print(f"β Inference failed: {result['error']}")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
if __name__ == "__main__":
|
| 69 |
+
main()
|
tutorials/ocr_example/ocr_textline_box_example.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
OCR text line-level detection example using Rex Omni (box format)
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
import torch
|
| 10 |
+
from PIL import Image
|
| 11 |
+
|
| 12 |
+
from rex_omni import RexOmniVisualize, RexOmniWrapper
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
|
| 16 |
+
# Model path - replace with your actual model path
|
| 17 |
+
model_path = "/comp_robot/jiangqing/projects/2023/research/R1/QwenSFTOfficial/open_source/IDEA-Research/Rex-Omni"
|
| 18 |
+
|
| 19 |
+
print("π Initializing Rex Omni model...")
|
| 20 |
+
|
| 21 |
+
# Create wrapper with custom parameters
|
| 22 |
+
rex_model = RexOmniWrapper(
|
| 23 |
+
model_path=model_path,
|
| 24 |
+
backend="transformers", # Choose "transformers" or "vllm"
|
| 25 |
+
max_tokens=2048,
|
| 26 |
+
temperature=0.0,
|
| 27 |
+
top_p=0.05,
|
| 28 |
+
top_k=1,
|
| 29 |
+
repetition_penalty=1.05,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
# Load image
|
| 33 |
+
image_path = (
|
| 34 |
+
"tutorials/ocr_example/test_images/ocr.png" # Replace with your image path
|
| 35 |
+
)
|
| 36 |
+
image = Image.open(image_path).convert("RGB")
|
| 37 |
+
print(f"β
Image loaded successfully!")
|
| 38 |
+
print(f"π Image size: {image.size}")
|
| 39 |
+
|
| 40 |
+
# OCR text line-level detection in box format
|
| 41 |
+
categories = ["text line"]
|
| 42 |
+
|
| 43 |
+
print("π Performing text line-level OCR detection...")
|
| 44 |
+
results = rex_model.inference(images=image, task="ocr_box", categories=categories)
|
| 45 |
+
|
| 46 |
+
# Process results
|
| 47 |
+
result = results[0]
|
| 48 |
+
if result["success"]:
|
| 49 |
+
predictions = result["extracted_predictions"]
|
| 50 |
+
vis_image = RexOmniVisualize(
|
| 51 |
+
image=image,
|
| 52 |
+
predictions=predictions,
|
| 53 |
+
font_size=15,
|
| 54 |
+
draw_width=3,
|
| 55 |
+
show_labels=True,
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
# Save visualization
|
| 59 |
+
output_path = "tutorials/ocr_example/test_images/ocr_textline_box_visualize.jpg"
|
| 60 |
+
vis_image.save(output_path)
|
| 61 |
+
print(f"β
Text line-level OCR visualization saved to: {output_path}")
|
| 62 |
+
|
| 63 |
+
else:
|
| 64 |
+
print(f"β Inference failed: {result['error']}")
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
if __name__ == "__main__":
|
| 68 |
+
main()
|
tutorials/ocr_example/ocr_word_box_example.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
OCR word-level detection example using Rex Omni (box format)
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
import torch
|
| 10 |
+
from PIL import Image
|
| 11 |
+
|
| 12 |
+
from rex_omni import RexOmniVisualize, RexOmniWrapper
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
|
| 16 |
+
# Model path - replace with your actual model path
|
| 17 |
+
model_path = "/comp_robot/jiangqing/projects/2023/research/R1/QwenSFTOfficial/open_source/IDEA-Research/Rex-Omni"
|
| 18 |
+
|
| 19 |
+
print("π Initializing Rex Omni model...")
|
| 20 |
+
|
| 21 |
+
# Create wrapper with custom parameters
|
| 22 |
+
rex_model = RexOmniWrapper(
|
| 23 |
+
model_path=model_path,
|
| 24 |
+
backend="transformers", # Choose "transformers" or "vllm"
|
| 25 |
+
max_tokens=2048,
|
| 26 |
+
temperature=0.0,
|
| 27 |
+
top_p=0.05,
|
| 28 |
+
top_k=1,
|
| 29 |
+
repetition_penalty=1.05,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
# Load image
|
| 33 |
+
image_path = (
|
| 34 |
+
"tutorials/ocr_example/test_images/ocr.png" # Replace with your image path
|
| 35 |
+
)
|
| 36 |
+
image = Image.open(image_path).convert("RGB")
|
| 37 |
+
print(f"β
Image loaded successfully!")
|
| 38 |
+
print(f"π Image size: {image.size}")
|
| 39 |
+
|
| 40 |
+
# OCR word-level detection in box format
|
| 41 |
+
categories = ["word"]
|
| 42 |
+
|
| 43 |
+
print("π Performing word-level OCR detection...")
|
| 44 |
+
results = rex_model.inference(images=image, task="ocr_box", categories=categories)
|
| 45 |
+
|
| 46 |
+
# Process results
|
| 47 |
+
result = results[0]
|
| 48 |
+
if result["success"]:
|
| 49 |
+
predictions = result["extracted_predictions"]
|
| 50 |
+
vis_image = RexOmniVisualize(
|
| 51 |
+
image=image,
|
| 52 |
+
predictions=predictions,
|
| 53 |
+
font_size=20,
|
| 54 |
+
draw_width=5,
|
| 55 |
+
show_labels=True,
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
# Save visualization
|
| 59 |
+
output_path = "tutorials/ocr_example/test_images/ocr_word_box_visualize.jpg"
|
| 60 |
+
vis_image.save(output_path)
|
| 61 |
+
print(f"β
Word-level OCR visualization saved to: {output_path}")
|
| 62 |
+
|
| 63 |
+
else:
|
| 64 |
+
print(f"β Inference failed: {result['error']}")
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
if __name__ == "__main__":
|
| 68 |
+
main()
|
tutorials/ocr_example/test_images/ocr.png
ADDED
|
Git LFS Details
|
tutorials/other_example/batch_inference.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
Batch Inference example using Rex-Omni
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
from PIL import Image
|
| 10 |
+
|
| 11 |
+
from rex_omni import RexOmniVisualize, RexOmniWrapper
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def main():
|
| 15 |
+
# Model path - replace with your actual model path
|
| 16 |
+
model_path = "IDEA-Research/Rex-Omni"
|
| 17 |
+
|
| 18 |
+
# Create wrapper with custom parameters
|
| 19 |
+
rex_model = RexOmniWrapper(
|
| 20 |
+
model_path=model_path,
|
| 21 |
+
backend="transformers", # or "vllm" for faster inference
|
| 22 |
+
max_tokens=4096,
|
| 23 |
+
temperature=0.0,
|
| 24 |
+
top_p=0.05,
|
| 25 |
+
top_k=1,
|
| 26 |
+
repetition_penalty=1.05,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
# Load imag
|
| 30 |
+
image_paths = [
|
| 31 |
+
"tutorials/detection_example/test_images/cafe.jpg",
|
| 32 |
+
"tutorials/detection_example/test_images/boys.jpg",
|
| 33 |
+
]
|
| 34 |
+
images = [Image.open(image_path).convert("RGB") for image_path in image_paths]
|
| 35 |
+
|
| 36 |
+
# Object detection
|
| 37 |
+
categories = [
|
| 38 |
+
[
|
| 39 |
+
"man",
|
| 40 |
+
"woman",
|
| 41 |
+
"yellow flower",
|
| 42 |
+
"sofa",
|
| 43 |
+
"robot-shope light",
|
| 44 |
+
"blanket",
|
| 45 |
+
"microwave",
|
| 46 |
+
"laptop",
|
| 47 |
+
"cup",
|
| 48 |
+
"white chair",
|
| 49 |
+
"lamp",
|
| 50 |
+
],
|
| 51 |
+
[
|
| 52 |
+
"boys holding microphone",
|
| 53 |
+
"boy playing piano",
|
| 54 |
+
"the four guitars on the wall",
|
| 55 |
+
"the guitar in someone's hand",
|
| 56 |
+
],
|
| 57 |
+
]
|
| 58 |
+
|
| 59 |
+
results = rex_model.inference(
|
| 60 |
+
images=images, task=["detection", "detection"], categories=categories
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
# Print results
|
| 64 |
+
batch_idx = 0
|
| 65 |
+
for result, image in zip(results, images):
|
| 66 |
+
if result["success"]:
|
| 67 |
+
predictions = result["extracted_predictions"]
|
| 68 |
+
vis_image = RexOmniVisualize(
|
| 69 |
+
image=image,
|
| 70 |
+
predictions=predictions,
|
| 71 |
+
font_size=20,
|
| 72 |
+
draw_width=5,
|
| 73 |
+
show_labels=True,
|
| 74 |
+
)
|
| 75 |
+
# Save visualization
|
| 76 |
+
output_path = f"tutorials/other_example/batch_inference_{batch_idx}.jpg"
|
| 77 |
+
vis_image.save(output_path)
|
| 78 |
+
print(f"Visualization saved to: {output_path}")
|
| 79 |
+
|
| 80 |
+
else:
|
| 81 |
+
print(f"Inference failed: {result['error']}")
|
| 82 |
+
batch_idx += 1
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
if __name__ == "__main__":
|
| 86 |
+
main()
|
tutorials/pointing_example.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Object pointing example using Rex Omni
"""

import torch
from PIL import Image
from rex_omni import RexOmniWrapper, visualize_predictions


def main():
    """Run a single-image object-pointing inference and save a visualization.

    Loads the Rex-Omni model, points at every instance of the requested
    categories in one image, and writes the annotated image to disk.
    """
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=2048,
        temperature=0.0,  # near-deterministic decoding (with top_p/top_k below)
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    # NOTE(review): this path uses "examples/..." while the sibling tutorials
    # use "tutorials/..." - confirm the file actually exists at this location.
    image_path = "examples/test_images/pigeon.jpeg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Object pointing (the original comment said "detection", but the task
    # requested below is "pointing")
    categories = ["pigeons"]

    results = rex_model.inference(images=image, task="pointing", categories=categories)

    # Print results
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        # NOTE(review): sibling examples import RexOmniVisualize instead;
        # presumably visualize_predictions is an equivalent helper - confirm.
        vis_image = visualize_predictions(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=10,
            show_labels=True,
        )
        # Save visualization
        output_path = "examples/test_images/pigeon_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
|
tutorials/pointing_example/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
tutorials/pointing_example/_full_tutorial.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tutorials/pointing_example/affordance_pointing_example.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Affordance pointing example using Rex Omni
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Point at the region of a cup that affords grasping and save the result."""
    # Hugging Face model id - swap in a local checkpoint path if needed
    model_path = "IDEA-Research/Rex-Omni"

    print("π Initializing Rex Omni model...")

    # Build the inference wrapper with near-deterministic sampling settings
    model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Read the test image and force three-channel RGB
    image_path = "tutorials/pointing_example/test_images/cup.png"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")
    print(f"β Image loaded successfully!")
    print(f"π Image size: {image.size}")

    # The "category" here is a free-form affordance query, not a class name
    categories = ["where I can hold the green cup"]

    print("π€ Performing affordance pointing...")
    results = model.inference(images=image, task="pointing", categories=categories)

    result = results[0]
    if not result["success"]:
        # Surface the backend error and bail out
        print(f"β Inference failed: {result['error']}")
        return

    # Render the predicted point(s) on top of the input image
    annotated = RexOmniVisualize(
        image=image,
        predictions=result["extracted_predictions"],
        font_size=20,
        draw_width=10,
        show_labels=True,
    )

    # Persist the annotated image next to the input
    output_path = "tutorials/pointing_example/test_images/affordance_pointing_visualize.jpg"
    annotated.save(output_path)
    print(f"β Affordance pointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
|
tutorials/pointing_example/gui_pointing_example.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
GUI pointing example using Rex Omni
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Locate a described UI element in a screenshot and save a pointed visualization."""
    # Identifier of the pretrained checkpoint on the Hugging Face Hub
    model_path = "IDEA-Research/Rex-Omni"

    print("π Initializing Rex Omni model...")

    # Deterministic decoding: temperature 0 plus tight nucleus/top-k limits
    model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Open the screenshot and normalize it to RGB
    image_path = "tutorials/detection_example/test_images/gui.png"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")
    print(f"β Image loaded successfully!")
    print(f"π Image size: {image.size}")

    # Natural-language description of the UI element to locate
    categories = ["element 'pause current song'"]

    print("π±οΈ Performing GUI pointing...")
    results = model.inference(images=image, task="pointing", categories=categories)

    outcome = results[0]
    if not outcome["success"]:
        print(f"β Inference failed: {outcome['error']}")
        return

    # Large font and stroke so the marker stays visible on a high-res screenshot
    rendered = RexOmniVisualize(
        image=image,
        predictions=outcome["extracted_predictions"],
        font_size=50,
        draw_width=15,
        show_labels=True,
    )

    # Write the annotated screenshot to disk
    output_path = "tutorials/pointing_example/test_images/gui_pointing_visualize.jpg"
    rendered.save(output_path)
    print(f"β GUI pointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
|
tutorials/pointing_example/object_pointing_example.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Object pointing example using Rex Omni
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Point at open vs. closed boxes in a photo and save the rendered points."""
    # Checkpoint identifier - replace with a local path if you have one
    model_path = "IDEA-Research/Rex-Omni"

    print("π Initializing Rex Omni model...")

    # Wrapper configured for reproducible, near-greedy generation
    model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load the input photo as RGB
    image_path = "tutorials/pointing_example/test_images/boxes.jpg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")
    print(f"β Image loaded successfully!")
    print(f"π Image size: {image.size}")

    # Two attribute-qualified categories; the model points at each instance
    categories = ["open boxes", "closed boxes"]

    print("π― Performing object pointing...")
    results = model.inference(images=image, task="pointing", categories=categories)

    first = results[0]
    if not first["success"]:
        print(f"β Inference failed: {first['error']}")
        return

    # Overlay the predicted points and labels on the image
    overlay = RexOmniVisualize(
        image=image,
        predictions=first["extracted_predictions"],
        font_size=30,
        draw_width=10,
        show_labels=True,
    )

    # Save the visualization alongside the test image
    output_path = "tutorials/pointing_example/test_images/object_pointing_visualize.jpg"
    overlay.save(output_path)
    print(f"β Object pointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
|
tutorials/pointing_example/test_images/boxes.jpg
ADDED
|
Git LFS Details
|
tutorials/pointing_example/test_images/cup.png
ADDED
|
tutorials/visual_prompting_example/_full_tutorial.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tutorials/visual_prompting_example/test_images/pigeons.jpeg
ADDED
|
Git LFS Details
|
tutorials/visual_prompting_example/visual_prompt_example.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Visual prompting example using Rex Omni
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Detect objects similar to user-drawn reference boxes (visual prompting).

    Instead of text categories, the model receives example bounding boxes
    ("visual prompts") and finds all visually similar instances in the image.
    """
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("π Initializing Rex Omni model...")

    # Create wrapper with custom parameters (near-deterministic decoding)
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/visual_prompting_example/test_images/pigeons.jpeg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")
    print(f"β Image loaded successfully!")
    print(f"π Image size: {image.size}")

    # Reference boxes in [x0, y0, x1, y1] pixel coordinates; each box marks
    # one example instance the model should match elsewhere in the image.
    visual_prompts = [
        [644, 1210, 842, 1361],
        [1180, 1066, 1227, 1160],
    ]

    # Fixed copy-paste residue: this is visual prompting, not object pointing
    print("π― Performing visual prompting...")
    results = rex_model.inference(
        images=image,
        task="visual_prompting",
        visual_prompt_boxes=visual_prompts,
    )

    # Process results
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=30,
            draw_width=10,
            show_labels=True,
        )

        # Save visualization and confirm, matching the sibling examples
        output_path = "tutorials/visual_prompting_example/test_images/pigeons_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")
    else:
        print(f"β Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
|