Mountchicken committed on
Commit
ced4fcf
Β·
verified Β·
1 Parent(s): a171f59

Upload 35 files

Browse files
Files changed (36) hide show
  1. .gitattributes +9 -0
  2. tutorials/.DS_Store +0 -0
  3. tutorials/detection_example/.DS_Store +0 -0
  4. tutorials/detection_example/_full_notebook.ipynb +0 -0
  5. tutorials/detection_example/detection_example.py +70 -0
  6. tutorials/detection_example/gui_grounding_example.py +60 -0
  7. tutorials/detection_example/layout_grouding_examle.py +59 -0
  8. tutorials/detection_example/referring_example.py +64 -0
  9. tutorials/detection_example/test_images/boys.jpg +3 -0
  10. tutorials/detection_example/test_images/cafe.jpg +3 -0
  11. tutorials/detection_example/test_images/gui.png +3 -0
  12. tutorials/detection_example/test_images/layout.jpg +3 -0
  13. tutorials/keypointing_example/.DS_Store +0 -0
  14. tutorials/keypointing_example/_full_tutorial.ipynb +0 -0
  15. tutorials/keypointing_example/animal_keypointing_example.py +63 -0
  16. tutorials/keypointing_example/person_keypointing_example.py +64 -0
  17. tutorials/keypointing_example/test_images/animal.png +3 -0
  18. tutorials/keypointing_example/test_images/person.png +3 -0
  19. tutorials/ocr_example/.DS_Store +0 -0
  20. tutorials/ocr_example/_full_tutorial.ipynb +0 -0
  21. tutorials/ocr_example/ocr_polygon_example.py +69 -0
  22. tutorials/ocr_example/ocr_textline_box_example.py +68 -0
  23. tutorials/ocr_example/ocr_word_box_example.py +68 -0
  24. tutorials/ocr_example/test_images/ocr.png +3 -0
  25. tutorials/other_example/batch_inference.py +86 -0
  26. tutorials/pointing_example.py +58 -0
  27. tutorials/pointing_example/.DS_Store +0 -0
  28. tutorials/pointing_example/_full_tutorial.ipynb +0 -0
  29. tutorials/pointing_example/affordance_pointing_example.py +69 -0
  30. tutorials/pointing_example/gui_pointing_example.py +68 -0
  31. tutorials/pointing_example/object_pointing_example.py +67 -0
  32. tutorials/pointing_example/test_images/boxes.jpg +3 -0
  33. tutorials/pointing_example/test_images/cup.png +0 -0
  34. tutorials/visual_prompting_example/_full_tutorial.ipynb +0 -0
  35. tutorials/visual_prompting_example/test_images/pigeons.jpeg +3 -0
  36. tutorials/visual_prompting_example/visual_prompt_example.py +72 -0
.gitattributes CHANGED
@@ -43,3 +43,12 @@ assets/logo.png filter=lfs diff=lfs merge=lfs -text
43
  assets/object_pointing_visualize.jpg filter=lfs diff=lfs merge=lfs -text
44
  assets/pigeons_visualize.jpg filter=lfs diff=lfs merge=lfs -text
45
  assets/teaser.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
43
  assets/object_pointing_visualize.jpg filter=lfs diff=lfs merge=lfs -text
44
  assets/pigeons_visualize.jpg filter=lfs diff=lfs merge=lfs -text
45
  assets/teaser.png filter=lfs diff=lfs merge=lfs -text
46
+ tutorials/detection_example/test_images/boys.jpg filter=lfs diff=lfs merge=lfs -text
47
+ tutorials/detection_example/test_images/cafe.jpg filter=lfs diff=lfs merge=lfs -text
48
+ tutorials/detection_example/test_images/gui.png filter=lfs diff=lfs merge=lfs -text
49
+ tutorials/detection_example/test_images/layout.jpg filter=lfs diff=lfs merge=lfs -text
50
+ tutorials/keypointing_example/test_images/animal.png filter=lfs diff=lfs merge=lfs -text
51
+ tutorials/keypointing_example/test_images/person.png filter=lfs diff=lfs merge=lfs -text
52
+ tutorials/ocr_example/test_images/ocr.png filter=lfs diff=lfs merge=lfs -text
53
+ tutorials/pointing_example/test_images/boxes.jpg filter=lfs diff=lfs merge=lfs -text
54
+ tutorials/visual_prompting_example/test_images/pigeons.jpeg filter=lfs diff=lfs merge=lfs -text
tutorials/.DS_Store ADDED
Binary file (8.2 kB). View file
 
tutorials/detection_example/.DS_Store ADDED
Binary file (6.15 kB). View file
 
tutorials/detection_example/_full_notebook.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tutorials/detection_example/detection_example.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Basic object detection example using Rex Omni.

Loads a test image, runs open-vocabulary object detection for a fixed
list of text categories, and saves a visualization of the predictions.
"""

import torch
from PIL import Image
from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run single-image object detection and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=4096,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/detection_example/test_images/cafe.jpg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Object detection categories (free-form text labels).
    # NOTE(review): "robot-shope light" looks like a typo (robot-shaped?) —
    # left unchanged because the string is sent to the model as-is.
    categories = [
        "man",
        "woman",
        "yellow flower",
        "sofa",
        "robot-shope light",
        "blanket",
        "microwave",
        "laptop",
        "cup",
        "white chair",
        "lamp",
    ]

    results = rex_model.inference(images=image, task="detection", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=5,
            show_labels=True,
        )
        # Save visualization
        output_path = "tutorials/detection_example/test_images/cafe_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/detection_example/gui_grounding_example.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
GUI grounding example using Rex Omni.

Locates the UI element described by a natural-language query in a
screenshot and saves a visualization of the predicted region.
"""

import torch
from PIL import Image
from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run GUI grounding on a screenshot and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=4096,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/detection_example/test_images/gui.png"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Natural-language description of the UI element to locate
    categories = ["more information of song 'Photograph'"]

    results = rex_model.inference(
        images=image, task="gui_grounding", categories=categories
    )

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=5,
            show_labels=True,
        )
        # Save visualization
        output_path = "tutorials/detection_example/test_images/gui_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/detection_example/layout_grouding_examle.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Document layout grounding example using Rex Omni.

Detects document-layout regions (headers, paragraphs, figures, ...) in a
page image and saves a visualization of the predictions.
"""

import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run layout-region detection on a document image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=4096,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/detection_example/test_images/layout.jpg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Layout region categories to detect
    categories = ["header", "headline", "paragraph", "page number", "figure", "section"]

    results = rex_model.inference(images=image, task="detection", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=5,
            show_labels=True,
        )
        # Save visualization
        output_path = "tutorials/detection_example/test_images/layout_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/detection_example/referring_example.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Referring expression detection example using Rex Omni.

Detects objects described by free-form referring expressions (e.g.
"boy playing piano") and saves a visualization of the predictions.
"""

import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run referring-expression detection and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=4096,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/detection_example/test_images/boys.jpg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Referring expressions describing the targets to detect
    categories = [
        "boys holding microphone",
        "boy playing piano",
        "the four guitars on the wall",
        "the guitar in someone's hand",
    ]

    results = rex_model.inference(images=image, task="detection", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=5,
            show_labels=True,
        )
        # Save visualization
        output_path = "tutorials/detection_example/test_images/boys_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/detection_example/test_images/boys.jpg ADDED

Git LFS Details

  • SHA256: 48553869139c5a2c6715e55018499738cb2909b9101d67ad0650359c34e48770
  • Pointer size: 131 Bytes
  • Size of remote file: 226 kB
tutorials/detection_example/test_images/cafe.jpg ADDED

Git LFS Details

  • SHA256: 190c2d80267af5eaf3a2e5096fdc9be2159d2da750bd2e9b5b552959935d9ddb
  • Pointer size: 131 Bytes
  • Size of remote file: 228 kB
tutorials/detection_example/test_images/gui.png ADDED

Git LFS Details

  • SHA256: 1c9ac72fa336563dad1e0ac29b8c79a11fdd0af443293eb8ded5c13658b62884
  • Pointer size: 131 Bytes
  • Size of remote file: 460 kB
tutorials/detection_example/test_images/layout.jpg ADDED

Git LFS Details

  • SHA256: eebbbba76614f82da657edf792f7ebc20e34e4fdf688094fe87a8ea56f1c0bd0
  • Pointer size: 131 Bytes
  • Size of remote file: 256 kB
tutorials/keypointing_example/.DS_Store ADDED
Binary file (6.15 kB). View file
 
tutorials/keypointing_example/_full_tutorial.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tutorials/keypointing_example/animal_keypointing_example.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Animal keypointing example using Rex Omni
"""

from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Detect animal keypoints on a sample image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Deterministic decoding settings (greedy-style: temperature 0, top_k 1).
    decoding_kwargs = dict(
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        **decoding_kwargs,
    )

    # Load image
    image_path = "tutorials/keypointing_example/test_images/animal.png"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # Animal keypointing
    print("🐱 Performing animal keypointing...")
    results = rex_model.inference(
        images=image, task="keypoint", keypoint_type="animal", categories=["cat"]
    )

    # Bail out early if the model call did not succeed.
    result = results[0]
    if not result["success"]:
        print(f"❌ Inference failed: {result['error']}")
        return

    # Render predicted keypoints onto the image.
    vis_image = RexOmniVisualize(
        image=image,
        predictions=result["extracted_predictions"],
        font_size=6,
        draw_width=6,
        show_labels=True,
    )

    # Save visualization
    output_path = (
        "tutorials/keypointing_example/test_images/animal_keypointing_visualize.jpg"
    )
    vis_image.save(output_path)
    print(f"✅ Animal keypointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
tutorials/keypointing_example/person_keypointing_example.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Person keypointing example using Rex Omni
"""

from PIL import Image


from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Detect person keypoints on a sample image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Build the wrapper; temperature 0 + top_k 1 keeps runs reproducible.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    source_path = "tutorials/keypointing_example/test_images/person.png"  # Replace with your image path
    image = Image.open(source_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # Person keypointing
    print("👤 Performing person keypointing...")
    result = rex_model.inference(
        images=image, task="keypoint", keypoint_type="person", categories=["person"]
    )[0]

    # Stop here if inference reported a failure.
    if not result["success"]:
        print(f"❌ Inference failed: {result['error']}")
        return

    # Draw the predicted skeleton onto the image.
    rendered = RexOmniVisualize(
        image=image,
        predictions=result["extracted_predictions"],
        font_size=6,
        draw_width=6,
        show_labels=True,
    )

    # Save visualization
    output_path = (
        "tutorials/keypointing_example/test_images/person_keypointing_visualize.jpg"
    )
    rendered.save(output_path)
    print(f"✅ Person keypointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
tutorials/keypointing_example/test_images/animal.png ADDED

Git LFS Details

  • SHA256: 03f0272be742c0ee6e7d429aed35a9eaf5dccc30ac716e328341d5d6165f12e1
  • Pointer size: 131 Bytes
  • Size of remote file: 272 kB
tutorials/keypointing_example/test_images/person.png ADDED

Git LFS Details

  • SHA256: 269f5a3414a9fa94d811c19a1b1a0c7260c52552f1d37789b7331a9ff37ec008
  • Pointer size: 131 Bytes
  • Size of remote file: 165 kB
tutorials/ocr_example/.DS_Store ADDED
Binary file (6.15 kB). View file
 
tutorials/ocr_example/_full_tutorial.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tutorials/ocr_example/ocr_polygon_example.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
OCR text line-level detection example using Rex Omni (polygon format)
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run line-level OCR (polygon output) and save the visualization."""
    # Model path - replace with your actual model path.
    # Fixed: previously pointed at an internal development path
    # (/comp_robot/...) that no user can access; use the Hub id like the
    # other tutorials.
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = (
        "tutorials/ocr_example/test_images/ocr.png"  # Replace with your image path
    )
    image = Image.open(image_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # OCR text line-level detection in polygon format
    categories = ["text line"]

    print("🔍 Performing text line-level OCR detection (polygon format)...")
    results = rex_model.inference(
        images=image, task="ocr_polygon", categories=categories
    )

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=15,
            draw_width=5,
            show_labels=True,
        )

        # Save visualization
        output_path = "tutorials/ocr_example/test_images/ocr_polygon_visualize.jpg"
        vis_image.save(output_path)
        print(f"✅ Polygon OCR visualization saved to: {output_path}")
    else:
        print(f"❌ Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/ocr_example/ocr_textline_box_example.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
OCR text line-level detection example using Rex Omni (box format)
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run line-level OCR (box output) and save the visualization."""
    # Model path - replace with your actual model path.
    # Fixed: previously pointed at an internal development path
    # (/comp_robot/...) that no user can access; use the Hub id like the
    # other tutorials.
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = (
        "tutorials/ocr_example/test_images/ocr.png"  # Replace with your image path
    )
    image = Image.open(image_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # OCR text line-level detection in box format
    categories = ["text line"]

    print("🔍 Performing text line-level OCR detection...")
    results = rex_model.inference(images=image, task="ocr_box", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=15,
            draw_width=3,
            show_labels=True,
        )

        # Save visualization
        output_path = "tutorials/ocr_example/test_images/ocr_textline_box_visualize.jpg"
        vis_image.save(output_path)
        print(f"✅ Text line-level OCR visualization saved to: {output_path}")

    else:
        print(f"❌ Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/ocr_example/ocr_word_box_example.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
OCR word-level detection example using Rex Omni (box format)
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run word-level OCR (box output) and save the visualization."""
    # Model path - replace with your actual model path.
    # Fixed: previously pointed at an internal development path
    # (/comp_robot/...) that no user can access; use the Hub id like the
    # other tutorials.
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = (
        "tutorials/ocr_example/test_images/ocr.png"  # Replace with your image path
    )
    image = Image.open(image_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # OCR word-level detection in box format
    categories = ["word"]

    print("🔍 Performing word-level OCR detection...")
    results = rex_model.inference(images=image, task="ocr_box", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=5,
            show_labels=True,
        )

        # Save visualization
        output_path = "tutorials/ocr_example/test_images/ocr_word_box_visualize.jpg"
        vis_image.save(output_path)
        print(f"✅ Word-level OCR visualization saved to: {output_path}")

    else:
        print(f"❌ Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/ocr_example/test_images/ocr.png ADDED

Git LFS Details

  • SHA256: 840960ea5f5dad0d0608b602ada59ece9b9eef032e6d56df11bbaaf6a31bd35e
  • Pointer size: 131 Bytes
  • Size of remote file: 288 kB
tutorials/other_example/batch_inference.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Batch Inference example using Rex-Omni

Runs detection on several images in one `inference` call; each image gets
its own task and category list, and results come back in the same order.
"""

import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run batched detection over two images and save one visualization each."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=4096,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load images
    image_paths = [
        "tutorials/detection_example/test_images/cafe.jpg",
        "tutorials/detection_example/test_images/boys.jpg",
    ]
    images = [Image.open(image_path).convert("RGB") for image_path in image_paths]

    # One category list per image, aligned with `images` by position
    categories = [
        [
            "man",
            "woman",
            "yellow flower",
            "sofa",
            "robot-shope light",
            "blanket",
            "microwave",
            "laptop",
            "cup",
            "white chair",
            "lamp",
        ],
        [
            "boys holding microphone",
            "boy playing piano",
            "the four guitars on the wall",
            "the guitar in someone's hand",
        ],
    ]

    results = rex_model.inference(
        images=images, task=["detection", "detection"], categories=categories
    )

    # Visualize each result; enumerate replaces the manual batch_idx counter
    for batch_idx, (result, image) in enumerate(zip(results, images)):
        if result["success"]:
            predictions = result["extracted_predictions"]
            vis_image = RexOmniVisualize(
                image=image,
                predictions=predictions,
                font_size=20,
                draw_width=5,
                show_labels=True,
            )
            # Save visualization
            output_path = f"tutorials/other_example/batch_inference_{batch_idx}.jpg"
            vis_image.save(output_path)
            print(f"Visualization saved to: {output_path}")

        else:
            print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/pointing_example.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Object pointing example using Rex Omni.

Points at the requested objects in an image and saves a visualization.
"""

import torch
from PIL import Image
from rex_omni import RexOmniWrapper, visualize_predictions


def main():
    """Run object pointing on a sample image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    # Create wrapper with custom parameters.
    # temperature=0.0 with top_k=1 makes decoding deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # or "vllm" for faster inference
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    # NOTE(review): this path (and the `visualize_predictions` import) differ
    # from every other tutorial, which uses tutorials/.../test_images and
    # RexOmniVisualize — confirm this script is still current.
    image_path = "examples/test_images/pigeon.jpeg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")

    # Categories to point at
    categories = ["pigeons"]

    results = rex_model.inference(images=image, task="pointing", categories=categories)

    # Visualize the first (and only) result
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = visualize_predictions(
            image=image,
            predictions=predictions,
            font_size=20,
            draw_width=10,
            show_labels=True,
        )
        # Save visualization
        output_path = "examples/test_images/pigeon_visualize.jpg"
        vis_image.save(output_path)
        print(f"Visualization saved to: {output_path}")

    else:
        print(f"Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/pointing_example/.DS_Store ADDED
Binary file (6.15 kB). View file
 
tutorials/pointing_example/_full_tutorial.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tutorials/pointing_example/affordance_pointing_example.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Affordance pointing example using Rex Omni
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Point at the graspable part of an object and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Deterministic decoding settings (greedy-style: temperature 0, top_k 1).
    decoding_kwargs = dict(
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        **decoding_kwargs,
    )

    # Load image
    image_path = (
        "tutorials/pointing_example/test_images/cup.png"  # Replace with your image path
    )
    image = Image.open(image_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # Affordance pointing - where to hold the cup
    categories = ["where I can hold the green cup"]

    print("🤏 Performing affordance pointing...")
    results = rex_model.inference(images=image, task="pointing", categories=categories)

    # Bail out early if the model call did not succeed.
    result = results[0]
    if not result["success"]:
        print(f"❌ Inference failed: {result['error']}")
        return

    # Render the predicted points onto the image.
    vis_image = RexOmniVisualize(
        image=image,
        predictions=result["extracted_predictions"],
        font_size=20,
        draw_width=10,
        show_labels=True,
    )

    # Save visualization
    output_path = (
        "tutorials/pointing_example/test_images/affordance_pointing_visualize.jpg"
    )
    vis_image.save(output_path)
    print(f"✅ Affordance pointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
tutorials/pointing_example/gui_pointing_example.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
GUI pointing example using Rex Omni
"""

import matplotlib.pyplot as plt
import torch
from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Point at a described UI element in a screenshot and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("🚀 Initializing Rex Omni model...")

    # Build the wrapper; temperature 0 + top_k 1 keeps runs reproducible.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    screenshot_path = "tutorials/detection_example/test_images/gui.png"  # Replace with your image path
    image = Image.open(screenshot_path).convert("RGB")
    print(f"✅ Image loaded successfully!")
    print(f"📏 Image size: {image.size}")

    # GUI pointing - find specific UI element
    categories = ["element 'pause current song'"]

    print("🖱️ Performing GUI pointing...")
    result = rex_model.inference(images=image, task="pointing", categories=categories)[0]

    # Stop here if inference reported a failure.
    if not result["success"]:
        print(f"❌ Inference failed: {result['error']}")
        return

    # Draw the predicted point(s) onto the screenshot.
    rendered = RexOmniVisualize(
        image=image,
        predictions=result["extracted_predictions"],
        font_size=50,
        draw_width=15,
        show_labels=True,
    )

    # Save visualization
    output_path = (
        "tutorials/pointing_example/test_images/gui_pointing_visualize.jpg"
    )
    rendered.save(output_path)
    print(f"✅ GUI pointing visualization saved to: {output_path}")


if __name__ == "__main__":
    main()
tutorials/pointing_example/object_pointing_example.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Object pointing example using Rex Omni.

Loads a photo, asks the model to point at each instance of the requested
object categories, and saves a visualization of the predicted points.
"""

from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run object pointing on a sample image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("πŸš€ Initializing Rex Omni model...")

    # Create wrapper with custom parameters. Near-greedy decoding
    # (temperature=0.0, top_k=1) keeps the output deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/pointing_example/test_images/boxes.jpg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")
    print("βœ… Image loaded successfully!")
    print(f"πŸ“ Image size: {image.size}")

    # Object pointing - categories may be free-form phrases
    categories = ["open boxes", "closed boxes"]

    print("🎯 Performing object pointing...")
    results = rex_model.inference(images=image, task="pointing", categories=categories)

    # Process results: one image in -> one result dict out
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=30,
            draw_width=10,
            show_labels=True,
        )

        # Save visualization
        output_path = (
            "tutorials/pointing_example/test_images/object_pointing_visualize.jpg"
        )
        vis_image.save(output_path)
        print(f"βœ… Object pointing visualization saved to: {output_path}")
    else:
        print(f"❌ Inference failed: {result['error']}")


if __name__ == "__main__":
    main()
tutorials/pointing_example/test_images/boxes.jpg ADDED

Git LFS Details

  • SHA256: 753771d345263b58c16dafb5b6c4e730d21d211361b57933cb496ada3f2c311f
  • Pointer size: 131 Bytes
  • Size of remote file: 350 kB
tutorials/pointing_example/test_images/cup.png ADDED
tutorials/visual_prompting_example/_full_tutorial.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tutorials/visual_prompting_example/test_images/pigeons.jpeg ADDED

Git LFS Details

  • SHA256: b4b2db805cfec8704516645df54d01c0dd21c4447250889ac655575622556689
  • Pointer size: 132 Bytes
  • Size of remote file: 1.14 MB
tutorials/visual_prompting_example/visual_prompt_example.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Visual prompting example using Rex Omni.

Provides exemplar boxes drawn on the image as visual prompts and asks the
model to find all other instances of the same object, then saves a
visualization of the predictions.
"""

from PIL import Image

from rex_omni import RexOmniVisualize, RexOmniWrapper


def main():
    """Run visual prompting on a sample image and save the visualization."""
    # Model path - replace with your actual model path
    model_path = "IDEA-Research/Rex-Omni"

    print("πŸš€ Initializing Rex Omni model...")

    # Create wrapper with custom parameters. Near-greedy decoding
    # (temperature=0.0, top_k=1) keeps the output deterministic.
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image_path = "tutorials/visual_prompting_example/test_images/pigeons.jpeg"  # Replace with your image path
    image = Image.open(image_path).convert("RGB")
    print("βœ… Image loaded successfully!")
    print(f"πŸ“ Image size: {image.size}")

    # Exemplar boxes in pixel coordinates: [x1, y1, x2, y2]
    visual_prompts = [
        [644, 1210, 842, 1361],  # Box 1
        [1180, 1066, 1227, 1160],  # Box 2
    ]

    print("🎯 Performing visual prompting...")
    results = rex_model.inference(
        images=image,
        task="visual_prompting",
        visual_prompt_boxes=visual_prompts,
    )

    # Process results: one image in -> one result dict out
    result = results[0]
    if result["success"]:
        predictions = result["extracted_predictions"]
        vis_image = RexOmniVisualize(
            image=image,
            predictions=predictions,
            font_size=30,
            draw_width=10,
            show_labels=True,
        )

        # Save visualization
        output_path = (
            "tutorials/visual_prompting_example/test_images/pigeons_visualize.jpg"
        )
        vis_image.save(output_path)
        print(f"βœ… Visual prompting visualization saved to: {output_path}")
    else:
        print(f"❌ Inference failed: {result['error']}")


if __name__ == "__main__":
    main()