momergul committed on
Commit
2e966e8
·
verified ·
1 Parent(s): 607b096

Upload processor_git.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. processor_git.py +8 -7
processor_git.py CHANGED
@@ -1,6 +1,7 @@
1
  from transformers import ProcessorMixin, AutoProcessor
2
  from transformers.models.auto.processing_auto import AutoProcessor
3
  from transformers.processing_utils import ProcessorMixin
 
4
  import json
5
  import os
6
 
@@ -8,12 +9,12 @@ class GITProcessor(ProcessorMixin):
8
  """
9
  Custom processor that combines a tokenizer and feature extractor.
10
  """
11
- attributes = ["feature_extractor", "tokenizer"]
12
- feature_extractor_class = "AutoImageProcessor"
13
  tokenizer_class = "AutoTokenizer"
14
 
15
- def __init__(self, feature_extractor, tokenizer):
16
- super().__init__(feature_extractor, tokenizer)
17
 
18
  def __call__(self, text=None, images=None, **kwargs):
19
  """
@@ -22,7 +23,7 @@ class GITProcessor(ProcessorMixin):
22
  Args:
23
  text: Text input(s) to tokenize
24
  images: Image input(s) to process
25
- **kwargs: Additional arguments passed to tokenizer/feature_extractor
26
 
27
  Returns:
28
  Dictionary with processed inputs
@@ -39,12 +40,12 @@ class GITProcessor(ProcessorMixin):
39
 
40
  # Process images if provided
41
  if images is not None:
42
- image_encoding = self.feature_extractor(images, **kwargs)
43
  # Add prefix to avoid key conflicts
44
  for key, value in image_encoding.items():
45
  encoding[f"pixel_values" if key == "pixel_values" else f"image_{key}"] = value
46
 
47
- return encoding
48
 
49
  def batch_decode(self, *args, **kwargs):
50
  """
 
1
  from transformers import ProcessorMixin, AutoProcessor
2
  from transformers.models.auto.processing_auto import AutoProcessor
3
  from transformers.processing_utils import ProcessorMixin
4
+ from transformers.tokenization_utils_base import BatchEncoding
5
  import json
6
  import os
7
 
 
9
  """
10
  Custom processor that combines a tokenizer and feature extractor.
11
  """
12
+ attributes = ["image_processor", "tokenizer"]
13
+ image_processor_class = "AutoImageProcessor"
14
  tokenizer_class = "AutoTokenizer"
15
 
16
+ def __init__(self, image_processor, tokenizer):
17
+ super().__init__(image_processor, tokenizer)
18
 
19
  def __call__(self, text=None, images=None, **kwargs):
20
  """
 
23
  Args:
24
  text: Text input(s) to tokenize
25
  images: Image input(s) to process
26
+ **kwargs: Additional arguments passed to tokenizer/image_processor
27
 
28
  Returns:
29
  Dictionary with processed inputs
 
40
 
41
  # Process images if provided
42
  if images is not None:
43
+ image_encoding = self.image_processor(images, **kwargs)
44
  # Add prefix to avoid key conflicts
45
  for key, value in image_encoding.items():
46
  encoding[f"pixel_values" if key == "pixel_values" else f"image_{key}"] = value
47
 
48
+ return BatchEncoding(encoding)
49
 
50
  def batch_decode(self, *args, **kwargs):
51
  """