rhymes-ai
/

Aria

@@ -70,11 +70,12 @@ class AriaProcessor(ProcessorMixin):
             self.tokenizer = AutoTokenizer.from_pretrained(
                 tokenizer, trust_remote_code=True, use_fast=False
             )
-            if self.tokenizer.pad_token is None:
-                self.tokenizer.pad_token = self.tokenizer.unk_token
         else:
             self.tokenizer = tokenizer
         self.image_token = image_token
     # Copied from transformers.models.llava_next.processing_llava_next.LlavaNextProcessor.__call__
@@ -227,9 +228,12 @@ class AriaProcessor(ProcessorMixin):
             image_processor_path,
             **cls._extract_kwargs(AriaVisionProcessor.from_pretrained, **kwargs),
         )
         try:
             tokenizer = AutoTokenizer.from_pretrained(
                 tokenizer_path,
                 **cls._extract_kwargs(AutoTokenizer.from_pretrained, **kwargs),
             )
             chat_template = tokenizer.chat_template

             self.tokenizer = AutoTokenizer.from_pretrained(
                 tokenizer, trust_remote_code=True, use_fast=False
             )
         else:
             self.tokenizer = tokenizer
+        if self.tokenizer.pad_token is None:
+            self.tokenizer.pad_token = self.tokenizer.unk_token
         self.image_token = image_token
     # Copied from transformers.models.llava_next.processing_llava_next.LlavaNextProcessor.__call__
             image_processor_path,
             **cls._extract_kwargs(AriaVisionProcessor.from_pretrained, **kwargs),
         )
+        if "use_fast" in kwargs:
+            kwargs.pop("use_fast")
         try:
             tokenizer = AutoTokenizer.from_pretrained(
                 tokenizer_path,
+                use_fast=False,
                 **cls._extract_kwargs(AutoTokenizer.from_pretrained, **kwargs),
             )
             chat_template = tokenizer.chat_template

vision_processor.py CHANGED Viewed

@@ -210,14 +210,25 @@ class AriaVisionProcessor(BaseImageProcessor):
         return_tensors: Optional[Union[str, TensorType]] = "pt",
         split_image: Optional[bool] = False,
         split_ratio: Optional[List[List[int]]] = [
-            [1, 1],
             [1, 2],
             [1, 3],
             [1, 4],
             [2, 2],
             [2, 1],
             [3, 1],
             [4, 1],
         ],
     ):
         """
@@ -279,14 +290,25 @@ class AriaVisionProcessor(BaseImageProcessor):
         return_tensors: Optional[Union[str, TensorType]] = None,
         split_image: Optional[bool] = False,
         split_ratio: Optional[List[List[int]]] = [
-            [1, 1],
             [1, 2],
             [1, 3],
             [1, 4],
             [2, 2],
             [2, 1],
             [3, 1],
             [4, 1],
         ],
     ):
         return self.__call__(
@@ -296,4 +318,4 @@ class AriaVisionProcessor(BaseImageProcessor):
             return_tensors=return_tensors,
             split_image=split_image,
             split_ratio=split_ratio,
-        )

         return_tensors: Optional[Union[str, TensorType]] = "pt",
         split_image: Optional[bool] = False,
         split_ratio: Optional[List[List[int]]] = [
             [1, 2],
             [1, 3],
             [1, 4],
+            [1, 5],
+            [1, 6],
+            [1, 7],
+            [1, 8],
+            [2, 4],
+            [2, 3],
             [2, 2],
             [2, 1],
             [3, 1],
+            [3, 2],
             [4, 1],
+            [4, 2],
+            [5, 1],
+            [6, 1],
+            [7, 1],
+            [8, 1],
         ],
     ):
         """
         return_tensors: Optional[Union[str, TensorType]] = None,
         split_image: Optional[bool] = False,
         split_ratio: Optional[List[List[int]]] = [
             [1, 2],
             [1, 3],
             [1, 4],
+            [1, 5],
+            [1, 6],
+            [1, 7],
+            [1, 8],
+            [2, 4],
+            [2, 3],
             [2, 2],
             [2, 1],
             [3, 1],
+            [3, 2],
             [4, 1],
+            [4, 2],
+            [5, 1],
+            [6, 1],
+            [7, 1],
+            [8, 1],
         ],
     ):
         return self.__call__(
             return_tensors=return_tensors,
             split_image=split_image,
             split_ratio=split_ratio,
+        )