configuration_dots.py CHANGED
@@ -3,6 +3,7 @@ from transformers.configuration_utils import PretrainedConfig
3
  from transformers.models.qwen2 import Qwen2Config
4
  from transformers import Qwen2_5_VLProcessor, AutoProcessor
5
  from transformers.models.auto.configuration_auto import CONFIG_MAPPING
 
6
 
7
 
8
  class DotsVisionConfig(PretrainedConfig):
@@ -68,7 +69,8 @@ class DotsOCRConfig(Qwen2Config):
68
 
69
  class DotsVLProcessor(Qwen2_5_VLProcessor):
70
  def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
71
- super().__init__(image_processor, tokenizer, chat_template=chat_template)
 
72
  self.image_token = "<|imgpad|>" if not hasattr(tokenizer, "image_token") else tokenizer.image_token
73
 
74
 
 
3
  from transformers.models.qwen2 import Qwen2Config
4
  from transformers import Qwen2_5_VLProcessor, AutoProcessor
5
  from transformers.models.auto.configuration_auto import CONFIG_MAPPING
6
+ from transformers.video_processing_utils import BaseVideoProcessor
7
 
8
 
9
  class DotsVisionConfig(PretrainedConfig):
 
69
 
70
class DotsVLProcessor(Qwen2_5_VLProcessor):
    """Image-only processor for the Dots OCR model, built on Qwen2.5-VL.

    Newer transformers releases require a ``video_processor`` argument in the
    Qwen2.5-VL processor ``__init__``; since Dots OCR handles images only, a
    placeholder ``BaseVideoProcessor`` is supplied to satisfy that contract.
    """

    def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
        """Initialize the processor.

        Args:
            image_processor: Image preprocessor passed through to the base class.
            tokenizer: Tokenizer; if it defines ``image_token``, that token is
                used, otherwise the Dots default ``<|imgpad|>``.
            chat_template: Optional chat template forwarded to the base class.
            **kwargs: Accepted for interface compatibility; not forwarded
                (matches the original behavior — TODO confirm this is intended).
        """
        # Placeholder video processor: Dots OCR never processes video, but the
        # base __init__ signature expects one in recent transformers versions.
        placeholder_video_processor = BaseVideoProcessor()
        super().__init__(
            image_processor,
            tokenizer,
            video_processor=placeholder_video_processor,
            chat_template=chat_template,
        )
        # Prefer the tokenizer's own image token when it provides one.
        if hasattr(tokenizer, "image_token"):
            self.image_token = tokenizer.image_token
        else:
            self.image_token = "<|imgpad|>"
75
 
76
 
preprocessor_config.json CHANGED
@@ -1,4 +1,7 @@
1
  {
 
 
 
2
  "min_pixels": 3136,
3
  "max_pixels": 11289600,
4
  "patch_size": 14,
 
1
  {
2
+ "auto_map": {
3
+ "AutoProcessor": "configuration_dots.DotsVLProcessor"
4
+ },
5
  "min_pixels": 3136,
6
  "max_pixels": 11289600,
7
  "patch_size": 14,