# File size: 939 Bytes
# Revision: b9b12c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from transformers import SmolVLMProcessor
from typing import TYPE_CHECKING, Dict, List, Optional, Union


class SmolVLMQwen3Processor(SmolVLMProcessor):
    """SmolVLM processor declared to use a ``Qwen2TokenizerFast`` tokenizer.

    Construction is delegated to ``SmolVLMProcessor``; afterwards the
    special-token attributes are replaced with fixed values and the chat
    template is taken from the tokenizer.

    Args:
        image_processor: Image processor, forwarded to the parent constructor.
        tokenizer: Tokenizer, forwarded to the parent constructor. Its
            ``chat_template`` attribute is read after the parent has been
            initialised.
        image_seq_len: Forwarded positionally to the parent constructor.
        chat_template: Forwarded to the parent constructor. The instance's
            ``chat_template`` attribute is subsequently overwritten with the
            tokenizer's template, so this value does not survive construction.
        **kwargs: Forwarded unchanged to the parent constructor.
    """

    # Component declarations consumed by the parent processor machinery.
    attributes = ["image_processor", "tokenizer"]
    valid_kwargs = ["image_seq_len", "chat_template"]
    image_processor_class = "SmolVLMImageProcessor"
    tokenizer_class = "Qwen2TokenizerFast"

    def __init__(
        self,
        image_processor,
        tokenizer=None,
        image_seq_len: int = 169,
        chat_template: Optional[str] = None,
        **kwargs,
    ):
        super().__init__(image_processor, tokenizer, image_seq_len, chat_template=chat_template, **kwargs)

        # Applied after the parent constructor so these values replace
        # whatever the parent set for the same attribute names.
        # NOTE(review): "<vision_start>" and "<im_end>" have no "|" delimiters
        # while the other two tokens do -- confirm against the tokenizer vocab.
        # NOTE(review): image_token_id is hard-coded; presumably the id of
        # "<|image_pad|>" in the target tokenizer -- TODO confirm.
        special_token_overrides = {
            "fake_image_token": "<vision_start>",
            "image_token": "<|image_pad|>",
            "image_token_id": 151655,
            "end_of_utterance_token": "<im_end>",
            "global_image_token": "<|vision_pad|>",
            "video_token": "<|vision_pad|>",
        }
        for attr_name, attr_value in special_token_overrides.items():
            setattr(self, attr_name, attr_value)

        # The tokenizer's template takes precedence over the `chat_template`
        # argument; assumes the parent stored the tokenizer as `self.tokenizer`.
        self.chat_template = self.tokenizer.chat_template