| from transformers import AutoConfig, Qwen2Config | |
| from typing import Tuple | |
class XOmniConfig(Qwen2Config):
    """Configuration for the ``x-omni`` model type.

    Extends ``Qwen2Config`` with a few extra multimodal fields. Every keyword
    argument not listed below is forwarded unchanged to
    ``Qwen2Config.__init__``; the extra fields are then stored verbatim as
    instance attributes of the same name.

    Args:
        num_mm_adap_layers: Stored as ``self.num_mm_adap_layers`` (default 4).
            Presumably the number of multimodal adapter layers -- confirm
            against the model implementation.
        num_mm_head_layers: Stored as ``self.num_mm_head_layers`` (default 4).
            Presumably the number of multimodal head layers -- confirm
            against the model implementation.
        mm_vocab_size: Stored as ``self.mm_vocab_size`` (default 16448).
        image_vocab_size: Stored as ``self.image_vocab_size``
            (default 16384).
        mm_special_tokens: Special token strings, stored as
            ``self.mm_special_tokens``. The default is an immutable tuple, so
            it is safe to share between instances.
        **kwargs: Passed through to ``Qwen2Config.__init__``.
    """

    model_type = "x-omni"

    def __init__(
        self,
        num_mm_adap_layers: int = 4,
        num_mm_head_layers: int = 4,
        mm_vocab_size: int = 16448,
        image_vocab_size: int = 16384,
        # Variable-length tuple of strings; ``Tuple[str]`` would mean a 1-tuple.
        mm_special_tokens: Tuple[str, ...] = ('<SOM>', '<EOM>', '<IMAGE>'),
        **kwargs,
    ):
        # Initialise the base config first; the multimodal fields are assigned
        # afterwards, in the same order as the original implementation.
        super().__init__(**kwargs)

        self.num_mm_adap_layers = num_mm_adap_layers
        self.num_mm_head_layers = num_mm_head_layers
        self.mm_vocab_size = mm_vocab_size
        self.image_vocab_size = image_vocab_size
        self.mm_special_tokens = mm_special_tokens
# Make the "x-omni" model type resolvable through AutoConfig by mapping it to
# XOmniConfig. This runs as an import-time side effect of this module.
AutoConfig.register(
    model_type="x-omni",
    config=XOmniConfig,
)