Update model code
Browse files
    	
        InternVL2_5-72B-Pretrain/configuration_internvl_chat.py
    CHANGED
    
```diff
@@ -63,6 +63,8 @@ class InternVLChatConfig(PretrainedConfig):
         self.ps_version = ps_version  # pixel shuffle version
         self.min_dynamic_patch = min_dynamic_patch
         self.max_dynamic_patch = max_dynamic_patch
+        # By default, we use tie_word_embeddings=False for models of all sizes.
+        self.tie_word_embeddings = self.llm_config.tie_word_embeddings
 
         logger.info(f'vision_select_layer: {self.select_layer}')
         logger.info(f'ps_version: {self.ps_version}')
```
    	
        InternVL2_5-72B-Pretrain/modeling_intern_vit.py
    CHANGED
    
```diff
@@ -364,6 +364,7 @@ class InternVisionEncoder(nn.Module):
 class InternVisionModel(PreTrainedModel):
     main_input_name = 'pixel_values'
     _supports_flash_attn_2 = True
+    supports_gradient_checkpointing = True
     config_class = InternVisionConfig
     _no_split_modules = ['InternVisionEncoderLayer']
 
```
    	
        InternVL2_5-72B-Pretrain/modeling_internvl_chat.py
    CHANGED
    
```diff
@@ -37,6 +37,7 @@ class InternVLChatModel(PreTrainedModel):
     main_input_name = 'pixel_values'
     base_model_prefix = 'language_model'
     _supports_flash_attn_2 = True
+    supports_gradient_checkpointing = True
     _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer', 'Qwen2DecoderLayer']
 
     def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None, use_flash_attn=True):
@@ -346,3 +347,13 @@ class InternVLChatModel(PreTrainedModel):
         )
 
         return outputs
+
+    @property
+    def lm_head(self):
+        return self.language_model.get_output_embeddings()
+
+    def get_input_embeddings(self):
+        return self.language_model.get_input_embeddings()
+
+    def get_output_embeddings(self):
+        return self.language_model.get_output_embeddings()
```
