| """HuluMed model configuration.""" | |
| import importlib.util | |
| import os.path as osp | |
| from typing import Optional, Dict, Any | |
| from transformers import AutoConfig, AutoModel, PretrainedConfig, Qwen3Config | |
| try: | |
| from .configuration_hulumed_encoder import HulumedVisionEncoderConfig | |
| except ModuleNotFoundError: | |
| spec = importlib.util.spec_from_file_location( | |
| "configuration_hulumed_encoder", | |
| osp.join(osp.dirname(__file__), "configuration_hulumed_encoder.py"), | |
| ) | |
| configuration_hulumed_encoder = importlib.util.module_from_spec(spec) | |
| spec.loader.exec_module(configuration_hulumed_encoder) | |
| HulumedVisionEncoderConfig = getattr( | |
| configuration_hulumed_encoder, | |
| "HulumedVisionEncoderConfig", | |
| ) | |
| try: | |
| from .modeling_hulumed_encoder import HulumedVisionEncoderModel | |
| except ModuleNotFoundError: | |
| spec = importlib.util.spec_from_file_location( | |
| "modeling_hulumed_encoder", | |
| osp.join(osp.dirname(__file__), "modeling_hulumed_encoder.py"), | |
| ) | |
| modeling_hulumed_encoder = importlib.util.module_from_spec(spec) | |
| spec.loader.exec_module(modeling_hulumed_encoder) | |
| HulumedVisionEncoderModel = getattr( | |
| modeling_hulumed_encoder, | |
| "HulumedVisionEncoderModel", | |
| ) | |
| AutoConfig.register("hulumed_vision_encoder", HulumedVisionEncoderConfig) | |
| AutoModel.register(HulumedVisionEncoderConfig, HulumedVisionEncoderModel) | |
| class HulumedQwen3Config(Qwen3Config): | |
| """ | |
| HuluMed model configuration. | |
| This configuration class extends Qwen2Config to store the configuration of a HuluMed model. | |
| It includes configuration for the vision encoder and multimodal projector. | |
| """ | |
| model_type = "hulumed_qwen3" | |
| sub_configs = {"vision_encoder_config": HulumedVisionEncoderConfig} | |
| def __init__( | |
| self, | |
| vision_encoder: Optional[str] = None, | |
| vision_encoder_config: Dict[str, Any] = {}, | |
| mm_projector_type: str = "mlp2x_gelu", | |
| use_token_compression: bool = True, | |
| image_token_index: int = -1, | |
| **kwargs, | |
| ): | |
| """ | |
| Initialize HuluMed configuration. | |
| Args: | |
| vision_encoder (str, optional): Path or identifier of the vision encoder. | |
| vision_encoder_config (dict, optional): Configuration for the vision encoder. | |
| mm_projector_type (str): Type of multimodal projector. Default is "mlp2x_gelu". | |
| use_token_compression (bool): Whether to use token compression for videos. Default is True. | |
| image_token_index (int): Token index for image placeholders. Default is -1. | |
| **kwargs: Additional arguments passed to Qwen2Config. | |
| """ | |
| super().__init__(**kwargs) | |
| self.model_type = "hulumed_qwen3" | |
| self.vision_encoder = vision_encoder | |
| if vision_encoder_config is not None and not isinstance(vision_encoder_config, PretrainedConfig): | |
| vision_encoder_config = HulumedVisionEncoderConfig(**vision_encoder_config) | |
| self.vision_encoder_config = vision_encoder_config | |
| self.mm_projector_type = mm_projector_type | |
| self.use_token_compression = use_token_compression | |
| self.image_token_index = image_token_index |