File size: 3,226 Bytes
d8e2cf1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
"""HuluMed model configuration."""
import importlib.util
import os.path as osp
from typing import Any, Dict, Optional, Union

from transformers import AutoConfig, AutoModel, PretrainedConfig, Qwen3Config
# Bring HulumedVisionEncoderConfig into scope. The relative import is tried
# first; if it raises ModuleNotFoundError, the sibling file
# ``configuration_hulumed_encoder.py`` is loaded directly from this file's
# directory instead.
# NOTE(review): a relative import attempted with no known parent package
# raises a plain ImportError, which this handler does not catch -- confirm
# that ModuleNotFoundError is the only failure the fallback must cover.
try:
    from .configuration_hulumed_encoder import HulumedVisionEncoderConfig
except ModuleNotFoundError:
    # Describe the module by its file path, next to the current file.
    spec = importlib.util.spec_from_file_location(
        "configuration_hulumed_encoder",
        osp.join(osp.dirname(__file__), "configuration_hulumed_encoder.py"),
    )
    configuration_hulumed_encoder = importlib.util.module_from_spec(spec)
    # Run the module body so its top-level names exist; the module object is
    # kept only in this namespace (nothing here adds it to sys.modules).
    spec.loader.exec_module(configuration_hulumed_encoder)
    HulumedVisionEncoderConfig = getattr(
        configuration_hulumed_encoder,
        "HulumedVisionEncoderConfig",
    )
# Bring HulumedVisionEncoderModel into scope. The relative import is tried
# first; if it raises ModuleNotFoundError, the sibling file
# ``modeling_hulumed_encoder.py`` is loaded directly from this file's
# directory instead.
# NOTE(review): a relative import attempted with no known parent package
# raises a plain ImportError, which this handler does not catch -- confirm
# that ModuleNotFoundError is the only failure the fallback must cover.
try:
    from .modeling_hulumed_encoder import HulumedVisionEncoderModel
except ModuleNotFoundError:
    # Describe the module by its file path, next to the current file.
    spec = importlib.util.spec_from_file_location(
        "modeling_hulumed_encoder",
        osp.join(osp.dirname(__file__), "modeling_hulumed_encoder.py"),
    )
    modeling_hulumed_encoder = importlib.util.module_from_spec(spec)
    # Run the module body so its top-level names exist; the module object is
    # kept only in this namespace (nothing here adds it to sys.modules).
    spec.loader.exec_module(modeling_hulumed_encoder)
    HulumedVisionEncoderModel = getattr(
        modeling_hulumed_encoder,
        "HulumedVisionEncoderModel",
    )
# Register the vision encoder with the transformers Auto classes at import
# time: the "hulumed_vision_encoder" model type is associated with its config
# class, and that config class is associated with the encoder model class.
# NOTE(review): confirm how a repeated registration is expected to behave if
# this module body is ever executed more than once in the same process.
AutoConfig.register("hulumed_vision_encoder", HulumedVisionEncoderConfig)
AutoModel.register(HulumedVisionEncoderConfig, HulumedVisionEncoderModel)
# Private sentinel that tells "argument omitted" apart from an explicit
# ``None``; it replaces the former shared mutable ``{}`` default.
_DEFAULT_VISION_ENCODER_CONFIG: Any = object()


class HulumedQwen3Config(Qwen3Config):
    """
    HuluMed model configuration.

    This configuration class extends Qwen3Config to store the configuration
    of a HuluMed model. On top of the Qwen3 settings it keeps the vision
    encoder identifier, the vision encoder configuration and the multimodal
    projector settings.
    """

    model_type = "hulumed_qwen3"
    sub_configs = {"vision_encoder_config": HulumedVisionEncoderConfig}

    def __init__(
        self,
        vision_encoder: Optional[str] = None,
        vision_encoder_config: Optional[
            Union[Dict[str, Any], PretrainedConfig]
        ] = _DEFAULT_VISION_ENCODER_CONFIG,
        mm_projector_type: str = "mlp2x_gelu",
        use_token_compression: bool = True,
        image_token_index: int = -1,
        **kwargs,
    ):
        """
        Initialize HuluMed configuration.

        Args:
            vision_encoder: Path or identifier of the vision encoder.
            vision_encoder_config: Configuration for the vision encoder.
                A mapping is expanded into the keyword arguments of
                ``HulumedVisionEncoderConfig``; a ``PretrainedConfig``
                instance is stored as given; an explicit ``None`` is stored
                as ``None``. When the argument is omitted, a
                ``HulumedVisionEncoderConfig`` built with no arguments is
                stored.
            mm_projector_type: Type of multimodal projector.
                Default is "mlp2x_gelu".
            use_token_compression: Whether to use token compression for
                videos. Default is True.
            image_token_index: Token index for image placeholders.
                Default is -1.
            **kwargs: Additional arguments passed to Qwen3Config.
        """
        super().__init__(**kwargs)

        # Set on the instance after the parent initialiser has run, so the
        # instance value is always "hulumed_qwen3".
        self.model_type = "hulumed_qwen3"
        self.vision_encoder = vision_encoder

        # An omitted argument behaves like an empty mapping, i.e. it yields
        # an encoder config constructed without arguments.
        if vision_encoder_config is _DEFAULT_VISION_ENCODER_CONFIG:
            vision_encoder_config = {}

        # Anything that is neither None nor already a config object is
        # treated as keyword arguments for the encoder config class.
        if vision_encoder_config is not None and not isinstance(
            vision_encoder_config, PretrainedConfig
        ):
            vision_encoder_config = HulumedVisionEncoderConfig(**vision_encoder_config)
        self.vision_encoder_config = vision_encoder_config

        self.mm_projector_type = mm_projector_type
        self.use_token_compression = use_token_compression
        self.image_token_index = image_token_index