Hulu-Med-14B / configuration_hulumed_qwen3.py
ZJU-AI4H's picture
Upload folder using huggingface_hub
d8e2cf1 verified
raw
history blame
3.23 kB
"""HuluMed model configuration."""
import importlib.util
import os.path as osp
from typing import Optional, Dict, Any
from transformers import AutoConfig, AutoModel, PretrainedConfig, Qwen3Config
# Bring HulumedVisionEncoderConfig into scope. The package-relative form is
# attempted first; if it cannot be resolved, the sibling source file located
# next to this module is executed directly instead.
# NOTE(review): keep the relative statement below as a literal import line;
# loaders that scan the source for sibling-module imports presumably rely on
# it -- confirm before refactoring.
try:
    from .configuration_hulumed_encoder import HulumedVisionEncoderConfig
except ImportError:
    # ImportError is caught rather than only its ModuleNotFoundError subclass:
    # a relative import executed in a module that has no parent package raises
    # plain ImportError ("attempted relative import with no known parent
    # package"), which the narrower clause would let escape.
    spec = importlib.util.spec_from_file_location(
        "configuration_hulumed_encoder",
        osp.join(osp.dirname(__file__), "configuration_hulumed_encoder.py"),
    )
    configuration_hulumed_encoder = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(configuration_hulumed_encoder)
    HulumedVisionEncoderConfig = (
        configuration_hulumed_encoder.HulumedVisionEncoderConfig
    )
# Bring HulumedVisionEncoderModel into scope. The package-relative form is
# attempted first; if it cannot be resolved, the sibling source file located
# next to this module is executed directly instead.
# NOTE(review): keep the relative statement below as a literal import line;
# loaders that scan the source for sibling-module imports presumably rely on
# it -- confirm before refactoring.
try:
    from .modeling_hulumed_encoder import HulumedVisionEncoderModel
except ImportError:
    # ImportError is caught rather than only its ModuleNotFoundError subclass:
    # a relative import executed in a module that has no parent package raises
    # plain ImportError ("attempted relative import with no known parent
    # package"), which the narrower clause would let escape.
    spec = importlib.util.spec_from_file_location(
        "modeling_hulumed_encoder",
        osp.join(osp.dirname(__file__), "modeling_hulumed_encoder.py"),
    )
    modeling_hulumed_encoder = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(modeling_hulumed_encoder)
    HulumedVisionEncoderModel = modeling_hulumed_encoder.HulumedVisionEncoderModel
# Register the vision encoder with the transformers Auto* classes: the
# "hulumed_vision_encoder" model type is associated with its config class, and
# that config class is associated with the encoder model class.
AutoConfig.register("hulumed_vision_encoder", HulumedVisionEncoderConfig)
AutoModel.register(HulumedVisionEncoderConfig, HulumedVisionEncoderModel)
class HulumedQwen3Config(Qwen3Config):
    """Configuration of a HuluMed model.

    Subclasses ``Qwen3Config`` and additionally stores the settings of the
    vision encoder and of the multimodal projector.
    """

    model_type = "hulumed_qwen3"
    sub_configs = {"vision_encoder_config": HulumedVisionEncoderConfig}

    def __init__(
        self,
        vision_encoder: Optional[str] = None,
        vision_encoder_config: Dict[str, Any] = {},
        mm_projector_type: str = "mlp2x_gelu",
        use_token_compression: bool = True,
        image_token_index: int = -1,
        **kwargs,
    ):
        """Build a HuluMed configuration.

        Args:
            vision_encoder: Path or identifier of the vision encoder, if any.
            vision_encoder_config: Vision-encoder settings. A mapping is
                expanded into a ``HulumedVisionEncoderConfig``; ``None`` or an
                existing ``PretrainedConfig`` instance is stored unchanged.
            mm_projector_type: Type of multimodal projector.
            use_token_compression: Whether to use token compression for videos.
            image_token_index: Token index for image placeholders.
            **kwargs: Remaining arguments, forwarded to ``Qwen3Config``.
        """
        super().__init__(**kwargs)
        self.model_type = "hulumed_qwen3"
        self.vision_encoder = vision_encoder

        # The shared ``{}`` default above is only ever unpacked below, never
        # mutated or stored, and ``None`` is a distinct value that is kept
        # as-is, so the default is deliberately left unchanged.
        stored_as_given = vision_encoder_config is None or isinstance(
            vision_encoder_config, PretrainedConfig
        )
        if stored_as_given:
            encoder_config = vision_encoder_config
        else:
            encoder_config = HulumedVisionEncoderConfig(**vision_encoder_config)
        self.vision_encoder_config = encoder_config

        self.mm_projector_type = mm_projector_type
        self.use_token_compression = use_token_compression
        self.image_token_index = image_token_index