Spaces:
Runtime error
Runtime error
| import os | |
| from .clip_encoder import CLIPVisionTower | |
| from .imagebind import ImageBindWrapper | |
| from .open_clip_encoder import OpenCLIPVisionTower | |
| from .siglip_encoder import SigLipVisionTower | |
| from .clip_encoder import CLIPVisionTower, CLIPVisionTowerS2 | |
| from .eva_clip.eva_clip_encoder import EvaClipVisionTower | |
| from .dev_eva_clip.eva_vit import EvaViTWrapper | |
| from blip3o.model.nextdit_crossattn import NextDiTCrossAttnConfig, NextDiTCrossAttn | |
| from diffusers.models import AutoencoderKL | |
| from diffusers.schedulers import FlowMatchEulerDiscreteScheduler | |
def build_vision_tower(vision_tower_cfg, **kwargs):
    """Instantiate the vision encoder selected by ``vision_tower_cfg``.

    The tower name is read from ``vision_tower_cfg.mm_vision_tower``; when
    that attribute is absent, ``vision_tower_cfg.vision_tower`` is used.

    Dispatch order (first match wins):

    1. name contains ``"siglip"`` -> ``SigLipVisionTower``
    2. name contains ``"eva"`` -> ``EvaClipVisionTower``
    3. name starts with ``"openai"`` or ``"laion"``, contains
       ``"ShareGPT4V"``, or is an existing filesystem path ->
       ``CLIPVisionTowerS2`` when ``vision_tower_cfg.s2`` is truthy,
       otherwise ``CLIPVisionTower``

    Args:
        vision_tower_cfg: Config object carrying the attributes above.
        **kwargs: Forwarded unchanged to the selected tower constructor.

    Returns:
        The constructed vision tower instance.

    Raises:
        ValueError: If no tower name is configured, or the name matches none
            of the dispatch rules.
    """
    vision_tower = getattr(
        vision_tower_cfg,
        'mm_vision_tower',
        getattr(vision_tower_cfg, 'vision_tower', None),
    )
    # Fail with a clear message instead of the TypeError that
    # os.path.exists(None) / `"siglip" in None` would otherwise produce.
    if vision_tower is None:
        raise ValueError(
            "No vision tower configured: expected 'mm_vision_tower' or "
            "'vision_tower' on the config"
        )

    # NOTE(review): these are plain substring tests, so any name or path that
    # merely contains "siglip" / "eva" is routed here -- confirm intended.
    if "siglip" in vision_tower:
        return SigLipVisionTower(vision_tower, vision_tower_cfg=vision_tower_cfg, **kwargs)
    if "eva" in vision_tower:
        return EvaClipVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)

    # Cheap string checks first; only touch the filesystem when they fail.
    is_clip_like = (
        vision_tower.startswith(("openai", "laion"))
        or "ShareGPT4V" in vision_tower
        or os.path.exists(vision_tower)
    )
    if is_clip_like:
        if getattr(vision_tower_cfg, 's2', False):
            return CLIPVisionTowerS2(vision_tower, args=vision_tower_cfg, **kwargs)
        return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)

    raise ValueError(f'Unknown vision tower: {vision_tower}')
def build_gen_vision_tower(vision_tower_cfg, **kwargs):
    """Instantiate the vision encoder named by ``gen_vision_tower``.

    The tower name is read from ``vision_tower_cfg.gen_vision_tower``.

    Dispatch order (first match wins):

    1. name contains ``"siglip"`` -> ``SigLipVisionTower``
    2. name contains ``"eva"`` -> ``EvaClipVisionTower``
    3. name starts with ``"openai"`` or ``"laion"``, contains
       ``"ShareGPT4V"``, or is an existing filesystem path ->
       ``CLIPVisionTowerS2`` when ``vision_tower_cfg.s2`` is truthy,
       otherwise ``CLIPVisionTower``

    Args:
        vision_tower_cfg: Config object; must define ``gen_vision_tower``.
        **kwargs: Forwarded unchanged to the selected tower constructor.

    Returns:
        The constructed vision tower instance.

    Raises:
        AttributeError: If the config has no ``gen_vision_tower`` attribute.
        ValueError: If ``gen_vision_tower`` is ``None``, or the name matches
            none of the dispatch rules.
    """
    vision_tower = getattr(vision_tower_cfg, 'gen_vision_tower')
    # Fail with a clear message instead of the TypeError that
    # os.path.exists(None) / `"siglip" in None` would otherwise produce.
    if vision_tower is None:
        raise ValueError(
            "No generation vision tower configured: 'gen_vision_tower' is None"
        )

    # NOTE(review): these are plain substring tests, so any name or path that
    # merely contains "siglip" / "eva" is routed here -- confirm intended.
    if "siglip" in vision_tower:
        return SigLipVisionTower(vision_tower, vision_tower_cfg=vision_tower_cfg, **kwargs)
    if "eva" in vision_tower:
        return EvaClipVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)

    # Cheap string checks first; only touch the filesystem when they fail.
    is_clip_like = (
        vision_tower.startswith(("openai", "laion"))
        or "ShareGPT4V" in vision_tower
        or os.path.exists(vision_tower)
    )
    if is_clip_like:
        if getattr(vision_tower_cfg, 's2', False):
            return CLIPVisionTowerS2(vision_tower, args=vision_tower_cfg, **kwargs)
        return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)

    raise ValueError(f'Unknown vision tower: {vision_tower}')
def build_dit(vision_tower_cfg, **kwargs):
    """Build the diffusion components: DiT, frozen VAE and noise scheduler.

    ``vision_tower_cfg`` and ``**kwargs`` are accepted but not read by this
    function; every component is created from the fixed identifiers below.

    Returns:
        A ``(dit, vae, noise_scheduler)`` tuple where ``dit`` is a
        ``NextDiTCrossAttn`` built from a default ``NextDiTCrossAttnConfig``,
        ``vae`` is an ``AutoencoderKL`` put in eval mode with gradients
        disabled, and ``noise_scheduler`` is a
        ``FlowMatchEulerDiscreteScheduler``.
    """
    # Alternative VAE kept for reference:
    # AutoencoderKL.from_pretrained("stabilityai/sdxl-vae")
    frozen_vae = AutoencoderKL.from_pretrained(
        "black-forest-labs/FLUX.1-dev",
        subfolder="vae",
    )

    dit_config = NextDiTCrossAttnConfig()
    dit_model = NextDiTCrossAttn(dit_config)

    flow_scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
        "Alpha-VLLM/Lumina-Next-SFT-diffusers",
        subfolder="scheduler",
    )

    # Switch the VAE to eval mode and disable gradients on its parameters.
    frozen_vae.eval()
    frozen_vae.requires_grad_(False)

    return dit_model, frozen_vae, flow_scheduler