Upload files with `vila-upload`.
Browse files
Upload mm_utils.py
Upload siglip_encoder.py
- mm_utils.py +1 -1
- siglip_encoder.py +6 -2
mm_utils.py
CHANGED
|
@@ -26,7 +26,7 @@ import torch
|
|
| 26 |
from PIL import Image
|
| 27 |
from transformers import StoppingCriteria
|
| 28 |
|
| 29 |
-
from
|
| 30 |
|
| 31 |
|
| 32 |
def get_frame_from_vcap(vidcap, num_frames=10, max_fps=0.0, fps=None, frame_count=None, video_file_name=None):
|
|
|
|
| 26 |
from PIL import Image
|
| 27 |
from transformers import StoppingCriteria
|
| 28 |
|
| 29 |
+
from .constants import DEFAULT_IMAGE_TOKEN
|
| 30 |
|
| 31 |
|
| 32 |
def get_frame_from_vcap(vidcap, num_frames=10, max_fps=0.0, fps=None, frame_count=None, video_file_name=None):
|
siglip_encoder.py
CHANGED
|
@@ -19,12 +19,16 @@ import torch.nn as nn
|
|
| 19 |
import torch.nn.functional as F
|
| 20 |
from accelerate.hooks import add_hook_to_module
|
| 21 |
from einops import rearrange
|
| 22 |
-
|
| 23 |
from transformers import AutoConfig, PretrainedConfig, PreTrainedModel, SiglipImageProcessor
|
| 24 |
from transformers.image_processing_utils import BaseImageProcessor
|
| 25 |
-
from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
|
| 26 |
from transformers.models.siglip import SiglipVisionModel
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
class VisionTower(nn.Module):
|
| 30 |
def __init__(self, vision_tower, args, delay_load=False):
|
|
|
|
| 19 |
import torch.nn.functional as F
|
| 20 |
from accelerate.hooks import add_hook_to_module
|
| 21 |
from einops import rearrange
|
| 22 |
+
|
| 23 |
from transformers import AutoConfig, PretrainedConfig, PreTrainedModel, SiglipImageProcessor
|
| 24 |
from transformers.image_processing_utils import BaseImageProcessor
|
|
|
|
| 25 |
from transformers.models.siglip import SiglipVisionModel
|
| 26 |
|
| 27 |
+
from s2wrapper import forward as multiscale_forward
|
| 28 |
+
|
| 29 |
+
# from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
|
| 30 |
+
def is_deepspeed_zero3_enabled():
|
| 31 |
+
return False
|
| 32 |
|
| 33 |
class VisionTower(nn.Module):
|
| 34 |
def __init__(self, vision_tower, args, delay_load=False):
|