Upload modeling_meralion.py with huggingface_hub
Browse files- modeling_meralion.py +1 -46
modeling_meralion.py
CHANGED
|
@@ -1,18 +1,4 @@
|
|
| 1 |
-
|
| 2 |
-
# Copyright 2024 the HuggingFace Inc. team. All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
-
# you may not use this file except in compliance with the License.
|
| 6 |
-
# You may obtain a copy of the License at
|
| 7 |
-
#
|
| 8 |
-
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
-
#
|
| 10 |
-
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 12 |
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 13 |
-
# See the License for the specific language governing permissions and
|
| 14 |
-
# limitations under the License.
|
| 15 |
-
"""PyTorch MERaLiON model."""
|
| 16 |
|
| 17 |
import math
|
| 18 |
from dataclasses import dataclass
|
|
@@ -1161,37 +1147,6 @@ class MERaLiONForConditionalGeneration(MERaLiONPreTrainedModel, GenerationMixin)
|
|
| 1161 |
self.vocab_size = model_embeds.num_embeddings
|
| 1162 |
return model_embeds
|
| 1163 |
|
| 1164 |
-
def _get_multimodal_input_embeds(
|
| 1165 |
-
self,
|
| 1166 |
-
input_ids_left,
|
| 1167 |
-
input_ids_right,
|
| 1168 |
-
attention_mask_left,
|
| 1169 |
-
attention_mask_right,
|
| 1170 |
-
speech_audio_contexts_embeds,
|
| 1171 |
-
speech_audio_contexts_atts,
|
| 1172 |
-
):
|
| 1173 |
-
input_embeds_left = self.text_decoder.base_model.embed_tokens(input_ids_left)
|
| 1174 |
-
input_embeds_right = self.text_decoder.base_model.embed_tokens(input_ids_right)
|
| 1175 |
-
|
| 1176 |
-
multimodal_embeds = torch.cat(
|
| 1177 |
-
[
|
| 1178 |
-
input_embeds_left,
|
| 1179 |
-
speech_audio_contexts_embeds,
|
| 1180 |
-
input_embeds_right,
|
| 1181 |
-
],
|
| 1182 |
-
dim=1,
|
| 1183 |
-
)
|
| 1184 |
-
|
| 1185 |
-
multimodal_attention_mask = torch.cat(
|
| 1186 |
-
[
|
| 1187 |
-
attention_mask_left,
|
| 1188 |
-
speech_audio_contexts_atts,
|
| 1189 |
-
attention_mask_right,
|
| 1190 |
-
],
|
| 1191 |
-
dim=1,
|
| 1192 |
-
)
|
| 1193 |
-
return multimodal_embeds, multimodal_attention_mask
|
| 1194 |
-
|
| 1195 |
@add_start_docstrings_to_model_forward(MERALION_INPUTS_DOCSTRING)
|
| 1196 |
@replace_return_docstrings(output_type=MERaLiONOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
| 1197 |
def forward(
|
|
|
|
| 1 |
+
"""PyTorch MERaLiON AudioLLM model."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import math
|
| 4 |
from dataclasses import dataclass
|
|
|
|
| 1147 |
self.vocab_size = model_embeds.num_embeddings
|
| 1148 |
return model_embeds
|
| 1149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1150 |
@add_start_docstrings_to_model_forward(MERALION_INPUTS_DOCSTRING)
|
| 1151 |
@replace_return_docstrings(output_type=MERaLiONOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
| 1152 |
def forward(
|