Upload folder using huggingface_hub
- config.json +1 -1
- esm_nv.py +9 -2
config.json
CHANGED
@@ -33,7 +33,7 @@
   "qkv_weight_interleaved": true,
   "token_dropout": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.55.
+  "transformers_version": "4.55.4",
   "use_cache": true,
   "vocab_list": null,
   "vocab_size": 33
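Note: the config.json change just updates the recorded transformers_version to 4.55.4. A minimal sketch of checking what a checkout records after loading (the repo id below is a placeholder, and trust_remote_code is needed because the model ships custom code in esm_nv.py):

from transformers import AutoConfig

# Placeholder repo id; substitute the actual model repository.
config = AutoConfig.from_pretrained("org/esm-nv-model", trust_remote_code=True)
print(config.transformers_version)  # "4.55.4" after this commit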
esm_nv.py
CHANGED
@@ -1,4 +1,5 @@
 # coding=utf-8
+# noqa: license-check
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-Apache2
 # Copyright 2022 Meta and The HuggingFace Inc. team. All rights reserved.
@@ -137,7 +138,7 @@ class NVEsmEncoder(nn.Module):
         self.emb_layer_norm_after = transformer_engine.pytorch.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         if config.position_embedding_type == "rotary":
             self.rotary_embeddings = RotaryPositionEmbedding(config.hidden_size // config.num_attention_heads)
-            self.te_rope_emb = self.rotary_embeddings(max_seq_len=config.max_position_embeddings)
+            self.te_rope_emb = self.rotary_embeddings(max_seq_len=config.max_position_embeddings)
         else:
             self.te_rope_emb = None
 
@@ -156,6 +157,12 @@ class NVEsmEncoder(nn.Module):
         """
         all_hidden_states = () if output_hidden_states else None
 
+        if self.te_rope_emb is not None:
+            te_rope_emb = self.te_rope_emb.to(hidden_states.device, non_blocking=True)
+            te_rope_emb = te_rope_emb[: hidden_states.shape[1]]
+        else:
+            te_rope_emb = None
+
         for layer_module in self.layers:
             if output_hidden_states:
                 all_hidden_states = (*all_hidden_states, hidden_states)
@@ -163,7 +170,7 @@ class NVEsmEncoder(nn.Module):
             hidden_states = layer_module(
                 hidden_states,
                 attention_mask,
-                rotary_pos_emb=self.te_rope_emb,
+                rotary_pos_emb=te_rope_emb,
             )
 
         hidden_states = self.emb_layer_norm_after(hidden_states)
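Note: the esm_nv.py change keeps the Transformer Engine rotary table precomputed once in __init__, but now, on each forward pass, copies it to the input's device and trims it to the current sequence length before passing it to the layers. A minimal standalone sketch of that pattern, assuming the table has TE's usual [max_seq_len, 1, 1, head_dim] layout and [batch, seq_len, hidden] inputs (all names and sizes below are illustrative, not taken from the model):

import torch

# Stand-in rotary table with dummy values; in the model this comes from
# RotaryPositionEmbedding(head_dim)(max_seq_len=config.max_position_embeddings).
te_rope_emb = torch.randn(2048, 1, 1, 64)  # [max_seq_len, 1, 1, head_dim]

def rope_for_batch(hidden_states: torch.Tensor) -> torch.Tensor:
    # hidden_states is assumed [batch, seq_len, hidden], so shape[1] is the
    # sequence length. non_blocking=True lets the host-to-device copy overlap
    # other work when the source tensor sits in pinned memory.
    table = te_rope_emb.to(hidden_states.device, non_blocking=True)
    return table[: hidden_states.shape[1]]

x = torch.zeros(8, 512, 1280)          # batch of 8 sequences of length 512
print(rope_for_batch(x).shape)         # torch.Size([512, 1, 1, 64])

The apparent motivation is that the buffer no longer has to live on any particular device at construction time, and each layer receives a table sliced to the actual sequence length rather than the full max_position_embeddings window.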