Patrick Haller committed
Commit cd8c17d · 1 Parent(s): 00c609b

Adding SequenceClassification model/head to impl

Files changed:
- README.md +6 -1
- config.json +1 -0
- modeling_hf_alibaba_nlp_gte.py +61 -5
README.md CHANGED

@@ -9044,6 +9044,11 @@ model-index:
     task:
       type: PairClassification
 ---
+
+---
+> [!IMPORTANT]
+> This is a fork of the original [snowflake-arctic-embed-m-v2.0](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v2.0) model, which was released under the Apache 2.0 license.
+---
 <h1 align="center">Snowflake's Arctic-embed-m-v2.0</h1>
 <h4 align="center">
    <p>

@@ -9213,4 +9218,4 @@ You also can email Daniel Campos([email protected]).
 
 
 ## License
-Arctic is licensed under the [Apache-2](https://www.apache.org/licenses/LICENSE-2.0). The released models can be used for commercial purposes free of charge.
+Arctic is licensed under the [Apache-2](https://www.apache.org/licenses/LICENSE-2.0). The released models can be used for commercial purposes free of charge.
config.json CHANGED

@@ -6,6 +6,7 @@
   "auto_map": {
     "AutoConfig": "configuration_hf_alibaba_nlp_gte.GteConfig",
     "AutoModel": "modeling_hf_alibaba_nlp_gte.GteModel"
+    "AutoModelForSequenceClassification": "modeling_hf_alibaba_nlp_gte.GteForSequenceClassification"
   },
   "classifier_dropout": 0.1,
   "hidden_act": "gelu",
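With the new `auto_map` entry, the custom head can be resolved through the standard Auto class. The snippet below is a minimal sketch, not documented usage from this repo: the repo id is a placeholder you would replace with the fork's actual path, and it assumes the tokenizer returns only `input_ids`/`attention_mask` (the forward signature added in this commit accepts no extra keyword arguments such as `token_type_ids`).

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Placeholder repo id: substitute the actual path of this fork (or a local clone).
model_id = "path/to/this-fork"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# trust_remote_code lets the auto_map entry resolve to
# modeling_hf_alibaba_nlp_gte.GteForSequenceClassification.
model = AutoModelForSequenceClassification.from_pretrained(model_id, trust_remote_code=True)
model.eval()

# The head emits a single logit per sequence, so a query/document pair
# can be scored like a cross-encoder reranker.
inputs = tokenizer(
    "what is arctic embed?",
    "Arctic-embed is a family of open text embedding models.",
    return_tensors="pt",
)
with torch.no_grad():
    score = model(**inputs).logits.squeeze(-1)
print(score)
```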
modeling_hf_alibaba_nlp_gte.py CHANGED

@@ -26,11 +26,7 @@ from transformers.activations import ACT2FN
 from transformers.modeling_outputs import (
     BaseModelOutput,
     BaseModelOutputWithPooling,
-    MaskedLMOutput,
-    MultipleChoiceModelOutput,
-    QuestionAnsweringModelOutput,
-    SequenceClassifierOutput,
-    ModelOutput,
+    SequenceClassifierOutputWithPast,
 )
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import logging

@@ -965,3 +961,63 @@ class GteModel(GtePreTrainedModel):
             hidden_states=encoder_outputs.hidden_states,
             attentions=encoder_outputs.attentions,
         )
+
+
+class GteForSequenceClassification(GtePreTrainedModel):
+
+    def __init__(self, config: GteConfig):
+        super().__init__(config)
+        self.config = config
+        self.num_labels = 1
+        self.model = GteModel(config, add_pooling_layer=True)
+
+        self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)
+        self.loss_function = nn.MSELoss()
+
+    def get_input_embeddings(self):
+        return self.model.embed_tokens
+
+    def set_input_embeddings(self, value):
+        self.model.embed_tokens = value
+
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values=None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+    ) -> SequenceClassifierOutputWithPast:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
+            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
+            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
+        """
+
+        transformer_outputs = self.model(
+            input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+        )
+        hidden_states = transformer_outputs.pooler_output
+
+        logits = self.score(hidden_states)
+
+        loss = None
+        if labels is not None:
+            loss = self.loss_function(logits.squeeze(-1), labels.squeeze(-1))
+
+        return SequenceClassifierOutputWithPast(
+            loss=loss,
+            logits=logits,
+            hidden_states=transformer_outputs.hidden_states,
+            attentions=transformer_outputs.attentions,
+        )
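Since `num_labels` is hard-coded to 1 and the loss is `nn.MSELoss`, the added head acts as a regression-style scorer over the pooled representation. Below is a standalone sketch of that computation only; the tensor names and sizes are illustrative stand-ins, not part of the commit.

```python
import torch
import torch.nn as nn

# Illustrative stand-ins: `pooled` plays the role of GteModel's pooler_output,
# `relevance` is a float target per sequence. Sizes are examples only.
batch, hidden_size = 4, 768
pooled = torch.randn(batch, hidden_size)

score = nn.Linear(hidden_size, 1, bias=False)   # mirrors self.score with num_labels = 1
logits = score(pooled)                          # shape (batch, 1): one scalar score per sequence

relevance = torch.rand(batch)                   # float regression labels, e.g. graded relevance
loss = nn.MSELoss()(logits.squeeze(-1), relevance.squeeze(-1))
print(logits.shape, loss.item())
```

Note that although the docstring mentions a cross-entropy branch for `config.num_labels > 1`, this version always uses the single-logit MSE path.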