Complete Sentence Transformers integration + patch inference on CPU & Windows (#4)

- Remove reference_compile; complete Sentence Transformers support (5c66f04b1ffb7a7db1a480361c8a574deed2e3b7)
- Notify users that flash_attn is recommended (b94cf64d6cb8ea2e1746f7e6df169b8300804035)

Files changed:

- README.md: +29 -10
- config.json: +0 -1
- config_sentence_transformers.json: +10 -0
- modules.json: +14 -0
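Background on the "CPU & Windows" part of this patch: with `"reference_compile": true` in config.json, `transformers` wraps ModernBERT's reference forward pass in `torch.compile`, which can fail or stall on platforms without a working compiler backend (Triton is generally unavailable on Windows, and compilation brings little benefit on CPU). Before this commit, affected users had to override the flag themselves. A minimal sketch of that now-unnecessary workaround, assuming the stock ModernBERT loading path in `transformers>=4.48.0`:

```python
# Sketch of the pre-commit workaround: override the config flag at load time.
# With "reference_compile" removed from config.json, a plain from_pretrained
# call now works on CPU and Windows without this.
from transformers import AutoModel

model = AutoModel.from_pretrained(
    "Alibaba-NLP/gte-modernbert-base",
    reference_compile=False,  # config attributes passed as kwargs override config.json
)
```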
README.md
CHANGED

@@ -6,6 +6,8 @@ base_model:
 - answerdotai/ModernBERT-base
 pipeline_tag: sentence-similarity
 library_name: transformers
+tags:
+- sentence-transformers
 ---
 
 # gte-modernbert-base

@@ -33,10 +35,17 @@ The `gte-modernbert` models demonstrates competitive performance in several text
 
 ## Usage
 
-
+> [!TIP]
+> For `transformers` and `sentence-transformers`, if your GPU supports it, the efficient Flash Attention 2 will be used automatically if you have `flash_attn` installed. It is not mandatory.
+>
+> ```bash
+> pip install flash_attn
+> ```
+
+Use with `transformers`:
 
 ```python
-# Requires transformers>=4.
+# Requires transformers>=4.48.0
 
 import torch.nn.functional as F
 from transformers import AutoModel, AutoTokenizer

@@ -48,9 +57,9 @@ input_texts = [
     "sorting algorithms"
 ]
 
-model_path =
+model_path = "Alibaba-NLP/gte-modernbert-base"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModel.from_pretrained(model_path
+model = AutoModel.from_pretrained(model_path)
 
 # Tokenize the input texts
 batch_dict = tokenizer(input_texts, max_length=8192, padding=True, truncation=True, return_tensors='pt')

@@ -62,21 +71,31 @@ embeddings = outputs.last_hidden_state[:, 0]
 embeddings = F.normalize(embeddings, p=2, dim=1)
 scores = (embeddings[:1] @ embeddings[1:].T) * 100
 print(scores.tolist())
+# [[42.89073944091797, 71.30911254882812, 33.664554595947266]]
 ```
 
 Use with `sentence-transformers`:
 
 ```python
-# Requires
-
+# Requires transformers>=4.48.0
 from sentence_transformers import SentenceTransformer
 from sentence_transformers.util import cos_sim
 
-
+input_texts = [
+    "what is the capital of China?",
+    "how to implement quick sort in python?",
+    "Beijing",
+    "sorting algorithms"
+]
+
+model = SentenceTransformer("Alibaba-NLP/gte-modernbert-base")
+embeddings = model.encode(input_texts)
+print(embeddings.shape)
+# (4, 768)
 
-
-
-
+similarities = cos_sim(embeddings[0], embeddings[1:])
+print(similarities)
+# tensor([[0.4289, 0.7131, 0.3366]])
 ```
 
 Use with `transformers.js`:
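The TIP block added above relies on automatic backend selection. If you would rather pin the attention implementation explicitly, `from_pretrained` accepts the standard `attn_implementation` argument; a short sketch (note that `flash_attention_2` also needs a half-precision dtype and a supported GPU, while `sdpa` is a portable default):

```python
# Sketch: pin the attention backend instead of relying on auto-detection.
import torch
from transformers import AutoModel

model_path = "Alibaba-NLP/gte-modernbert-base"

# Portable default (CPU, Windows, or no flash_attn installed):
model = AutoModel.from_pretrained(model_path, attn_implementation="sdpa")

# With flash_attn installed and a supported GPU:
# model = AutoModel.from_pretrained(
#     model_path,
#     attn_implementation="flash_attention_2",
#     torch_dtype=torch.float16,
# ).to("cuda")
```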
config.json
CHANGED

@@ -35,7 +35,6 @@
   "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
-  "reference_compile": true,
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,
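A quick way to confirm the flag is gone from the shipped config, using the standard `AutoConfig` entry point (a small sketch; the expected output is an assumption based on this diff):

```python
# Sketch: check that "reference_compile" no longer appears in the config.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("Alibaba-NLP/gte-modernbert-base")
print(getattr(config, "reference_compile", None))
# Expected after this commit: None, so transformers decides at runtime
```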
config_sentence_transformers.json
ADDED

@@ -0,0 +1,10 @@
+{
+  "__version__": {
+    "sentence_transformers": "2.7.0",
+    "transformers": "4.48.0",
+    "pytorch": "2.5.0+cu121"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}
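This new file records the library versions used for the export, declares that no instruction prompts are defined (`"prompts": {}`, `"default_prompt_name": null`), and sets cosine as the similarity function. In `sentence-transformers>=3.0`, `similarity_fn_name` drives `model.similarity()`, which makes the explicit `cos_sim` import in the README optional; a sketch:

```python
# Sketch: similarity_fn_name = "cosine" makes model.similarity() compute
# cosine similarity directly (sentence-transformers >= 3.0).
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Alibaba-NLP/gte-modernbert-base")
print(model.similarity_fn_name)  # "cosine", read from this config file

embeddings = model.encode(["what is the capital of China?", "Beijing"])
print(model.similarity(embeddings[:1], embeddings[1:]))
```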
modules.json
ADDED

@@ -0,0 +1,14 @@
+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]
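modules.json declares the two-stage Sentence Transformers pipeline: module 0 is the ModernBERT encoder loaded from the repository root (`"path": ""`), and module 1 is a pooling layer loaded from `1_Pooling/`. The pooling settings themselves live in `1_Pooling/config.json`, which is not part of this diff; since the README takes `last_hidden_state[:, 0]`, CLS pooling is the natural assumption. A hand-built equivalent under that assumption:

```python
# Sketch: assembling the same Transformer -> Pooling pipeline by hand.
# CLS pooling is assumed here; the actual setting lives in 1_Pooling/config.json.
from sentence_transformers import SentenceTransformer, models

transformer = models.Transformer("Alibaba-NLP/gte-modernbert-base")  # module 0
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),  # 768 for this model
    pooling_mode="cls",                          # assumed, see note above
)
model = SentenceTransformer(modules=[transformer, pooling])
```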