Fix broken SentenceTransformer snippet; format code with Python format
#11
by
tomaarsen
HF Staff
- opened
README.md
CHANGED
|
@@ -4660,7 +4660,7 @@ refer to [enable-unpadding-and-xformers](https://huggingface.co/Alibaba-NLP/new-
|
|
| 4660 |
|
| 4661 |
|
| 4662 |
### Get Dense Embeddings with Transformers
|
| 4663 |
-
```
|
| 4664 |
# Requires transformers>=4.36.0
|
| 4665 |
|
| 4666 |
import torch.nn.functional as F
|
|
@@ -4693,12 +4693,10 @@ print(scores.tolist())
|
|
| 4693 |
```
|
| 4694 |
|
| 4695 |
### Use with sentence-transformers
|
| 4696 |
-
```
|
| 4697 |
# Requires sentences-transformers>=3.0.0
|
| 4698 |
|
| 4699 |
from sentence_transformers import SentenceTransformer
|
| 4700 |
-
from sentence_transformers.util import cos_sim
|
| 4701 |
-
import numpy as np
|
| 4702 |
|
| 4703 |
input_texts = [
|
| 4704 |
"what is the capital of China?",
|
|
@@ -4708,24 +4706,18 @@ input_texts = [
|
|
| 4708 |
]
|
| 4709 |
|
| 4710 |
model_name_or_path="Alibaba-NLP/gte-multilingual-base"
|
| 4711 |
-
model = SentenceTransformer(
|
| 4712 |
-
embeddings = model.encode(input_texts) # embeddings.shape (4, 768)
|
| 4713 |
-
|
| 4714 |
-
# normalized embeddings
|
| 4715 |
-
norms = np.linalg.norm(embeddings, ord=2, axis=1, keepdims=True)
|
| 4716 |
-
norms[norms == 0] = 1
|
| 4717 |
-
embeddings = embeddings / norms
|
| 4718 |
|
| 4719 |
# sim scores
|
| 4720 |
-
scores = (embeddings[:1]
|
| 4721 |
|
| 4722 |
print(scores.tolist())
|
| 4723 |
# [[0.301699697971344, 0.7503870129585266, 0.32030850648880005]]
|
| 4724 |
-
|
| 4725 |
```
|
| 4726 |
|
| 4727 |
### Use with custom code to get dense embeddings and sparse token weights
|
| 4728 |
-
```
|
| 4729 |
# You can find the script gte_embedding.py in https://huggingface.co/Alibaba-NLP/gte-multilingual-base/blob/main/scripts/gte_embedding.py
|
| 4730 |
|
| 4731 |
from gte_embedding import GTEEmbeddidng
|
|
|
|
| 4660 |
|
| 4661 |
|
| 4662 |
### Get Dense Embeddings with Transformers
|
| 4663 |
+
```python
|
| 4664 |
# Requires transformers>=4.36.0
|
| 4665 |
|
| 4666 |
import torch.nn.functional as F
|
|
|
|
| 4693 |
```
|
| 4694 |
|
| 4695 |
### Use with sentence-transformers
|
| 4696 |
+
```python
|
| 4697 |
# Requires sentence-transformers>=3.0.0
|
| 4698 |
|
| 4699 |
from sentence_transformers import SentenceTransformer
|
|
|
|
|
|
|
| 4700 |
|
| 4701 |
input_texts = [
|
| 4702 |
"what is the capital of China?",
|
|
|
|
| 4706 |
]
|
| 4707 |
|
| 4708 |
model_name_or_path="Alibaba-NLP/gte-multilingual-base"
|
| 4709 |
+
model = SentenceTransformer(model_name_or_path, trust_remote_code=True)
|
| 4710 |
+
embeddings = model.encode(input_texts, normalize_embeddings=True) # embeddings.shape (4, 768)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4711 |
|
| 4712 |
# sim scores
|
| 4713 |
+
scores = model.similarity(embeddings[:1], embeddings[1:])
|
| 4714 |
|
| 4715 |
print(scores.tolist())
|
| 4716 |
# [[0.301699697971344, 0.7503870129585266, 0.32030850648880005]]
|
|
|
|
| 4717 |
```
|
| 4718 |
|
| 4719 |
### Use with custom code to get dense embeddings and sparse token weights
|
| 4720 |
+
```python
|
| 4721 |
# You can find the script gte_embedding.py in https://huggingface.co/Alibaba-NLP/gte-multilingual-base/blob/main/scripts/gte_embedding.py
|
| 4722 |
|
| 4723 |
from gte_embedding import GTEEmbeddidng
|