Update README.md
Browse files
README.md
CHANGED
|
@@ -30,8 +30,21 @@ import mlx.core as mx
|
|
| 30 |
|
| 31 |
model, tokenizer = load("mlx-community/embeddinggemma-300m-6bit")
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
embeddings = output.text_embeds # Normalized embeddings
|
| 36 |
|
| 37 |
# Compute dot product between normalized embeddings
|
|
@@ -41,4 +54,22 @@ print("Similarity matrix between texts:")
|
|
| 41 |
print(similarity_matrix)
|
| 42 |
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
```
|
|
|
|
| 30 |
|
| 31 |
model, tokenizer = load("mlx-community/embeddinggemma-300m-6bit")
|
| 32 |
|
| 33 |
+
|
| 34 |
+
# Texts to embed; each one already starts with its task prefix
|
| 35 |
+
sentences = [
|
| 36 |
+
"task: sentence similarity | query: Nothing really matters.",
|
| 37 |
+
"task: sentence similarity | query: The dog is barking.",
|
| 38 |
+
"task: sentence similarity | query: The dog is barking.",
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='mlx')
|
| 42 |
+
|
| 43 |
+
# Compute token embeddings
|
| 44 |
+
input_ids = encoded_input['input_ids']
|
| 45 |
+
attention_mask = encoded_input['attention_mask']
|
| 46 |
+
output = model(input_ids, attention_mask)
|
| 47 |
+
|
| 48 |
embeddings = output.text_embeds # Normalized embeddings
|
| 49 |
|
| 50 |
# Compute dot product between normalized embeddings
|
|
|
|
| 54 |
print(similarity_matrix)
|
| 55 |
|
| 56 |
|
| 57 |
+
# Task-specific prefixes: prepend the matching one to each input text before tokenizing
|
| 58 |
+
task_prefixes = {
|
| 59 |
+
"BitextMining": "task: search result | query: ",
|
| 60 |
+
"Clustering": "task: clustering | query: ",
|
| 61 |
+
"Classification": "task: classification | query: ",
|
| 62 |
+
"MultilabelClassification": "task: classification | query: ",
|
| 63 |
+
"PairClassification": "task: sentence similarity | query: ",
|
| 64 |
+
"InstructionRetrieval": "task: code retrieval | query: ",
|
| 65 |
+
"Reranking": "task: search result | query: ",
|
| 66 |
+
"Retrieval": "task: search result | query: ",
|
| 67 |
+
"Retrieval-query": "task: search result | query: ",
|
| 68 |
+
"Retrieval-document": "title: none | text: ",
|
| 69 |
+
"STS": "task: sentence similarity | query: ",
|
| 70 |
+
"Summarization": "task: summarization | query: ",
|
| 71 |
+
"document": "title: none | text: "
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
|
| 75 |
```
|