Update README.md
README.md
@@ -171,6 +171,49 @@ OpenCLIP software
 }
 ```

+CLIP benchmark software
+```
+@software{cherti_2025_15403103,
+  author       = {Cherti, Mehdi and
+                  Beaumont, Romain},
+  title        = {CLIP benchmark},
+  month        = may,
+  year         = 2025,
+  publisher    = {Zenodo},
+  doi          = {10.5281/zenodo.15403103},
+  url          = {https://doi.org/10.5281/zenodo.15403103},
+  swhid        = {swh:1:dir:8cf49a5dd06f59224844a1e767337a1d14ee56c2
+                  ;origin=https://doi.org/10.5281/zenodo.15403102;vi
+                  sit=swh:1:snp:dd153b26f702d614346bf814f723d59fef3d
+                  77a2;anchor=swh:1:rel:cff2aeb98f42583b44fdab5374e9
+                  fa71793f2cff;path=CLIP\\_benchmark-main
+                 },
+}
+```
+
 # How to Get Started with the Model

-
+Zero-shot classification example:
+
+```python
+import torch
+from PIL import Image
+import open_clip
+
+model, _, preprocess = open_clip.create_model_and_transforms('hf-hub:laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K')
+model.eval()  # model in train mode by default, impacts some models with BatchNorm or stochastic depth active
+tokenizer = open_clip.get_tokenizer('hf-hub:laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K')
+
+image = preprocess(Image.open("docs/CLIP.png")).unsqueeze(0)
+text = tokenizer(["a diagram", "a dog", "a cat"])
+
+with torch.no_grad(), torch.autocast("cuda"):
+    image_features = model.encode_image(image)
+    text_features = model.encode_text(text)
+    image_features /= image_features.norm(dim=-1, keepdim=True)
+    text_features /= text_features.norm(dim=-1, keepdim=True)
+
+    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
+
+print("Label probs:", text_probs)  # prints: [[1., 0., 0.]]
+```
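The example above assumes the `open_clip` package is installed (published on PyPI as `open_clip_torch`). The same two encoders also support simple text-to-image retrieval; below is a minimal sketch, not part of the card itself, using placeholder image paths:

```python
import torch
from PIL import Image
import open_clip

# Same model handle as in the zero-shot example above.
model, _, preprocess = open_clip.create_model_and_transforms('hf-hub:laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K')
model.eval()
tokenizer = open_clip.get_tokenizer('hf-hub:laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K')

# Placeholder gallery; replace with real image paths.
paths = ["cat.png", "dog.png", "diagram.png"]
images = torch.stack([preprocess(Image.open(p)) for p in paths])
query = tokenizer(["a photo of a dog"])

with torch.no_grad():
    image_features = model.encode_image(images)  # one embedding per gallery image
    text_features = model.encode_text(query)
    # L2-normalize so the dot product below is cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

sims = (text_features @ image_features.T).squeeze(0)  # one score per gallery image
best = int(sims.argmax())
print("Best match:", paths[best], "score:", float(sims[best]))
```

Because both sets of embeddings are normalized, ranking by dot product is the same cosine-similarity scoring that underlies the zero-shot softmax in the card's example.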