daviddongdong committed on May 26

Commit

e6ac237

verified ·

1 Parent(s): d0de52d

Upload 99 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +2 -0
bge-large-en-v1.5/1_Pooling/config.json +7 -0
bge-large-en-v1.5/README.md +3069 -0
bge-large-en-v1.5/config.json +32 -0
bge-large-en-v1.5/config_sentence_transformers.json +7 -0
bge-large-en-v1.5/modules.json +20 -0
bge-large-en-v1.5/pytorch_model.bin +3 -0
bge-large-en-v1.5/sentence_bert_config.json +4 -0
bge-large-en-v1.5/special_tokens_map.json +7 -0
bge-large-en-v1.5/tokenizer.json +0 -0
bge-large-en-v1.5/tokenizer_config.json +15 -0
bge-large-en-v1.5/vocab.txt +0 -0
colbertv2.0/README.md +215 -0
colbertv2.0/artifact.metadata +47 -0
colbertv2.0/config.json +26 -0
colbertv2.0/pytorch_model.bin +3 -0
colbertv2.0/special_tokens_map.json +1 -0
colbertv2.0/tokenizer.json +0 -0
colbertv2.0/tokenizer_config.json +1 -0
colbertv2.0/vocab.txt +0 -0
colpali-v1.1/.gitattributes +36 -0
colpali-v1.1/README.md +166 -0
colpali-v1.1/adapter_config.json +26 -0
colpali-v1.1/adapter_model.safetensors +3 -0
colpali-v1.1/checkpoint-3500/adapter_config.json +26 -0
colpali-v1.1/checkpoint-3500/adapter_model.safetensors +3 -0
colpali-v1.1/checkpoint-3500/optimizer.pt +3 -0
colpali-v1.1/checkpoint-3500/rng_state.pth +3 -0
colpali-v1.1/checkpoint-3500/scheduler.pt +3 -0
colpali-v1.1/checkpoint-3500/trainer_state.json +3043 -0
colpali-v1.1/checkpoint-3500/training_args.bin +3 -0
colpali-v1.1/git_hash.txt +1 -0
colpali-v1.1/preprocessor_config.json +40 -0
colpali-v1.1/results.json +1 -0
colpali-v1.1/special_tokens_map.json +39 -0
colpali-v1.1/tokenizer.json +3 -0
colpali-v1.1/tokenizer_config.json +0 -0
colpali-v1.1/training_config.yml +40 -0
colqwen2-v1.0/.gitattributes +36 -0
colqwen2-v1.0/README.md +126 -0
colqwen2-v1.0/adapter_config.json +26 -0
colqwen2-v1.0/adapter_model.safetensors +3 -0
colqwen2-v1.0/added_tokens.json +16 -0
colqwen2-v1.0/chat_template.json +3 -0
colqwen2-v1.0/git_hash.txt +1 -0
colqwen2-v1.0/handler.py +91 -0
colqwen2-v1.0/merges.txt +0 -0
colqwen2-v1.0/preprocessor_config.json +31 -0
colqwen2-v1.0/requirements.txt +2 -0
colqwen2-v1.0/results.json +465 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+colpali-v1.1/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+colqwen2-v1.0/tokenizer.json filter=lfs diff=lfs merge=lfs -text

bge-large-en-v1.5/1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "word_embedding_dimension": 1024,
+  "pooling_mode_cls_token": true,
+  "pooling_mode_mean_tokens": false,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false
+}

bge-large-en-v1.5/README.md ADDED Viewed

	@@ -0,0 +1,3069 @@

+---
+tags:
+- sentence-transformers
+- feature-extraction
+- sentence-similarity
+- transformers
+- mteb
+model-index:
+- name: bge-large-en-v1.5
+  results:
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_counterfactual
+      name: MTEB AmazonCounterfactualClassification (en)
+      config: en
+      split: test
+      revision: e8379541af4e31359cca9fbcf4b00f2671dba205
+    metrics:
+    - type: accuracy
+      value: 75.8507462686567
+    - type: ap
+      value: 38.566457320228245
+    - type: f1
+      value: 69.69386648043475
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_polarity
+      name: MTEB AmazonPolarityClassification
+      config: default
+      split: test
+      revision: e2d317d38cd51312af73b3d32a06d1a08b442046
+    metrics:
+    - type: accuracy
+      value: 92.416675
+    - type: ap
+      value: 89.1928861155922
+    - type: f1
+      value: 92.39477019574215
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_reviews_multi
+      name: MTEB AmazonReviewsClassification (en)
+      config: en
+      split: test
+      revision: 1399c76144fd37290681b995c656ef9b2e06e26d
+    metrics:
+    - type: accuracy
+      value: 48.175999999999995
+    - type: f1
+      value: 47.80712792870253
+  - task:
+      type: Retrieval
+    dataset:
+      type: arguana
+      name: MTEB ArguAna
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 40.184999999999995
+    - type: map_at_10
+      value: 55.654
+    - type: map_at_100
+      value: 56.25
+    - type: map_at_1000
+      value: 56.255
+    - type: map_at_3
+      value: 51.742999999999995
+    - type: map_at_5
+      value: 54.129000000000005
+    - type: mrr_at_1
+      value: 40.967
+    - type: mrr_at_10
+      value: 55.96
+    - type: mrr_at_100
+      value: 56.54900000000001
+    - type: mrr_at_1000
+      value: 56.554
+    - type: mrr_at_3
+      value: 51.980000000000004
+    - type: mrr_at_5
+      value: 54.44
+    - type: ndcg_at_1
+      value: 40.184999999999995
+    - type: ndcg_at_10
+      value: 63.542
+    - type: ndcg_at_100
+      value: 65.96499999999999
+    - type: ndcg_at_1000
+      value: 66.08699999999999
+    - type: ndcg_at_3
+      value: 55.582
+    - type: ndcg_at_5
+      value: 59.855000000000004
+    - type: precision_at_1
+      value: 40.184999999999995
+    - type: precision_at_10
+      value: 8.841000000000001
+    - type: precision_at_100
+      value: 0.987
+    - type: precision_at_1000
+      value: 0.1
+    - type: precision_at_3
+      value: 22.238
+    - type: precision_at_5
+      value: 15.405
+    - type: recall_at_1
+      value: 40.184999999999995
+    - type: recall_at_10
+      value: 88.407
+    - type: recall_at_100
+      value: 98.72
+    - type: recall_at_1000
+      value: 99.644
+    - type: recall_at_3
+      value: 66.714
+    - type: recall_at_5
+      value: 77.027
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/arxiv-clustering-p2p
+      name: MTEB ArxivClusteringP2P
+      config: default
+      split: test
+      revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
+    metrics:
+    - type: v_measure
+      value: 48.567077926750066
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/arxiv-clustering-s2s
+      name: MTEB ArxivClusteringS2S
+      config: default
+      split: test
+      revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
+    metrics:
+    - type: v_measure
+      value: 43.19453389182364
+  - task:
+      type: Reranking
+    dataset:
+      type: mteb/askubuntudupquestions-reranking
+      name: MTEB AskUbuntuDupQuestions
+      config: default
+      split: test
+      revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
+    metrics:
+    - type: map
+      value: 64.46555939623092
+    - type: mrr
+      value: 77.82361605768807
+  - task:
+      type: STS
+    dataset:
+      type: mteb/biosses-sts
+      name: MTEB BIOSSES
+      config: default
+      split: test
+      revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
+    metrics:
+    - type: cos_sim_pearson
+      value: 84.9554128814735
+    - type: cos_sim_spearman
+      value: 84.65373612172036
+    - type: euclidean_pearson
+      value: 83.2905059954138
+    - type: euclidean_spearman
+      value: 84.52240782811128
+    - type: manhattan_pearson
+      value: 82.99533802997436
+    - type: manhattan_spearman
+      value: 84.20673798475734
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/banking77
+      name: MTEB Banking77Classification
+      config: default
+      split: test
+      revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
+    metrics:
+    - type: accuracy
+      value: 87.78896103896103
+    - type: f1
+      value: 87.77189310964883
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/biorxiv-clustering-p2p
+      name: MTEB BiorxivClusteringP2P
+      config: default
+      split: test
+      revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
+    metrics:
+    - type: v_measure
+      value: 39.714538337650495
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/biorxiv-clustering-s2s
+      name: MTEB BiorxivClusteringS2S
+      config: default
+      split: test
+      revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
+    metrics:
+    - type: v_measure
+      value: 36.90108349284447
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackAndroidRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 32.795
+    - type: map_at_10
+      value: 43.669000000000004
+    - type: map_at_100
+      value: 45.151
+    - type: map_at_1000
+      value: 45.278
+    - type: map_at_3
+      value: 40.006
+    - type: map_at_5
+      value: 42.059999999999995
+    - type: mrr_at_1
+      value: 39.771
+    - type: mrr_at_10
+      value: 49.826
+    - type: mrr_at_100
+      value: 50.504000000000005
+    - type: mrr_at_1000
+      value: 50.549
+    - type: mrr_at_3
+      value: 47.115
+    - type: mrr_at_5
+      value: 48.832
+    - type: ndcg_at_1
+      value: 39.771
+    - type: ndcg_at_10
+      value: 50.217999999999996
+    - type: ndcg_at_100
+      value: 55.454
+    - type: ndcg_at_1000
+      value: 57.37
+    - type: ndcg_at_3
+      value: 44.885000000000005
+    - type: ndcg_at_5
+      value: 47.419
+    - type: precision_at_1
+      value: 39.771
+    - type: precision_at_10
+      value: 9.642000000000001
+    - type: precision_at_100
+      value: 1.538
+    - type: precision_at_1000
+      value: 0.198
+    - type: precision_at_3
+      value: 21.268
+    - type: precision_at_5
+      value: 15.536
+    - type: recall_at_1
+      value: 32.795
+    - type: recall_at_10
+      value: 62.580999999999996
+    - type: recall_at_100
+      value: 84.438
+    - type: recall_at_1000
+      value: 96.492
+    - type: recall_at_3
+      value: 47.071000000000005
+    - type: recall_at_5
+      value: 54.079
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackEnglishRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 32.671
+    - type: map_at_10
+      value: 43.334
+    - type: map_at_100
+      value: 44.566
+    - type: map_at_1000
+      value: 44.702999999999996
+    - type: map_at_3
+      value: 40.343
+    - type: map_at_5
+      value: 41.983
+    - type: mrr_at_1
+      value: 40.764
+    - type: mrr_at_10
+      value: 49.382
+    - type: mrr_at_100
+      value: 49.988
+    - type: mrr_at_1000
+      value: 50.03300000000001
+    - type: mrr_at_3
+      value: 47.293
+    - type: mrr_at_5
+      value: 48.51
+    - type: ndcg_at_1
+      value: 40.764
+    - type: ndcg_at_10
+      value: 49.039
+    - type: ndcg_at_100
+      value: 53.259
+    - type: ndcg_at_1000
+      value: 55.253
+    - type: ndcg_at_3
+      value: 45.091
+    - type: ndcg_at_5
+      value: 46.839999999999996
+    - type: precision_at_1
+      value: 40.764
+    - type: precision_at_10
+      value: 9.191
+    - type: precision_at_100
+      value: 1.476
+    - type: precision_at_1000
+      value: 0.19499999999999998
+    - type: precision_at_3
+      value: 21.72
+    - type: precision_at_5
+      value: 15.299
+    - type: recall_at_1
+      value: 32.671
+    - type: recall_at_10
+      value: 58.816
+    - type: recall_at_100
+      value: 76.654
+    - type: recall_at_1000
+      value: 89.05999999999999
+    - type: recall_at_3
+      value: 46.743
+    - type: recall_at_5
+      value: 51.783
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackGamingRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 40.328
+    - type: map_at_10
+      value: 53.32599999999999
+    - type: map_at_100
+      value: 54.37499999999999
+    - type: map_at_1000
+      value: 54.429
+    - type: map_at_3
+      value: 49.902
+    - type: map_at_5
+      value: 52.002
+    - type: mrr_at_1
+      value: 46.332
+    - type: mrr_at_10
+      value: 56.858
+    - type: mrr_at_100
+      value: 57.522
+    - type: mrr_at_1000
+      value: 57.54899999999999
+    - type: mrr_at_3
+      value: 54.472
+    - type: mrr_at_5
+      value: 55.996
+    - type: ndcg_at_1
+      value: 46.332
+    - type: ndcg_at_10
+      value: 59.313
+    - type: ndcg_at_100
+      value: 63.266999999999996
+    - type: ndcg_at_1000
+      value: 64.36
+    - type: ndcg_at_3
+      value: 53.815000000000005
+    - type: ndcg_at_5
+      value: 56.814
+    - type: precision_at_1
+      value: 46.332
+    - type: precision_at_10
+      value: 9.53
+    - type: precision_at_100
+      value: 1.238
+    - type: precision_at_1000
+      value: 0.13699999999999998
+    - type: precision_at_3
+      value: 24.054000000000002
+    - type: precision_at_5
+      value: 16.589000000000002
+    - type: recall_at_1
+      value: 40.328
+    - type: recall_at_10
+      value: 73.421
+    - type: recall_at_100
+      value: 90.059
+    - type: recall_at_1000
+      value: 97.81
+    - type: recall_at_3
+      value: 59.009
+    - type: recall_at_5
+      value: 66.352
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackGisRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 27.424
+    - type: map_at_10
+      value: 36.332
+    - type: map_at_100
+      value: 37.347
+    - type: map_at_1000
+      value: 37.422
+    - type: map_at_3
+      value: 33.743
+    - type: map_at_5
+      value: 35.176
+    - type: mrr_at_1
+      value: 29.153000000000002
+    - type: mrr_at_10
+      value: 38.233
+    - type: mrr_at_100
+      value: 39.109
+    - type: mrr_at_1000
+      value: 39.164
+    - type: mrr_at_3
+      value: 35.876000000000005
+    - type: mrr_at_5
+      value: 37.169000000000004
+    - type: ndcg_at_1
+      value: 29.153000000000002
+    - type: ndcg_at_10
+      value: 41.439
+    - type: ndcg_at_100
+      value: 46.42
+    - type: ndcg_at_1000
+      value: 48.242000000000004
+    - type: ndcg_at_3
+      value: 36.362
+    - type: ndcg_at_5
+      value: 38.743
+    - type: precision_at_1
+      value: 29.153000000000002
+    - type: precision_at_10
+      value: 6.315999999999999
+    - type: precision_at_100
+      value: 0.927
+    - type: precision_at_1000
+      value: 0.11199999999999999
+    - type: precision_at_3
+      value: 15.443000000000001
+    - type: precision_at_5
+      value: 10.644
+    - type: recall_at_1
+      value: 27.424
+    - type: recall_at_10
+      value: 55.364000000000004
+    - type: recall_at_100
+      value: 78.211
+    - type: recall_at_1000
+      value: 91.74600000000001
+    - type: recall_at_3
+      value: 41.379
+    - type: recall_at_5
+      value: 47.14
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackMathematicaRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 19.601
+    - type: map_at_10
+      value: 27.826
+    - type: map_at_100
+      value: 29.017
+    - type: map_at_1000
+      value: 29.137
+    - type: map_at_3
+      value: 25.125999999999998
+    - type: map_at_5
+      value: 26.765
+    - type: mrr_at_1
+      value: 24.005000000000003
+    - type: mrr_at_10
+      value: 32.716
+    - type: mrr_at_100
+      value: 33.631
+    - type: mrr_at_1000
+      value: 33.694
+    - type: mrr_at_3
+      value: 29.934
+    - type: mrr_at_5
+      value: 31.630999999999997
+    - type: ndcg_at_1
+      value: 24.005000000000003
+    - type: ndcg_at_10
+      value: 33.158
+    - type: ndcg_at_100
+      value: 38.739000000000004
+    - type: ndcg_at_1000
+      value: 41.495
+    - type: ndcg_at_3
+      value: 28.185
+    - type: ndcg_at_5
+      value: 30.796
+    - type: precision_at_1
+      value: 24.005000000000003
+    - type: precision_at_10
+      value: 5.908
+    - type: precision_at_100
+      value: 1.005
+    - type: precision_at_1000
+      value: 0.13899999999999998
+    - type: precision_at_3
+      value: 13.391
+    - type: precision_at_5
+      value: 9.876
+    - type: recall_at_1
+      value: 19.601
+    - type: recall_at_10
+      value: 44.746
+    - type: recall_at_100
+      value: 68.82300000000001
+    - type: recall_at_1000
+      value: 88.215
+    - type: recall_at_3
+      value: 31.239
+    - type: recall_at_5
+      value: 37.695
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackPhysicsRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 30.130000000000003
+    - type: map_at_10
+      value: 40.96
+    - type: map_at_100
+      value: 42.282
+    - type: map_at_1000
+      value: 42.392
+    - type: map_at_3
+      value: 37.889
+    - type: map_at_5
+      value: 39.661
+    - type: mrr_at_1
+      value: 36.958999999999996
+    - type: mrr_at_10
+      value: 46.835
+    - type: mrr_at_100
+      value: 47.644
+    - type: mrr_at_1000
+      value: 47.688
+    - type: mrr_at_3
+      value: 44.562000000000005
+    - type: mrr_at_5
+      value: 45.938
+    - type: ndcg_at_1
+      value: 36.958999999999996
+    - type: ndcg_at_10
+      value: 47.06
+    - type: ndcg_at_100
+      value: 52.345
+    - type: ndcg_at_1000
+      value: 54.35
+    - type: ndcg_at_3
+      value: 42.301
+    - type: ndcg_at_5
+      value: 44.635999999999996
+    - type: precision_at_1
+      value: 36.958999999999996
+    - type: precision_at_10
+      value: 8.479000000000001
+    - type: precision_at_100
+      value: 1.284
+    - type: precision_at_1000
+      value: 0.163
+    - type: precision_at_3
+      value: 20.244
+    - type: precision_at_5
+      value: 14.224999999999998
+    - type: recall_at_1
+      value: 30.130000000000003
+    - type: recall_at_10
+      value: 59.27
+    - type: recall_at_100
+      value: 81.195
+    - type: recall_at_1000
+      value: 94.21199999999999
+    - type: recall_at_3
+      value: 45.885
+    - type: recall_at_5
+      value: 52.016
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackProgrammersRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 26.169999999999998
+    - type: map_at_10
+      value: 36.451
+    - type: map_at_100
+      value: 37.791000000000004
+    - type: map_at_1000
+      value: 37.897
+    - type: map_at_3
+      value: 33.109
+    - type: map_at_5
+      value: 34.937000000000005
+    - type: mrr_at_1
+      value: 32.877
+    - type: mrr_at_10
+      value: 42.368
+    - type: mrr_at_100
+      value: 43.201
+    - type: mrr_at_1000
+      value: 43.259
+    - type: mrr_at_3
+      value: 39.763999999999996
+    - type: mrr_at_5
+      value: 41.260000000000005
+    - type: ndcg_at_1
+      value: 32.877
+    - type: ndcg_at_10
+      value: 42.659000000000006
+    - type: ndcg_at_100
+      value: 48.161
+    - type: ndcg_at_1000
+      value: 50.345
+    - type: ndcg_at_3
+      value: 37.302
+    - type: ndcg_at_5
+      value: 39.722
+    - type: precision_at_1
+      value: 32.877
+    - type: precision_at_10
+      value: 7.9
+    - type: precision_at_100
+      value: 1.236
+    - type: precision_at_1000
+      value: 0.158
+    - type: precision_at_3
+      value: 17.846
+    - type: precision_at_5
+      value: 12.9
+    - type: recall_at_1
+      value: 26.169999999999998
+    - type: recall_at_10
+      value: 55.35
+    - type: recall_at_100
+      value: 78.755
+    - type: recall_at_1000
+      value: 93.518
+    - type: recall_at_3
+      value: 40.176
+    - type: recall_at_5
+      value: 46.589000000000006
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 27.15516666666667
+    - type: map_at_10
+      value: 36.65741666666667
+    - type: map_at_100
+      value: 37.84991666666666
+    - type: map_at_1000
+      value: 37.96316666666667
+    - type: map_at_3
+      value: 33.74974999999999
+    - type: map_at_5
+      value: 35.3765
+    - type: mrr_at_1
+      value: 32.08233333333334
+    - type: mrr_at_10
+      value: 41.033833333333334
+    - type: mrr_at_100
+      value: 41.84524999999999
+    - type: mrr_at_1000
+      value: 41.89983333333333
+    - type: mrr_at_3
+      value: 38.62008333333333
+    - type: mrr_at_5
+      value: 40.03441666666666
+    - type: ndcg_at_1
+      value: 32.08233333333334
+    - type: ndcg_at_10
+      value: 42.229
+    - type: ndcg_at_100
+      value: 47.26716666666667
+    - type: ndcg_at_1000
+      value: 49.43466666666667
+    - type: ndcg_at_3
+      value: 37.36408333333333
+    - type: ndcg_at_5
+      value: 39.6715
+    - type: precision_at_1
+      value: 32.08233333333334
+    - type: precision_at_10
+      value: 7.382583333333334
+    - type: precision_at_100
+      value: 1.16625
+    - type: precision_at_1000
+      value: 0.15408333333333332
+    - type: precision_at_3
+      value: 17.218
+    - type: precision_at_5
+      value: 12.21875
+    - type: recall_at_1
+      value: 27.15516666666667
+    - type: recall_at_10
+      value: 54.36683333333333
+    - type: recall_at_100
+      value: 76.37183333333333
+    - type: recall_at_1000
+      value: 91.26183333333333
+    - type: recall_at_3
+      value: 40.769916666666674
+    - type: recall_at_5
+      value: 46.702333333333335
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackStatsRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 25.749
+    - type: map_at_10
+      value: 33.001999999999995
+    - type: map_at_100
+      value: 33.891
+    - type: map_at_1000
+      value: 33.993
+    - type: map_at_3
+      value: 30.703999999999997
+    - type: map_at_5
+      value: 31.959
+    - type: mrr_at_1
+      value: 28.834
+    - type: mrr_at_10
+      value: 35.955
+    - type: mrr_at_100
+      value: 36.709
+    - type: mrr_at_1000
+      value: 36.779
+    - type: mrr_at_3
+      value: 33.947
+    - type: mrr_at_5
+      value: 35.089
+    - type: ndcg_at_1
+      value: 28.834
+    - type: ndcg_at_10
+      value: 37.329
+    - type: ndcg_at_100
+      value: 41.79
+    - type: ndcg_at_1000
+      value: 44.169000000000004
+    - type: ndcg_at_3
+      value: 33.184999999999995
+    - type: ndcg_at_5
+      value: 35.107
+    - type: precision_at_1
+      value: 28.834
+    - type: precision_at_10
+      value: 5.7669999999999995
+    - type: precision_at_100
+      value: 0.876
+    - type: precision_at_1000
+      value: 0.11399999999999999
+    - type: precision_at_3
+      value: 14.213000000000001
+    - type: precision_at_5
+      value: 9.754999999999999
+    - type: recall_at_1
+      value: 25.749
+    - type: recall_at_10
+      value: 47.791
+    - type: recall_at_100
+      value: 68.255
+    - type: recall_at_1000
+      value: 85.749
+    - type: recall_at_3
+      value: 36.199
+    - type: recall_at_5
+      value: 41.071999999999996
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackTexRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 17.777
+    - type: map_at_10
+      value: 25.201
+    - type: map_at_100
+      value: 26.423999999999996
+    - type: map_at_1000
+      value: 26.544
+    - type: map_at_3
+      value: 22.869
+    - type: map_at_5
+      value: 24.023
+    - type: mrr_at_1
+      value: 21.473
+    - type: mrr_at_10
+      value: 29.12
+    - type: mrr_at_100
+      value: 30.144
+    - type: mrr_at_1000
+      value: 30.215999999999998
+    - type: mrr_at_3
+      value: 26.933
+    - type: mrr_at_5
+      value: 28.051
+    - type: ndcg_at_1
+      value: 21.473
+    - type: ndcg_at_10
+      value: 30.003
+    - type: ndcg_at_100
+      value: 35.766
+    - type: ndcg_at_1000
+      value: 38.501000000000005
+    - type: ndcg_at_3
+      value: 25.773000000000003
+    - type: ndcg_at_5
+      value: 27.462999999999997
+    - type: precision_at_1
+      value: 21.473
+    - type: precision_at_10
+      value: 5.482
+    - type: precision_at_100
+      value: 0.975
+    - type: precision_at_1000
+      value: 0.13799999999999998
+    - type: precision_at_3
+      value: 12.205
+    - type: precision_at_5
+      value: 8.692
+    - type: recall_at_1
+      value: 17.777
+    - type: recall_at_10
+      value: 40.582
+    - type: recall_at_100
+      value: 66.305
+    - type: recall_at_1000
+      value: 85.636
+    - type: recall_at_3
+      value: 28.687
+    - type: recall_at_5
+      value: 33.089
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackUnixRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 26.677
+    - type: map_at_10
+      value: 36.309000000000005
+    - type: map_at_100
+      value: 37.403999999999996
+    - type: map_at_1000
+      value: 37.496
+    - type: map_at_3
+      value: 33.382
+    - type: map_at_5
+      value: 34.98
+    - type: mrr_at_1
+      value: 31.343
+    - type: mrr_at_10
+      value: 40.549
+    - type: mrr_at_100
+      value: 41.342
+    - type: mrr_at_1000
+      value: 41.397
+    - type: mrr_at_3
+      value: 38.029
+    - type: mrr_at_5
+      value: 39.451
+    - type: ndcg_at_1
+      value: 31.343
+    - type: ndcg_at_10
+      value: 42.1
+    - type: ndcg_at_100
+      value: 47.089999999999996
+    - type: ndcg_at_1000
+      value: 49.222
+    - type: ndcg_at_3
+      value: 36.836999999999996
+    - type: ndcg_at_5
+      value: 39.21
+    - type: precision_at_1
+      value: 31.343
+    - type: precision_at_10
+      value: 7.164
+    - type: precision_at_100
+      value: 1.0959999999999999
+    - type: precision_at_1000
+      value: 0.13899999999999998
+    - type: precision_at_3
+      value: 16.915
+    - type: precision_at_5
+      value: 11.940000000000001
+    - type: recall_at_1
+      value: 26.677
+    - type: recall_at_10
+      value: 55.54599999999999
+    - type: recall_at_100
+      value: 77.094
+    - type: recall_at_1000
+      value: 92.01
+    - type: recall_at_3
+      value: 41.191
+    - type: recall_at_5
+      value: 47.006
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackWebmastersRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 24.501
+    - type: map_at_10
+      value: 33.102
+    - type: map_at_100
+      value: 34.676
+    - type: map_at_1000
+      value: 34.888000000000005
+    - type: map_at_3
+      value: 29.944
+    - type: map_at_5
+      value: 31.613999999999997
+    - type: mrr_at_1
+      value: 29.447000000000003
+    - type: mrr_at_10
+      value: 37.996
+    - type: mrr_at_100
+      value: 38.946
+    - type: mrr_at_1000
+      value: 38.995000000000005
+    - type: mrr_at_3
+      value: 35.079
+    - type: mrr_at_5
+      value: 36.69
+    - type: ndcg_at_1
+      value: 29.447000000000003
+    - type: ndcg_at_10
+      value: 39.232
+    - type: ndcg_at_100
+      value: 45.247
+    - type: ndcg_at_1000
+      value: 47.613
+    - type: ndcg_at_3
+      value: 33.922999999999995
+    - type: ndcg_at_5
+      value: 36.284
+    - type: precision_at_1
+      value: 29.447000000000003
+    - type: precision_at_10
+      value: 7.648000000000001
+    - type: precision_at_100
+      value: 1.516
+    - type: precision_at_1000
+      value: 0.23900000000000002
+    - type: precision_at_3
+      value: 16.008
+    - type: precision_at_5
+      value: 11.779
+    - type: recall_at_1
+      value: 24.501
+    - type: recall_at_10
+      value: 51.18899999999999
+    - type: recall_at_100
+      value: 78.437
+    - type: recall_at_1000
+      value: 92.842
+    - type: recall_at_3
+      value: 35.808
+    - type: recall_at_5
+      value: 42.197
+  - task:
+      type: Retrieval
+    dataset:
+      type: BeIR/cqadupstack
+      name: MTEB CQADupstackWordpressRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 22.039
+    - type: map_at_10
+      value: 30.377
+    - type: map_at_100
+      value: 31.275
+    - type: map_at_1000
+      value: 31.379
+    - type: map_at_3
+      value: 27.98
+    - type: map_at_5
+      value: 29.358
+    - type: mrr_at_1
+      value: 24.03
+    - type: mrr_at_10
+      value: 32.568000000000005
+    - type: mrr_at_100
+      value: 33.403
+    - type: mrr_at_1000
+      value: 33.475
+    - type: mrr_at_3
+      value: 30.436999999999998
+    - type: mrr_at_5
+      value: 31.796000000000003
+    - type: ndcg_at_1
+      value: 24.03
+    - type: ndcg_at_10
+      value: 35.198
+    - type: ndcg_at_100
+      value: 39.668
+    - type: ndcg_at_1000
+      value: 42.296
+    - type: ndcg_at_3
+      value: 30.709999999999997
+    - type: ndcg_at_5
+      value: 33.024
+    - type: precision_at_1
+      value: 24.03
+    - type: precision_at_10
+      value: 5.564
+    - type: precision_at_100
+      value: 0.828
+    - type: precision_at_1000
+      value: 0.117
+    - type: precision_at_3
+      value: 13.309000000000001
+    - type: precision_at_5
+      value: 9.39
+    - type: recall_at_1
+      value: 22.039
+    - type: recall_at_10
+      value: 47.746
+    - type: recall_at_100
+      value: 68.23599999999999
+    - type: recall_at_1000
+      value: 87.852
+    - type: recall_at_3
+      value: 35.852000000000004
+    - type: recall_at_5
+      value: 41.410000000000004
+  - task:
+      type: Retrieval
+    dataset:
+      type: climate-fever
+      name: MTEB ClimateFEVER
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 15.692999999999998
+    - type: map_at_10
+      value: 26.903
+    - type: map_at_100
+      value: 28.987000000000002
+    - type: map_at_1000
+      value: 29.176999999999996
+    - type: map_at_3
+      value: 22.137
+    - type: map_at_5
+      value: 24.758
+    - type: mrr_at_1
+      value: 35.57
+    - type: mrr_at_10
+      value: 47.821999999999996
+    - type: mrr_at_100
+      value: 48.608000000000004
+    - type: mrr_at_1000
+      value: 48.638999999999996
+    - type: mrr_at_3
+      value: 44.452000000000005
+    - type: mrr_at_5
+      value: 46.546
+    - type: ndcg_at_1
+      value: 35.57
+    - type: ndcg_at_10
+      value: 36.567
+    - type: ndcg_at_100
+      value: 44.085
+    - type: ndcg_at_1000
+      value: 47.24
+    - type: ndcg_at_3
+      value: 29.964000000000002
+    - type: ndcg_at_5
+      value: 32.511
+    - type: precision_at_1
+      value: 35.57
+    - type: precision_at_10
+      value: 11.485
+    - type: precision_at_100
+      value: 1.9619999999999997
+    - type: precision_at_1000
+      value: 0.256
+    - type: precision_at_3
+      value: 22.237000000000002
+    - type: precision_at_5
+      value: 17.471999999999998
+    - type: recall_at_1
+      value: 15.692999999999998
+    - type: recall_at_10
+      value: 43.056
+    - type: recall_at_100
+      value: 68.628
+    - type: recall_at_1000
+      value: 86.075
+    - type: recall_at_3
+      value: 26.918999999999997
+    - type: recall_at_5
+      value: 34.14
+  - task:
+      type: Retrieval
+    dataset:
+      type: dbpedia-entity
+      name: MTEB DBPedia
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 9.53
+    - type: map_at_10
+      value: 20.951
+    - type: map_at_100
+      value: 30.136000000000003
+    - type: map_at_1000
+      value: 31.801000000000002
+    - type: map_at_3
+      value: 15.021
+    - type: map_at_5
+      value: 17.471999999999998
+    - type: mrr_at_1
+      value: 71.0
+    - type: mrr_at_10
+      value: 79.176
+    - type: mrr_at_100
+      value: 79.418
+    - type: mrr_at_1000
+      value: 79.426
+    - type: mrr_at_3
+      value: 78.125
+    - type: mrr_at_5
+      value: 78.61200000000001
+    - type: ndcg_at_1
+      value: 58.5
+    - type: ndcg_at_10
+      value: 44.106
+    - type: ndcg_at_100
+      value: 49.268
+    - type: ndcg_at_1000
+      value: 56.711999999999996
+    - type: ndcg_at_3
+      value: 48.934
+    - type: ndcg_at_5
+      value: 45.826
+    - type: precision_at_1
+      value: 71.0
+    - type: precision_at_10
+      value: 35.0
+    - type: precision_at_100
+      value: 11.360000000000001
+    - type: precision_at_1000
+      value: 2.046
+    - type: precision_at_3
+      value: 52.833
+    - type: precision_at_5
+      value: 44.15
+    - type: recall_at_1
+      value: 9.53
+    - type: recall_at_10
+      value: 26.811
+    - type: recall_at_100
+      value: 55.916999999999994
+    - type: recall_at_1000
+      value: 79.973
+    - type: recall_at_3
+      value: 16.413
+    - type: recall_at_5
+      value: 19.980999999999998
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/emotion
+      name: MTEB EmotionClassification
+      config: default
+      split: test
+      revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
+    metrics:
+    - type: accuracy
+      value: 51.519999999999996
+    - type: f1
+      value: 46.36601294761231
+  - task:
+      type: Retrieval
+    dataset:
+      type: fever
+      name: MTEB FEVER
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 74.413
+    - type: map_at_10
+      value: 83.414
+    - type: map_at_100
+      value: 83.621
+    - type: map_at_1000
+      value: 83.635
+    - type: map_at_3
+      value: 82.337
+    - type: map_at_5
+      value: 83.039
+    - type: mrr_at_1
+      value: 80.19800000000001
+    - type: mrr_at_10
+      value: 87.715
+    - type: mrr_at_100
+      value: 87.778
+    - type: mrr_at_1000
+      value: 87.779
+    - type: mrr_at_3
+      value: 87.106
+    - type: mrr_at_5
+      value: 87.555
+    - type: ndcg_at_1
+      value: 80.19800000000001
+    - type: ndcg_at_10
+      value: 87.182
+    - type: ndcg_at_100
+      value: 87.90299999999999
+    - type: ndcg_at_1000
+      value: 88.143
+    - type: ndcg_at_3
+      value: 85.60600000000001
+    - type: ndcg_at_5
+      value: 86.541
+    - type: precision_at_1
+      value: 80.19800000000001
+    - type: precision_at_10
+      value: 10.531
+    - type: precision_at_100
+      value: 1.113
+    - type: precision_at_1000
+      value: 0.11499999999999999
+    - type: precision_at_3
+      value: 32.933
+    - type: precision_at_5
+      value: 20.429
+    - type: recall_at_1
+      value: 74.413
+    - type: recall_at_10
+      value: 94.363
+    - type: recall_at_100
+      value: 97.165
+    - type: recall_at_1000
+      value: 98.668
+    - type: recall_at_3
+      value: 90.108
+    - type: recall_at_5
+      value: 92.52
+  - task:
+      type: Retrieval
+    dataset:
+      type: fiqa
+      name: MTEB FiQA2018
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 22.701
+    - type: map_at_10
+      value: 37.122
+    - type: map_at_100
+      value: 39.178000000000004
+    - type: map_at_1000
+      value: 39.326
+    - type: map_at_3
+      value: 32.971000000000004
+    - type: map_at_5
+      value: 35.332
+    - type: mrr_at_1
+      value: 44.753
+    - type: mrr_at_10
+      value: 53.452
+    - type: mrr_at_100
+      value: 54.198
+    - type: mrr_at_1000
+      value: 54.225
+    - type: mrr_at_3
+      value: 50.952
+    - type: mrr_at_5
+      value: 52.464
+    - type: ndcg_at_1
+      value: 44.753
+    - type: ndcg_at_10
+      value: 45.021
+    - type: ndcg_at_100
+      value: 52.028
+    - type: ndcg_at_1000
+      value: 54.596000000000004
+    - type: ndcg_at_3
+      value: 41.622
+    - type: ndcg_at_5
+      value: 42.736000000000004
+    - type: precision_at_1
+      value: 44.753
+    - type: precision_at_10
+      value: 12.284
+    - type: precision_at_100
+      value: 1.955
+    - type: precision_at_1000
+      value: 0.243
+    - type: precision_at_3
+      value: 27.828999999999997
+    - type: precision_at_5
+      value: 20.061999999999998
+    - type: recall_at_1
+      value: 22.701
+    - type: recall_at_10
+      value: 51.432
+    - type: recall_at_100
+      value: 77.009
+    - type: recall_at_1000
+      value: 92.511
+    - type: recall_at_3
+      value: 37.919000000000004
+    - type: recall_at_5
+      value: 44.131
+  - task:
+      type: Retrieval
+    dataset:
+      type: hotpotqa
+      name: MTEB HotpotQA
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 40.189
+    - type: map_at_10
+      value: 66.24600000000001
+    - type: map_at_100
+      value: 67.098
+    - type: map_at_1000
+      value: 67.149
+    - type: map_at_3
+      value: 62.684
+    - type: map_at_5
+      value: 64.974
+    - type: mrr_at_1
+      value: 80.378
+    - type: mrr_at_10
+      value: 86.127
+    - type: mrr_at_100
+      value: 86.29299999999999
+    - type: mrr_at_1000
+      value: 86.297
+    - type: mrr_at_3
+      value: 85.31400000000001
+    - type: mrr_at_5
+      value: 85.858
+    - type: ndcg_at_1
+      value: 80.378
+    - type: ndcg_at_10
+      value: 74.101
+    - type: ndcg_at_100
+      value: 76.993
+    - type: ndcg_at_1000
+      value: 77.948
+    - type: ndcg_at_3
+      value: 69.232
+    - type: ndcg_at_5
+      value: 72.04599999999999
+    - type: precision_at_1
+      value: 80.378
+    - type: precision_at_10
+      value: 15.595999999999998
+    - type: precision_at_100
+      value: 1.7840000000000003
+    - type: precision_at_1000
+      value: 0.191
+    - type: precision_at_3
+      value: 44.884
+    - type: precision_at_5
+      value: 29.145
+    - type: recall_at_1
+      value: 40.189
+    - type: recall_at_10
+      value: 77.981
+    - type: recall_at_100
+      value: 89.21
+    - type: recall_at_1000
+      value: 95.48299999999999
+    - type: recall_at_3
+      value: 67.326
+    - type: recall_at_5
+      value: 72.863
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/imdb
+      name: MTEB ImdbClassification
+      config: default
+      split: test
+      revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
+    metrics:
+    - type: accuracy
+      value: 92.84599999999999
+    - type: ap
+      value: 89.4710787567357
+    - type: f1
+      value: 92.83752676932258
+  - task:
+      type: Retrieval
+    dataset:
+      type: msmarco
+      name: MTEB MSMARCO
+      config: default
+      split: dev
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 23.132
+    - type: map_at_10
+      value: 35.543
+    - type: map_at_100
+      value: 36.702
+    - type: map_at_1000
+      value: 36.748999999999995
+    - type: map_at_3
+      value: 31.737
+    - type: map_at_5
+      value: 33.927
+    - type: mrr_at_1
+      value: 23.782
+    - type: mrr_at_10
+      value: 36.204
+    - type: mrr_at_100
+      value: 37.29
+    - type: mrr_at_1000
+      value: 37.330999999999996
+    - type: mrr_at_3
+      value: 32.458999999999996
+    - type: mrr_at_5
+      value: 34.631
+    - type: ndcg_at_1
+      value: 23.782
+    - type: ndcg_at_10
+      value: 42.492999999999995
+    - type: ndcg_at_100
+      value: 47.985
+    - type: ndcg_at_1000
+      value: 49.141
+    - type: ndcg_at_3
+      value: 34.748000000000005
+    - type: ndcg_at_5
+      value: 38.651
+    - type: precision_at_1
+      value: 23.782
+    - type: precision_at_10
+      value: 6.665
+    - type: precision_at_100
+      value: 0.941
+    - type: precision_at_1000
+      value: 0.104
+    - type: precision_at_3
+      value: 14.776
+    - type: precision_at_5
+      value: 10.84
+    - type: recall_at_1
+      value: 23.132
+    - type: recall_at_10
+      value: 63.794
+    - type: recall_at_100
+      value: 89.027
+    - type: recall_at_1000
+      value: 97.807
+    - type: recall_at_3
+      value: 42.765
+    - type: recall_at_5
+      value: 52.11
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/mtop_domain
+      name: MTEB MTOPDomainClassification (en)
+      config: en
+      split: test
+      revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
+    metrics:
+    - type: accuracy
+      value: 94.59188326493388
+    - type: f1
+      value: 94.3842594786827
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/mtop_intent
+      name: MTEB MTOPIntentClassification (en)
+      config: en
+      split: test
+      revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
+    metrics:
+    - type: accuracy
+      value: 79.49384404924761
+    - type: f1
+      value: 59.7580539534629
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_massive_intent
+      name: MTEB MassiveIntentClassification (en)
+      config: en
+      split: test
+      revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
+    metrics:
+    - type: accuracy
+      value: 77.56220578345663
+    - type: f1
+      value: 75.27228165561478
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_massive_scenario
+      name: MTEB MassiveScenarioClassification (en)
+      config: en
+      split: test
+      revision: 7d571f92784cd94a019292a1f45445077d0ef634
+    metrics:
+    - type: accuracy
+      value: 80.53463349024884
+    - type: f1
+      value: 80.4893958236536
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/medrxiv-clustering-p2p
+      name: MTEB MedrxivClusteringP2P
+      config: default
+      split: test
+      revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
+    metrics:
+    - type: v_measure
+      value: 32.56100273484962
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/medrxiv-clustering-s2s
+      name: MTEB MedrxivClusteringS2S
+      config: default
+      split: test
+      revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
+    metrics:
+    - type: v_measure
+      value: 31.470380028839607
+  - task:
+      type: Reranking
+    dataset:
+      type: mteb/mind_small
+      name: MTEB MindSmallReranking
+      config: default
+      split: test
+      revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
+    metrics:
+    - type: map
+      value: 32.06102792457849
+    - type: mrr
+      value: 33.30709199672238
+  - task:
+      type: Retrieval
+    dataset:
+      type: nfcorpus
+      name: MTEB NFCorpus
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 6.776999999999999
+    - type: map_at_10
+      value: 14.924000000000001
+    - type: map_at_100
+      value: 18.955
+    - type: map_at_1000
+      value: 20.538999999999998
+    - type: map_at_3
+      value: 10.982
+    - type: map_at_5
+      value: 12.679000000000002
+    - type: mrr_at_1
+      value: 47.988
+    - type: mrr_at_10
+      value: 57.232000000000006
+    - type: mrr_at_100
+      value: 57.818999999999996
+    - type: mrr_at_1000
+      value: 57.847
+    - type: mrr_at_3
+      value: 54.901999999999994
+    - type: mrr_at_5
+      value: 56.481
+    - type: ndcg_at_1
+      value: 46.594
+    - type: ndcg_at_10
+      value: 38.129000000000005
+    - type: ndcg_at_100
+      value: 35.54
+    - type: ndcg_at_1000
+      value: 44.172
+    - type: ndcg_at_3
+      value: 43.025999999999996
+    - type: ndcg_at_5
+      value: 41.052
+    - type: precision_at_1
+      value: 47.988
+    - type: precision_at_10
+      value: 28.111000000000004
+    - type: precision_at_100
+      value: 8.929
+    - type: precision_at_1000
+      value: 2.185
+    - type: precision_at_3
+      value: 40.144000000000005
+    - type: precision_at_5
+      value: 35.232
+    - type: recall_at_1
+      value: 6.776999999999999
+    - type: recall_at_10
+      value: 19.289
+    - type: recall_at_100
+      value: 36.359
+    - type: recall_at_1000
+      value: 67.54
+    - type: recall_at_3
+      value: 11.869
+    - type: recall_at_5
+      value: 14.999
+  - task:
+      type: Retrieval
+    dataset:
+      type: nq
+      name: MTEB NQ
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 31.108000000000004
+    - type: map_at_10
+      value: 47.126000000000005
+    - type: map_at_100
+      value: 48.171
+    - type: map_at_1000
+      value: 48.199
+    - type: map_at_3
+      value: 42.734
+    - type: map_at_5
+      value: 45.362
+    - type: mrr_at_1
+      value: 34.936
+    - type: mrr_at_10
+      value: 49.571
+    - type: mrr_at_100
+      value: 50.345
+    - type: mrr_at_1000
+      value: 50.363
+    - type: mrr_at_3
+      value: 45.959
+    - type: mrr_at_5
+      value: 48.165
+    - type: ndcg_at_1
+      value: 34.936
+    - type: ndcg_at_10
+      value: 55.028999999999996
+    - type: ndcg_at_100
+      value: 59.244
+    - type: ndcg_at_1000
+      value: 59.861
+    - type: ndcg_at_3
+      value: 46.872
+    - type: ndcg_at_5
+      value: 51.217999999999996
+    - type: precision_at_1
+      value: 34.936
+    - type: precision_at_10
+      value: 9.099
+    - type: precision_at_100
+      value: 1.145
+    - type: precision_at_1000
+      value: 0.12
+    - type: precision_at_3
+      value: 21.456
+    - type: precision_at_5
+      value: 15.411
+    - type: recall_at_1
+      value: 31.108000000000004
+    - type: recall_at_10
+      value: 76.53999999999999
+    - type: recall_at_100
+      value: 94.39
+    - type: recall_at_1000
+      value: 98.947
+    - type: recall_at_3
+      value: 55.572
+    - type: recall_at_5
+      value: 65.525
+  - task:
+      type: Retrieval
+    dataset:
+      type: quora
+      name: MTEB QuoraRetrieval
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 71.56400000000001
+    - type: map_at_10
+      value: 85.482
+    - type: map_at_100
+      value: 86.114
+    - type: map_at_1000
+      value: 86.13
+    - type: map_at_3
+      value: 82.607
+    - type: map_at_5
+      value: 84.405
+    - type: mrr_at_1
+      value: 82.42
+    - type: mrr_at_10
+      value: 88.304
+    - type: mrr_at_100
+      value: 88.399
+    - type: mrr_at_1000
+      value: 88.399
+    - type: mrr_at_3
+      value: 87.37
+    - type: mrr_at_5
+      value: 88.024
+    - type: ndcg_at_1
+      value: 82.45
+    - type: ndcg_at_10
+      value: 89.06500000000001
+    - type: ndcg_at_100
+      value: 90.232
+    - type: ndcg_at_1000
+      value: 90.305
+    - type: ndcg_at_3
+      value: 86.375
+    - type: ndcg_at_5
+      value: 87.85300000000001
+    - type: precision_at_1
+      value: 82.45
+    - type: precision_at_10
+      value: 13.486999999999998
+    - type: precision_at_100
+      value: 1.534
+    - type: precision_at_1000
+      value: 0.157
+    - type: precision_at_3
+      value: 37.813
+    - type: precision_at_5
+      value: 24.773999999999997
+    - type: recall_at_1
+      value: 71.56400000000001
+    - type: recall_at_10
+      value: 95.812
+    - type: recall_at_100
+      value: 99.7
+    - type: recall_at_1000
+      value: 99.979
+    - type: recall_at_3
+      value: 87.966
+    - type: recall_at_5
+      value: 92.268
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/reddit-clustering
+      name: MTEB RedditClustering
+      config: default
+      split: test
+      revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
+    metrics:
+    - type: v_measure
+      value: 57.241876648614145
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/reddit-clustering-p2p
+      name: MTEB RedditClusteringP2P
+      config: default
+      split: test
+      revision: 282350215ef01743dc01b456c7f5241fa8937f16
+    metrics:
+    - type: v_measure
+      value: 64.66212576446223
+  - task:
+      type: Retrieval
+    dataset:
+      type: scidocs
+      name: MTEB SCIDOCS
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 5.308
+    - type: map_at_10
+      value: 13.803
+    - type: map_at_100
+      value: 16.176
+    - type: map_at_1000
+      value: 16.561
+    - type: map_at_3
+      value: 9.761000000000001
+    - type: map_at_5
+      value: 11.802
+    - type: mrr_at_1
+      value: 26.200000000000003
+    - type: mrr_at_10
+      value: 37.621
+    - type: mrr_at_100
+      value: 38.767
+    - type: mrr_at_1000
+      value: 38.815
+    - type: mrr_at_3
+      value: 34.117
+    - type: mrr_at_5
+      value: 36.107
+    - type: ndcg_at_1
+      value: 26.200000000000003
+    - type: ndcg_at_10
+      value: 22.64
+    - type: ndcg_at_100
+      value: 31.567
+    - type: ndcg_at_1000
+      value: 37.623
+    - type: ndcg_at_3
+      value: 21.435000000000002
+    - type: ndcg_at_5
+      value: 18.87
+    - type: precision_at_1
+      value: 26.200000000000003
+    - type: precision_at_10
+      value: 11.74
+    - type: precision_at_100
+      value: 2.465
+    - type: precision_at_1000
+      value: 0.391
+    - type: precision_at_3
+      value: 20.033
+    - type: precision_at_5
+      value: 16.64
+    - type: recall_at_1
+      value: 5.308
+    - type: recall_at_10
+      value: 23.794999999999998
+    - type: recall_at_100
+      value: 50.015
+    - type: recall_at_1000
+      value: 79.283
+    - type: recall_at_3
+      value: 12.178
+    - type: recall_at_5
+      value: 16.882
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sickr-sts
+      name: MTEB SICK-R
+      config: default
+      split: test
+      revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
+    metrics:
+    - type: cos_sim_pearson
+      value: 84.93231134675553
+    - type: cos_sim_spearman
+      value: 81.68319292603205
+    - type: euclidean_pearson
+      value: 81.8396814380367
+    - type: euclidean_spearman
+      value: 81.24641903349945
+    - type: manhattan_pearson
+      value: 81.84698799204274
+    - type: manhattan_spearman
+      value: 81.24269997904105
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sts12-sts
+      name: MTEB STS12
+      config: default
+      split: test
+      revision: a0d554a64d88156834ff5ae9920b964011b16384
+    metrics:
+    - type: cos_sim_pearson
+      value: 86.73241671587446
+    - type: cos_sim_spearman
+      value: 79.05091082971826
+    - type: euclidean_pearson
+      value: 83.91146869578044
+    - type: euclidean_spearman
+      value: 79.87978465370936
+    - type: manhattan_pearson
+      value: 83.90888338917678
+    - type: manhattan_spearman
+      value: 79.87482848584241
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sts13-sts
+      name: MTEB STS13
+      config: default
+      split: test
+      revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
+    metrics:
+    - type: cos_sim_pearson
+      value: 85.14970731146177
+    - type: cos_sim_spearman
+      value: 86.37363490084627
+    - type: euclidean_pearson
+      value: 83.02154218530433
+    - type: euclidean_spearman
+      value: 83.80258761957367
+    - type: manhattan_pearson
+      value: 83.01664495119347
+    - type: manhattan_spearman
+      value: 83.77567458007952
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sts14-sts
+      name: MTEB STS14
+      config: default
+      split: test
+      revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
+    metrics:
+    - type: cos_sim_pearson
+      value: 83.40474139886784
+    - type: cos_sim_spearman
+      value: 82.77768789165984
+    - type: euclidean_pearson
+      value: 80.7065877443695
+    - type: euclidean_spearman
+      value: 81.375940662505
+    - type: manhattan_pearson
+      value: 80.6507552270278
+    - type: manhattan_spearman
+      value: 81.32782179098741
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sts15-sts
+      name: MTEB STS15
+      config: default
+      split: test
+      revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
+    metrics:
+    - type: cos_sim_pearson
+      value: 87.08585968722274
+    - type: cos_sim_spearman
+      value: 88.03110031451399
+    - type: euclidean_pearson
+      value: 85.74012019602384
+    - type: euclidean_spearman
+      value: 86.13592849438209
+    - type: manhattan_pearson
+      value: 85.74404842369206
+    - type: manhattan_spearman
+      value: 86.14492318960154
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sts16-sts
+      name: MTEB STS16
+      config: default
+      split: test
+      revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
+    metrics:
+    - type: cos_sim_pearson
+      value: 84.95069052788875
+    - type: cos_sim_spearman
+      value: 86.4867991595147
+    - type: euclidean_pearson
+      value: 84.31013325754635
+    - type: euclidean_spearman
+      value: 85.01529258006482
+    - type: manhattan_pearson
+      value: 84.26995570085374
+    - type: manhattan_spearman
+      value: 84.96982104986162
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sts17-crosslingual-sts
+      name: MTEB STS17 (en-en)
+      config: en-en
+      split: test
+      revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
+    metrics:
+    - type: cos_sim_pearson
+      value: 87.54617647971897
+    - type: cos_sim_spearman
+      value: 87.49834181751034
+    - type: euclidean_pearson
+      value: 86.01015322577122
+    - type: euclidean_spearman
+      value: 84.63362652063199
+    - type: manhattan_pearson
+      value: 86.13807574475706
+    - type: manhattan_spearman
+      value: 84.7772370721132
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sts22-crosslingual-sts
+      name: MTEB STS22 (en)
+      config: en
+      split: test
+      revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
+    metrics:
+    - type: cos_sim_pearson
+      value: 67.20047755786615
+    - type: cos_sim_spearman
+      value: 67.05324077987636
+    - type: euclidean_pearson
+      value: 66.91930642976601
+    - type: euclidean_spearman
+      value: 65.21491856099105
+    - type: manhattan_pearson
+      value: 66.78756851976624
+    - type: manhattan_spearman
+      value: 65.12356257740728
+  - task:
+      type: STS
+    dataset:
+      type: mteb/stsbenchmark-sts
+      name: MTEB STSBenchmark
+      config: default
+      split: test
+      revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
+    metrics:
+    - type: cos_sim_pearson
+      value: 86.19852871539686
+    - type: cos_sim_spearman
+      value: 87.5161895296395
+    - type: euclidean_pearson
+      value: 84.59848645207485
+    - type: euclidean_spearman
+      value: 85.26427328757919
+    - type: manhattan_pearson
+      value: 84.59747366996524
+    - type: manhattan_spearman
+      value: 85.24045855146915
+  - task:
+      type: Reranking
+    dataset:
+      type: mteb/scidocs-reranking
+      name: MTEB SciDocsRR
+      config: default
+      split: test
+      revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
+    metrics:
+    - type: map
+      value: 87.63320317811032
+    - type: mrr
+      value: 96.26242947321379
+  - task:
+      type: Retrieval
+    dataset:
+      type: scifact
+      name: MTEB SciFact
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 60.928000000000004
+    - type: map_at_10
+      value: 70.112
+    - type: map_at_100
+      value: 70.59299999999999
+    - type: map_at_1000
+      value: 70.623
+    - type: map_at_3
+      value: 66.846
+    - type: map_at_5
+      value: 68.447
+    - type: mrr_at_1
+      value: 64.0
+    - type: mrr_at_10
+      value: 71.212
+    - type: mrr_at_100
+      value: 71.616
+    - type: mrr_at_1000
+      value: 71.64500000000001
+    - type: mrr_at_3
+      value: 68.77799999999999
+    - type: mrr_at_5
+      value: 70.094
+    - type: ndcg_at_1
+      value: 64.0
+    - type: ndcg_at_10
+      value: 74.607
+    - type: ndcg_at_100
+      value: 76.416
+    - type: ndcg_at_1000
+      value: 77.102
+    - type: ndcg_at_3
+      value: 69.126
+    - type: ndcg_at_5
+      value: 71.41300000000001
+    - type: precision_at_1
+      value: 64.0
+    - type: precision_at_10
+      value: 9.933
+    - type: precision_at_100
+      value: 1.077
+    - type: precision_at_1000
+      value: 0.11299999999999999
+    - type: precision_at_3
+      value: 26.556
+    - type: precision_at_5
+      value: 17.467
+    - type: recall_at_1
+      value: 60.928000000000004
+    - type: recall_at_10
+      value: 87.322
+    - type: recall_at_100
+      value: 94.833
+    - type: recall_at_1000
+      value: 100.0
+    - type: recall_at_3
+      value: 72.628
+    - type: recall_at_5
+      value: 78.428
+  - task:
+      type: PairClassification
+    dataset:
+      type: mteb/sprintduplicatequestions-pairclassification
+      name: MTEB SprintDuplicateQuestions
+      config: default
+      split: test
+      revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
+    metrics:
+    - type: cos_sim_accuracy
+      value: 99.86237623762376
+    - type: cos_sim_ap
+      value: 96.72586477206649
+    - type: cos_sim_f1
+      value: 93.01858362631845
+    - type: cos_sim_precision
+      value: 93.4409687184662
+    - type: cos_sim_recall
+      value: 92.60000000000001
+    - type: dot_accuracy
+      value: 99.78019801980199
+    - type: dot_ap
+      value: 93.72748205246228
+    - type: dot_f1
+      value: 89.04109589041096
+    - type: dot_precision
+      value: 87.16475095785441
+    - type: dot_recall
+      value: 91.0
+    - type: euclidean_accuracy
+      value: 99.85445544554456
+    - type: euclidean_ap
+      value: 96.6661459876145
+    - type: euclidean_f1
+      value: 92.58337481333997
+    - type: euclidean_precision
+      value: 92.17046580773042
+    - type: euclidean_recall
+      value: 93.0
+    - type: manhattan_accuracy
+      value: 99.85445544554456
+    - type: manhattan_ap
+      value: 96.6883549244056
+    - type: manhattan_f1
+      value: 92.57598405580468
+    - type: manhattan_precision
+      value: 92.25422045680239
+    - type: manhattan_recall
+      value: 92.9
+    - type: max_accuracy
+      value: 99.86237623762376
+    - type: max_ap
+      value: 96.72586477206649
+    - type: max_f1
+      value: 93.01858362631845
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/stackexchange-clustering
+      name: MTEB StackExchangeClustering
+      config: default
+      split: test
+      revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
+    metrics:
+    - type: v_measure
+      value: 66.39930057069995
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/stackexchange-clustering-p2p
+      name: MTEB StackExchangeClusteringP2P
+      config: default
+      split: test
+      revision: 815ca46b2622cec33ccafc3735d572c266efdb44
+    metrics:
+    - type: v_measure
+      value: 34.96398659903402
+  - task:
+      type: Reranking
+    dataset:
+      type: mteb/stackoverflowdupquestions-reranking
+      name: MTEB StackOverflowDupQuestions
+      config: default
+      split: test
+      revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
+    metrics:
+    - type: map
+      value: 55.946944700355395
+    - type: mrr
+      value: 56.97151398438164
+  - task:
+      type: Summarization
+    dataset:
+      type: mteb/summeval
+      name: MTEB SummEval
+      config: default
+      split: test
+      revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
+    metrics:
+    - type: cos_sim_pearson
+      value: 31.541657650692905
+    - type: cos_sim_spearman
+      value: 31.605804192286303
+    - type: dot_pearson
+      value: 28.26905996736398
+    - type: dot_spearman
+      value: 27.864801765851187
+  - task:
+      type: Retrieval
+    dataset:
+      type: trec-covid
+      name: MTEB TRECCOVID
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 0.22599999999999998
+    - type: map_at_10
+      value: 1.8870000000000002
+    - type: map_at_100
+      value: 9.78
+    - type: map_at_1000
+      value: 22.514
+    - type: map_at_3
+      value: 0.6669999999999999
+    - type: map_at_5
+      value: 1.077
+    - type: mrr_at_1
+      value: 82.0
+    - type: mrr_at_10
+      value: 89.86699999999999
+    - type: mrr_at_100
+      value: 89.86699999999999
+    - type: mrr_at_1000
+      value: 89.86699999999999
+    - type: mrr_at_3
+      value: 89.667
+    - type: mrr_at_5
+      value: 89.667
+    - type: ndcg_at_1
+      value: 79.0
+    - type: ndcg_at_10
+      value: 74.818
+    - type: ndcg_at_100
+      value: 53.715999999999994
+    - type: ndcg_at_1000
+      value: 47.082
+    - type: ndcg_at_3
+      value: 82.134
+    - type: ndcg_at_5
+      value: 79.81899999999999
+    - type: precision_at_1
+      value: 82.0
+    - type: precision_at_10
+      value: 78.0
+    - type: precision_at_100
+      value: 54.48
+    - type: precision_at_1000
+      value: 20.518
+    - type: precision_at_3
+      value: 87.333
+    - type: precision_at_5
+      value: 85.2
+    - type: recall_at_1
+      value: 0.22599999999999998
+    - type: recall_at_10
+      value: 2.072
+    - type: recall_at_100
+      value: 13.013
+    - type: recall_at_1000
+      value: 43.462
+    - type: recall_at_3
+      value: 0.695
+    - type: recall_at_5
+      value: 1.139
+  - task:
+      type: Retrieval
+    dataset:
+      type: webis-touche2020
+      name: MTEB Touche2020
+      config: default
+      split: test
+      revision: None
+    metrics:
+    - type: map_at_1
+      value: 2.328
+    - type: map_at_10
+      value: 9.795
+    - type: map_at_100
+      value: 15.801000000000002
+    - type: map_at_1000
+      value: 17.23
+    - type: map_at_3
+      value: 4.734
+    - type: map_at_5
+      value: 6.644
+    - type: mrr_at_1
+      value: 30.612000000000002
+    - type: mrr_at_10
+      value: 46.902
+    - type: mrr_at_100
+      value: 47.495
+    - type: mrr_at_1000
+      value: 47.495
+    - type: mrr_at_3
+      value: 41.156
+    - type: mrr_at_5
+      value: 44.218
+    - type: ndcg_at_1
+      value: 28.571
+    - type: ndcg_at_10
+      value: 24.806
+    - type: ndcg_at_100
+      value: 36.419000000000004
+    - type: ndcg_at_1000
+      value: 47.272999999999996
+    - type: ndcg_at_3
+      value: 25.666
+    - type: ndcg_at_5
+      value: 25.448999999999998
+    - type: precision_at_1
+      value: 30.612000000000002
+    - type: precision_at_10
+      value: 23.061
+    - type: precision_at_100
+      value: 7.714
+    - type: precision_at_1000
+      value: 1.484
+    - type: precision_at_3
+      value: 26.531
+    - type: precision_at_5
+      value: 26.122
+    - type: recall_at_1
+      value: 2.328
+    - type: recall_at_10
+      value: 16.524
+    - type: recall_at_100
+      value: 47.179
+    - type: recall_at_1000
+      value: 81.22200000000001
+    - type: recall_at_3
+      value: 5.745
+    - type: recall_at_5
+      value: 9.339
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/toxic_conversations_50k
+      name: MTEB ToxicConversationsClassification
+      config: default
+      split: test
+      revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
+    metrics:
+    - type: accuracy
+      value: 70.9142
+    - type: ap
+      value: 14.335574772555415
+    - type: f1
+      value: 54.62839595194111
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/tweet_sentiment_extraction
+      name: MTEB TweetSentimentExtractionClassification
+      config: default
+      split: test
+      revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
+    metrics:
+    - type: accuracy
+      value: 59.94340690435768
+    - type: f1
+      value: 60.286487936731916
+  - task:
+      type: Clustering
+    dataset:
+      type: mteb/twentynewsgroups-clustering
+      name: MTEB TwentyNewsgroupsClustering
+      config: default
+      split: test
+      revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
+    metrics:
+    - type: v_measure
+      value: 51.26597708987974
+  - task:
+      type: PairClassification
+    dataset:
+      type: mteb/twittersemeval2015-pairclassification
+      name: MTEB TwitterSemEval2015
+      config: default
+      split: test
+      revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
+    metrics:
+    - type: cos_sim_accuracy
+      value: 87.48882398521786
+    - type: cos_sim_ap
+      value: 79.04326607602204
+    - type: cos_sim_f1
+      value: 71.64566826860633
+    - type: cos_sim_precision
+      value: 70.55512918905092
+    - type: cos_sim_recall
+      value: 72.77044854881267
+    - type: dot_accuracy
+      value: 84.19264469213805
+    - type: dot_ap
+      value: 67.96360043562528
+    - type: dot_f1
+      value: 64.06418393006827
+    - type: dot_precision
+      value: 58.64941898706424
+    - type: dot_recall
+      value: 70.58047493403694
+    - type: euclidean_accuracy
+      value: 87.45902127913214
+    - type: euclidean_ap
+      value: 78.9742237648272
+    - type: euclidean_f1
+      value: 71.5553235908142
+    - type: euclidean_precision
+      value: 70.77955601445535
+    - type: euclidean_recall
+      value: 72.34828496042216
+    - type: manhattan_accuracy
+      value: 87.41729749061214
+    - type: manhattan_ap
+      value: 78.90073137580596
+    - type: manhattan_f1
+      value: 71.3942611553533
+    - type: manhattan_precision
+      value: 68.52705653967483
+    - type: manhattan_recall
+      value: 74.51187335092348
+    - type: max_accuracy
+      value: 87.48882398521786
+    - type: max_ap
+      value: 79.04326607602204
+    - type: max_f1
+      value: 71.64566826860633
+  - task:
+      type: PairClassification
+    dataset:
+      type: mteb/twitterurlcorpus-pairclassification
+      name: MTEB TwitterURLCorpus
+      config: default
+      split: test
+      revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
+    metrics:
+    - type: cos_sim_accuracy
+      value: 88.68125897465751
+    - type: cos_sim_ap
+      value: 85.6003454431979
+    - type: cos_sim_f1
+      value: 77.6957163958641
+    - type: cos_sim_precision
+      value: 73.0110366307807
+    - type: cos_sim_recall
+      value: 83.02279026793964
+    - type: dot_accuracy
+      value: 87.7672992587418
+    - type: dot_ap
+      value: 82.4971301112899
+    - type: dot_f1
+      value: 75.90528233151184
+    - type: dot_precision
+      value: 72.0370626469368
+    - type: dot_recall
+      value: 80.21250384970742
+    - type: euclidean_accuracy
+      value: 88.4503434625684
+    - type: euclidean_ap
+      value: 84.91949884748384
+    - type: euclidean_f1
+      value: 76.92365018444684
+    - type: euclidean_precision
+      value: 74.53245721712759
+    - type: euclidean_recall
+      value: 79.47336002463813
+    - type: manhattan_accuracy
+      value: 88.47556952691427
+    - type: manhattan_ap
+      value: 84.8963689101517
+    - type: manhattan_f1
+      value: 76.85901249256395
+    - type: manhattan_precision
+      value: 74.31693989071039
+    - type: manhattan_recall
+      value: 79.58115183246073
+    - type: max_accuracy
+      value: 88.68125897465751
+    - type: max_ap
+      value: 85.6003454431979
+    - type: max_f1
+      value: 77.6957163958641
+license: mit
+language:
+- en
+---
+<h1 align="center">FlagEmbedding</h1>
+<h4 align="center">
+    <p>
+        <a href=#model-list>Model List</a> |
+        <a href=#frequently-asked-questions>FAQ</a> |
+        <a href=#usage>Usage</a>  |
+        <a href="#evaluation">Evaluation</a> |
+        <a href="#train">Train</a> |
+        <a href="#contact">Contact</a> |
+        <a href="#citation">Citation</a> |
+        <a href="#license">License</a>
+    </p>
+</h4>
+For more details please refer to our Github: [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding).
+If you are looking for a model that supports more languages, longer texts, and other retrieval methods, you can try using [bge-m3](https://huggingface.co/BAAI/bge-m3).
+[English](README.md) | [中文](https://github.com/FlagOpen/FlagEmbedding/blob/master/README_zh.md)
+FlagEmbedding focuses on retrieval-augmented LLMs, consisting of the following projects currently:
+- **Long-Context LLM**: [Activation Beacon](https://github.com/FlagOpen/FlagEmbedding/tree/master/Long_LLM/activation_beacon)
+- **Fine-tuning of LM** : [LM-Cocktail](https://github.com/FlagOpen/FlagEmbedding/tree/master/LM_Cocktail)
+- **Dense Retrieval**: [BGE-M3](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/BGE_M3), [LLM Embedder](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/llm_embedder), [BGE Embedding](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/baai_general_embedding)
+- **Reranker Model**: [BGE Reranker](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/reranker)
+- **Benchmark**: [C-MTEB](https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB)
+## News
+- 1/30/2024: Release **BGE-M3**, a new member to BGE model series! M3 stands for **M**ulti-linguality (100+ languages), **M**ulti-granularities (input length up to 8192), **M**ulti-Functionality (unification of dense, lexical, multi-vec/colbert retrieval).
+It is the first embedding model that supports all three retrieval methods, achieving new SOTA on multi-lingual (MIRACL) and cross-lingual (MKQA) benchmarks.
+[Technical Report](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/BGE_M3/BGE_M3.pdf) and [Code](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/BGE_M3). :fire:
+- 1/9/2024: Release [Activation-Beacon](https://github.com/FlagOpen/FlagEmbedding/tree/master/Long_LLM/activation_beacon), an effective, efficient, compatible, and low-cost (training) method to extend the context length of LLM. [Technical Report](https://arxiv.org/abs/2401.03462) :fire:
+- 12/24/2023: Release **LLaRA**, a LLaMA-7B based dense retriever, leading to state-of-the-art performances on MS MARCO and BEIR. Model and code will be open-sourced. Please stay tuned. [Technical Report](https://arxiv.org/abs/2312.15503) :fire:
+- 11/23/2023: Release [LM-Cocktail](https://github.com/FlagOpen/FlagEmbedding/tree/master/LM_Cocktail), a method to maintain general capabilities during fine-tuning by merging multiple language models. [Technical Report](https://arxiv.org/abs/2311.13534) :fire:
+- 10/12/2023: Release [LLM-Embedder](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/llm_embedder), a unified embedding model to support diverse retrieval augmentation needs for LLMs. [Technical Report](https://arxiv.org/pdf/2310.07554.pdf)
+- 09/15/2023: The [technical report](https://arxiv.org/pdf/2309.07597.pdf) and [massive training data](https://data.baai.ac.cn/details/BAAI-MTP) of BGE have been released.
+- 09/12/2023: New models:
+    - **New reranker model**: release cross-encoder models `BAAI/bge-reranker-base` and `BAAI/bge-reranker-large`, which are more powerful than the embedding models. We recommend using/fine-tuning them to re-rank the top-k documents returned by embedding models.
+    - **Updated embedding model**: release `bge-*-v1.5` embedding models to alleviate the issue of the similarity distribution and enhance their retrieval ability without instruction.
+<details>
+  <summary>More</summary>
+<!-- ### More -->
+- 09/07/2023: Update [fine-tune code](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md): Add script to mine hard negatives and support adding instruction during fine-tuning.
+- 08/09/2023: BGE Models are integrated into **Langchain**, you can use it like [this](#using-langchain); C-MTEB **leaderboard** is [available](https://huggingface.co/spaces/mteb/leaderboard).
+- 08/05/2023: Release base-scale and small-scale models, **best performance among the models of the same size 🤗**
+- 08/02/2023: Release `bge-large-*`(short for BAAI General Embedding) Models, **rank 1st on MTEB and C-MTEB benchmark!** :tada: :tada:
+- 08/01/2023: We release the [Chinese Massive Text Embedding Benchmark](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB) (**C-MTEB**), consisting of 31 test datasets.
+</details>
+## Model List
+`bge` is short for `BAAI general embedding`.
+|              Model              | Language | | Description | query instruction for retrieval [1] |
+|:-------------------------------|:--------:| :--------:| :--------:|:--------:|
+| [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3)                   |    Multilingual     |    [Inference](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/BGE_M3#usage) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/BGE_M3)    | Multi-Functionality(dense retrieval, sparse retrieval, multi-vector(colbert)), Multi-Linguality, and Multi-Granularity(8192 tokens) |  |
+|  [BAAI/llm-embedder](https://huggingface.co/BAAI/llm-embedder)  |   English | [Inference](./FlagEmbedding/llm_embedder/README.md) [Fine-tune](./FlagEmbedding/llm_embedder/README.md) | a unified embedding model to support diverse retrieval augmentation needs for LLMs | See [README](./FlagEmbedding/llm_embedder/README.md) |
+|  [BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large)  |   Chinese and English | [Inference](#usage-for-reranker) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker) | a cross-encoder model which is more accurate but less efficient [2] |   |
+|  [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) |   Chinese and English | [Inference](#usage-for-reranker) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker) | a cross-encoder model which is more accurate but less efficient [2] |   |
+|  [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) |   English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `Represent this sentence for searching relevant passages: `  |
+|  [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) |   English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `Represent this sentence for searching relevant passages: `  |
+|  [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) |   English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution  | `Represent this sentence for searching relevant passages: `  |
+|  [BAAI/bge-large-zh-v1.5](https://huggingface.co/BAAI/bge-large-zh-v1.5) |   Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `为这个句子生成表示以用于检索相关文章：`  |
+|  [BAAI/bge-base-zh-v1.5](https://huggingface.co/BAAI/bge-base-zh-v1.5) |   Chinese |  [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `为这个句子生成表示以用于检索相关文章：`  |
+|  [BAAI/bge-small-zh-v1.5](https://huggingface.co/BAAI/bge-small-zh-v1.5) |   Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `为这个句子生成表示以用于检索相关文章：`  |
+|  [BAAI/bge-large-en](https://huggingface.co/BAAI/bge-large-en) |   English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | :trophy: rank **1st** in [MTEB](https://huggingface.co/spaces/mteb/leaderboard) leaderboard | `Represent this sentence for searching relevant passages: `  |
+|  [BAAI/bge-base-en](https://huggingface.co/BAAI/bge-base-en) |   English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | a base-scale model but with similar ability to `bge-large-en` | `Represent this sentence for searching relevant passages: `  |
+|  [BAAI/bge-small-en](https://huggingface.co/BAAI/bge-small-en) |   English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) |a small-scale model but with competitive performance  | `Represent this sentence for searching relevant passages: `  |
+|  [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh) |   Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | :trophy: rank **1st** in [C-MTEB](https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB) benchmark | `为这个句子生成表示以用于检索相关文章：`  |
+|  [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh) |   Chinese |  [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | a base-scale model but with similar ability to `bge-large-zh` | `为这个句子生成表示以用于检索相关文章：`  |
+|  [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh) |   Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | a small-scale model but with competitive performance | `为这个句子生成表示以用于检索相关文章：`  |
+[1\]: If you need to search for passages relevant to a query, we suggest adding the instruction to the query; in other cases, no instruction is needed, just use the original query directly. In all cases, **no instruction** needs to be added to passages.
+[2\]: Unlike an embedding model, a reranker takes a question and a document as input and directly outputs a similarity score instead of an embedding. To balance accuracy and time cost, cross-encoders are widely used to re-rank the top-k documents retrieved by other, simpler models.
+For example, use a bge embedding model to retrieve the top 100 relevant documents, and then use a bge reranker to re-rank those 100 documents to get the final top-3 results.
+All models have been uploaded to Huggingface Hub, and you can see them at https://huggingface.co/BAAI.
+If you cannot open the Huggingface Hub, you can also download the models at https://model.baai.ac.cn/models .
+## Frequently asked questions
+<details>
+  <summary>1. How to fine-tune bge embedding model?</summary>
+  <!-- ### How to fine-tune bge embedding model? -->
+Follow this [example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) to prepare data and fine-tune your model.
+Some suggestions:
+- Mine hard negatives following this [example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune#hard-negatives), which can improve the retrieval performance.
+- If you pre-train bge on your data, the pre-trained model cannot be directly used to calculate similarity, and it must be fine-tuned with contrastive learning before computing similarity.
+- If the accuracy of the fine-tuned model is still not high, it is recommended to use/fine-tune the cross-encoder model (bge-reranker) to re-rank the top-k results. Hard negatives are also needed to fine-tune the reranker.
+</details>
+<details>
+  <summary>2. The similarity score between two dissimilar sentences is higher than 0.5</summary>
+  <!-- ### The similarity score between two dissimilar sentences is higher than 0.5 -->
+**We suggest using bge v1.5, which alleviates the issue of the similarity distribution.**
+Since we fine-tune the models by contrastive learning with a temperature of 0.01,
+the similarity distribution of the current BGE model is roughly in the interval \[0.6, 1\].
+So a similarity score greater than 0.5 does not indicate that the two sentences are similar.
+For downstream tasks, such as passage retrieval or semantic similarity,
+**what matters is the relative order of the scores, not the absolute value.**
+If you need to filter similar sentences based on a similarity threshold,
+please select an appropriate similarity threshold based on the similarity distribution on your data (such as 0.8, 0.85, or even 0.9).
+</details>
+<details>
+  <summary>3. When does the query instruction need to be used</summary>
+  <!-- ### When does the query instruction need to be used -->
+For `bge-*-v1.5`, we improved the retrieval ability when no instruction is used.
+Omitting the instruction causes only a slight degradation in retrieval performance compared with using it.
+So you can generate embeddings without instruction in all cases for convenience.
+For a retrieval task that uses short queries to find long related documents,
+it is recommended to add instructions for these short queries.
+**The best way to decide whether to add instructions to queries is to choose the setting that achieves better performance on your task.**
+In all cases, no instruction needs to be added to the documents/passages.
+</details>
+## Usage
+### Usage for Embedding Model
+Here are some examples for using `bge` models with
+[FlagEmbedding](#using-flagembedding), [Sentence-Transformers](#using-sentence-transformers), [Langchain](#using-langchain), or [Huggingface Transformers](#using-huggingface-transformers).
+#### Using FlagEmbedding
+```
+pip install -U FlagEmbedding
+```
+If it doesn't work for you, you can see [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md) for more methods to install FlagEmbedding.
+```python
+from FlagEmbedding import FlagModel
+sentences_1 = ["样例数据-1", "样例数据-2"]
+sentences_2 = ["样例数据-3", "样例数据-4"]
+model = FlagModel('BAAI/bge-large-zh-v1.5',
+                  query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章：",
+                  use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation
+embeddings_1 = model.encode(sentences_1)
+embeddings_2 = model.encode(sentences_2)
+similarity = embeddings_1 @ embeddings_2.T
+print(similarity)
+# for s2p(short query to long passage) retrieval task, suggest to use encode_queries() which will automatically add the instruction to each query
+# corpus in retrieval task can still use encode() or encode_corpus(), since they don't need instruction
+queries = ['query_1', 'query_2']
+passages = ["样例文档-1", "样例文档-2"]
+q_embeddings = model.encode_queries(queries)
+p_embeddings = model.encode(passages)
+scores = q_embeddings @ p_embeddings.T
+```
+For the value of the argument `query_instruction_for_retrieval`, see [Model List](https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list).
+By default, FlagModel will use all available GPUs when encoding. Please set `os.environ["CUDA_VISIBLE_DEVICES"]` to select specific GPUs.
+You can also set `os.environ["CUDA_VISIBLE_DEVICES"]=""` to make all GPUs unavailable.
+#### Using Sentence-Transformers
+You can also use the `bge` models with [sentence-transformers](https://www.SBERT.net):
+```
+pip install -U sentence-transformers
+```
+```python
+from sentence_transformers import SentenceTransformer
+sentences_1 = ["样例数据-1", "样例数据-2"]
+sentences_2 = ["样例数据-3", "样例数据-4"]
+model = SentenceTransformer('BAAI/bge-large-zh-v1.5')
+embeddings_1 = model.encode(sentences_1, normalize_embeddings=True)
+embeddings_2 = model.encode(sentences_2, normalize_embeddings=True)
+similarity = embeddings_1 @ embeddings_2.T
+print(similarity)
+```
+For the s2p (short query to long passage) retrieval task,
+each short query should start with an instruction (for the instructions, see [Model List](https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list)).
+But the instruction is not needed for passages.
+```python
+from sentence_transformers import SentenceTransformer
+queries = ['query_1', 'query_2']
+passages = ["样例文档-1", "样例文档-2"]
+instruction = "为这个句子生成表示以用于检索相关文章："
+model = SentenceTransformer('BAAI/bge-large-zh-v1.5')
+q_embeddings = model.encode([instruction+q for q in queries], normalize_embeddings=True)
+p_embeddings = model.encode(passages, normalize_embeddings=True)
+scores = q_embeddings @ p_embeddings.T
+```
+#### Using Langchain
+You can use `bge` in langchain like this:
+```python
+from langchain.embeddings import HuggingFaceBgeEmbeddings
+model_name = "BAAI/bge-large-en-v1.5"
+model_kwargs = {'device': 'cuda'}
+encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity
+model = HuggingFaceBgeEmbeddings(
+    model_name=model_name,
+    model_kwargs=model_kwargs,
+    encode_kwargs=encode_kwargs,
+    query_instruction="为这个句子生成表示以用于检索相关文章："
+)
+model.query_instruction = "为这个句子生成表示以用于检索相关文章："
+```
+#### Using HuggingFace Transformers
+With the transformers package, you can use the model like this: First, you pass your input through the transformer model, then you select the last hidden state of the first token (i.e., [CLS]) as the sentence embedding.
+```python
+from transformers import AutoTokenizer, AutoModel
+import torch
+# Sentences we want sentence embeddings for
+sentences = ["样例数据-1", "样例数据-2"]
+# Load model from HuggingFace Hub
+tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-large-zh-v1.5')
+model = AutoModel.from_pretrained('BAAI/bge-large-zh-v1.5')
+model.eval()
+# Tokenize sentences
+encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
+# for s2p(short query to long passage) retrieval task, add an instruction to query (not add instruction for passages)
+# encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')
+# Compute token embeddings
+with torch.no_grad():
+    model_output = model(**encoded_input)
+    # Perform pooling. In this case, cls pooling.
+    sentence_embeddings = model_output[0][:, 0]
+# normalize embeddings
+sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
+print("Sentence embeddings:", sentence_embeddings)
+```
+#### Usage of the ONNX files
+```python
+from optimum.onnxruntime import ORTModelForFeatureExtraction  # type: ignore
+import torch
+from transformers import AutoModel, AutoTokenizer
+tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-large-en-v1.5')
+model = AutoModel.from_pretrained('BAAI/bge-large-en-v1.5', revision="refs/pr/13")
+model_ort = ORTModelForFeatureExtraction.from_pretrained('BAAI/bge-large-en-v1.5', revision="refs/pr/13",file_name="onnx/model.onnx")
+# Sentences we want sentence embeddings for
+sentences = ["样例数据-1", "样例数据-2"]
+# Tokenize sentences
+encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
+# for s2p(short query to long passage) retrieval task, add an instruction to query (not add instruction for passages)
+# encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')
+model_output_ort = model_ort(**encoded_input)
+# Compute token embeddings
+with torch.no_grad():
+    model_output = model(**encoded_input)
+# model_output and model_output_ort are identical
+```
+It's also possible to deploy the ONNX files with the [infinity_emb](https://github.com/michaelfeil/infinity) pip package.
+```python
+import asyncio
+from infinity_emb import AsyncEmbeddingEngine, EngineArgs
+sentences = ["Embed this is sentence via Infinity.", "Paris is in France."]
+engine = AsyncEmbeddingEngine.from_args(
+    EngineArgs(model_name_or_path = "BAAI/bge-large-en-v1.5", device="cpu", engine="optimum" # or engine="torch"
+))
+async def main():
+    async with engine:
+        embeddings, usage = await engine.embed(sentences=sentences)
+asyncio.run(main())
+```
+### Usage for Reranker
+Unlike an embedding model, a reranker takes a question and a document as input and directly outputs a similarity score instead of an embedding.
+You can get a relevance score by inputting a query and a passage to the reranker.
+The reranker is optimized with a cross-entropy loss, so the relevance score is not bounded to a specific range.
+#### Using FlagEmbedding
+```
+pip install -U FlagEmbedding
+```
+Get relevance scores (higher scores indicate more relevance):
+```python
+from FlagEmbedding import FlagReranker
+reranker = FlagReranker('BAAI/bge-reranker-large', use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation
+score = reranker.compute_score(['query', 'passage'])
+print(score)
+scores = reranker.compute_score([['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']])
+print(scores)
+```
+#### Using Huggingface transformers
+```python
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-reranker-large')
+model = AutoModelForSequenceClassification.from_pretrained('BAAI/bge-reranker-large')
+model.eval()
+pairs = [['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']]
+with torch.no_grad():
+    inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
+    scores = model(**inputs, return_dict=True).logits.view(-1, ).float()
+    print(scores)
+```
+## Evaluation
+`baai-general-embedding` models achieve **state-of-the-art performance on both the MTEB and C-MTEB leaderboards!**
+For more details and evaluation tools see our [scripts](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md).
+- **MTEB**:
+| Model Name |  Dimension | Sequence Length | Average (56) | Retrieval (15) |Clustering (11) | Pair Classification (3) | Reranking (4) |  STS (10) | Summarization (1) | Classification (12) |
+|:----:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
+| [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) | 1024 | 512 |  **64.23** | **54.29** |  46.08 | 87.12 | 60.03 | 83.11 | 31.61 | 75.97 |
+| [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) |  768 | 512 | 63.55 | 53.25 |   45.77 | 86.55 | 58.86 | 82.4 | 31.07 | 75.53 |
+| [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) |  384 | 512 | 62.17 |51.68 | 43.82 |  84.92 | 58.36 | 81.59 | 30.12 | 74.14 |
+| [bge-large-en](https://huggingface.co/BAAI/bge-large-en) |  1024 | 512 | 63.98 |  53.9 | 46.98 | 85.8 | 59.48 | 81.56 | 32.06 | 76.21 |
+| [bge-base-en](https://huggingface.co/BAAI/bge-base-en) |  768 | 512 |  63.36 | 53.0 | 46.32 | 85.86 | 58.7 | 81.84 | 29.27 | 75.27 |
+| [gte-large](https://huggingface.co/thenlper/gte-large) |  1024 | 512 | 63.13 | 52.22 | 46.84 | 85.00 | 59.13 | 83.35 | 31.66 | 73.33 |
+| [gte-base](https://huggingface.co/thenlper/gte-base) 	|  768 | 512 | 62.39 | 51.14 | 46.2 | 84.57 | 58.61 | 82.3 | 31.17 | 73.01 |
+| [e5-large-v2](https://huggingface.co/intfloat/e5-large-v2) |  1024| 512 | 62.25 | 50.56 | 44.49 | 86.03 | 56.61 | 82.05 | 30.19 | 75.24 |
+| [bge-small-en](https://huggingface.co/BAAI/bge-small-en) |  384 | 512 | 62.11 |  51.82 | 44.31 | 83.78 | 57.97 | 80.72 | 30.53 | 74.37 |
+| [instructor-xl](https://huggingface.co/hkunlp/instructor-xl) |  768 | 512 | 61.79 | 49.26 | 44.74 | 86.62 | 57.29 | 83.06 | 32.32 | 61.79 |
+| [e5-base-v2](https://huggingface.co/intfloat/e5-base-v2) |  768 | 512 | 61.5 | 50.29 | 43.80 | 85.73 | 55.91 | 81.05 | 30.28 | 73.84 |
+| [gte-small](https://huggingface.co/thenlper/gte-small) |  384 | 512 | 61.36 | 49.46 | 44.89 | 83.54 | 57.7 | 82.07 | 30.42 | 72.31 |
+| [text-embedding-ada-002](https://platform.openai.com/docs/guides/embeddings) | 1536 | 8192 | 60.99 | 49.25 | 45.9 | 84.89 | 56.32 | 80.97 | 30.8 | 70.93 |
+| [e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) | 384 | 512 | 59.93 | 49.04 | 39.92 | 84.67 | 54.32 | 80.39 | 31.16 | 72.94 |
+| [sentence-t5-xxl](https://huggingface.co/sentence-transformers/sentence-t5-xxl) |  768 | 512 | 59.51 | 42.24 | 43.72 | 85.06 | 56.42 | 82.63 | 30.08 | 73.42 |
+| [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) 	|  768 | 514 	| 57.78 | 43.81 | 43.69 | 83.04 | 59.36 | 80.28 | 27.49 | 65.07 |
+| [sgpt-bloom-7b1-msmarco](https://huggingface.co/bigscience/sgpt-bloom-7b1-msmarco) 	|  4096 | 2048 | 57.59 | 48.22 | 38.93 | 81.9 | 55.65 | 77.74 | 33.6 | 66.19 |
+- **C-MTEB**:
+We create the benchmark C-MTEB for Chinese text embedding which consists of 31 datasets from 6 tasks.
+Please refer to [C_MTEB](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md) for a detailed introduction.
+| Model | Embedding dimension | Avg | Retrieval | STS | PairClassification | Classification | Reranking | Clustering |
+|:-------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
+| [**BAAI/bge-large-zh-v1.5**](https://huggingface.co/BAAI/bge-large-zh-v1.5) | 1024 |  **64.53** | 70.46 | 56.25 | 81.6 | 69.13 | 65.84 | 48.99 |
+| [BAAI/bge-base-zh-v1.5](https://huggingface.co/BAAI/bge-base-zh-v1.5) | 768 |  63.13 | 69.49 | 53.72 | 79.75 | 68.07 | 65.39 | 47.53 |
+| [BAAI/bge-small-zh-v1.5](https://huggingface.co/BAAI/bge-small-zh-v1.5) | 512 | 57.82 | 61.77 | 49.11 | 70.41 | 63.96 | 60.92 | 44.18 |
+| [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh) | 1024 | 64.20 | 71.53 | 54.98 | 78.94 | 68.32 | 65.11 | 48.39 |
+| [bge-large-zh-noinstruct](https://huggingface.co/BAAI/bge-large-zh-noinstruct) | 1024 | 63.53 | 70.55 | 53 | 76.77 | 68.58 | 64.91 | 50.01 |
+| [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh) | 768 | 62.96 | 69.53 | 54.12 | 77.5 | 67.07 | 64.91 | 47.63 |
+| [multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) | 1024 | 58.79 | 63.66 | 48.44 | 69.89 | 67.34 | 56.00 | 48.23 |
+| [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh) | 512 | 58.27 |  63.07 | 49.45 | 70.35 | 63.64 | 61.48 | 45.09 |
+| [m3e-base](https://huggingface.co/moka-ai/m3e-base) | 768 | 57.10 | 56.91 | 50.47 | 63.99 | 67.52 | 59.34 | 47.68 |
+| [m3e-large](https://huggingface.co/moka-ai/m3e-large) | 1024 |  57.05 | 54.75 | 50.42 | 64.3 | 68.2 | 59.66 | 48.88 |
+| [multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) | 768 | 55.48 | 61.63 | 46.49 | 67.07 | 65.35 | 54.35 | 40.68 |
+| [multilingual-e5-small](https://huggingface.co/intfloat/multilingual-e5-small) | 384 | 55.38 | 59.95 | 45.27 | 66.45 | 65.85 | 53.86 | 45.26 |
+| [text-embedding-ada-002(OpenAI)](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) | 1536 |  53.02 | 52.0 | 43.35 | 69.56 | 64.31 | 54.28 | 45.68 |
+| [luotuo](https://huggingface.co/silk-road/luotuo-bert-medium) | 1024 | 49.37 |  44.4 | 42.78 | 66.62 | 61 | 49.25 | 44.39 |
+| [text2vec-base](https://huggingface.co/shibing624/text2vec-base-chinese) | 768 |  47.63 | 38.79 | 43.41 | 67.41 | 62.19 | 49.45 | 37.66 |
+| [text2vec-large](https://huggingface.co/GanymedeNil/text2vec-large-chinese) | 1024 | 47.36 | 41.94 | 44.97 | 70.86 | 60.66 | 49.16 | 30.02 |
+- **Reranking**:
+See [C_MTEB](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/) for evaluation script.
+| Model | T2Reranking | T2RerankingZh2En\* | T2RerankingEn2Zh\* | MMarcoReranking | CMedQAv1 | CMedQAv2 | Avg |
+|:-------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
+| text2vec-base-multilingual | 64.66 | 62.94 | 62.51 | 14.37 | 48.46 | 48.6 | 50.26 |
+| multilingual-e5-small | 65.62 | 60.94 | 56.41 | 29.91 | 67.26 | 66.54 | 57.78 |
+| multilingual-e5-large | 64.55 | 61.61 | 54.28 | 28.6 | 67.42 | 67.92 | 57.4 |
+| multilingual-e5-base | 64.21 | 62.13 | 54.68 | 29.5 | 66.23 | 66.98 | 57.29 |
+| m3e-base | 66.03 | 62.74 | 56.07 | 17.51 | 77.05 | 76.76 | 59.36 |
+| m3e-large | 66.13 | 62.72 | 56.1 | 16.46 | 77.76 | 78.27 | 59.57 |
+| bge-base-zh-v1.5 | 66.49 | 63.25 | 57.02 | 29.74 | 80.47 | 84.88 | 63.64 |
+| bge-large-zh-v1.5 | 65.74 | 63.39 | 57.03 | 28.74 | 83.45 | 85.44 | 63.97 |
+| [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | 67.28 | 63.95 | 60.45 | 35.46 | 81.26 | 84.1 | 65.42 |
+| [BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large) | 67.6 | 64.03 | 61.44 | 37.16 | 82.15 | 84.18 | 66.09 |
+\* : T2RerankingZh2En and T2RerankingEn2Zh are cross-language retrieval tasks
+## Train
+### BAAI Embedding
+We pre-train the models using [retromae](https://github.com/staoxiao/RetroMAE) and train them on large-scale pairs data using contrastive learning.
+**You can fine-tune the embedding model on your data following our [examples](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune).**
+We also provide a [pre-train example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/pretrain).
+Note that the goal of pre-training is to reconstruct the text, so the pre-trained model cannot be used for similarity calculation directly; it needs to be fine-tuned first.
+For more training details for bge, see [baai_general_embedding](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md).
+### BGE Reranker
+A cross-encoder performs full attention over the input pair,
+which is more accurate than an embedding model (i.e., a bi-encoder) but also more time-consuming.
+Therefore, it can be used to re-rank the top-k documents returned by the embedding model.
+We train the cross-encoder on multilingual pair data.
+The data format is the same as for the embedding model, so you can fine-tune it easily following our [example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker).
+For more details, please refer to [./FlagEmbedding/reranker/README.md](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/reranker).
+## Contact
+If you have any questions or suggestions related to this project, feel free to open an issue or pull request.
+You can also email Shitao Xiao ([email protected]) and Zheng Liu ([email protected]).
+## Citation
+If you find this repository useful, please consider giving it a star :star: and citing it:
+```
+@misc{bge_embedding,
+      title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
+      author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
+      year={2023},
+      eprint={2309.07597},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+```
+## License
+FlagEmbedding is licensed under the [MIT License](https://github.com/FlagOpen/FlagEmbedding/blob/master/LICENSE). The released models can be used for commercial purposes free of charge.

bge-large-en-v1.5/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_name_or_path": "/root/.cache/torch/sentence_transformers/BAAI_bge-large-en/",
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

bge-large-en-v1.5/config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "__version__": {
+    "sentence_transformers": "2.2.2",
+    "transformers": "4.28.1",
+    "pytorch": "1.13.0+cu117"
+  }
+}

bge-large-en-v1.5/modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

bge-large-en-v1.5/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51e14ed95fb897ba8eee3c6d9b5fb4323229a897caaf34053c1b7639b31c1ac4
+size 1340698349

bge-large-en-v1.5/sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 512,
+  "do_lower_case": true
+}

bge-large-en-v1.5/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

bge-large-en-v1.5/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

bge-large-en-v1.5/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

bge-large-en-v1.5/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

colbertv2.0/README.md ADDED Viewed

	@@ -0,0 +1,215 @@

+---
+license: mit
+language:
+- en
+tags:
+- ColBERT
+---
+<p align="center">
+  <img align="center" src="https://github.com/stanford-futuredata/ColBERT/blob/main/docs/images/colbertofficial.png?raw=true" width="430px" />
+</p>
+<p align="left">
+# ColBERT (v2)
+### ColBERT is a _fast_ and _accurate_ retrieval model, enabling scalable BERT-based search over large text collections in tens of milliseconds.
+[<img align="center" src="https://colab.research.google.com/assets/colab-badge.svg" />](https://colab.research.google.com/github/stanford-futuredata/ColBERT/blob/main/docs/intro2new.ipynb)
+<p align="center">
+  <img align="center" src="https://github.com/stanford-futuredata/ColBERT/blob/main/docs/images/ColBERT-Framework-MaxSim-W370px.png?raw=true" />
+</p>
+<p align="center">
+  <b>Figure 1:</b> ColBERT's late interaction, efficiently scoring the fine-grained similarity between a query and a passage.
+</p>
+As Figure 1 illustrates, ColBERT relies on fine-grained **contextual late interaction**: it encodes each passage into a **matrix** of token-level embeddings (shown above in blue). Then at search time, it embeds every query into another matrix (shown in green) and efficiently finds passages that contextually match the query using scalable vector-similarity (`MaxSim`) operators.
+These rich interactions allow ColBERT to surpass the quality of _single-vector_ representation models, while scaling efficiently to large corpora. You can read more in our papers:
+* [**ColBERT: Efficient and Effective Passage Search via Contextualized Late Interaction over BERT**](https://arxiv.org/abs/2004.12832) (SIGIR'20).
+* [**Relevance-guided Supervision for OpenQA with ColBERT**](https://arxiv.org/abs/2007.00814) (TACL'21).
+* [**Baleen: Robust Multi-Hop Reasoning at Scale via Condensed Retrieval**](https://arxiv.org/abs/2101.00436) (NeurIPS'21).
+* [**ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction**](https://arxiv.org/abs/2112.01488) (NAACL'22).
+* [**PLAID: An Efficient Engine for Late Interaction Retrieval**](https://arxiv.org/abs/2205.09707) (CIKM'22).
+----
+## 🚨 **Announcements**
+* (1/29/23) We have merged a new index updater feature and support for additional Hugging Face models! These are in beta so please give us feedback as you try them out.
+* (1/24/23) If you're looking for the **DSP** framework for composing ColBERTv2 and LLMs, it's at: https://github.com/stanfordnlp/dsp
+----
+## ColBERTv1
+The ColBERTv1 code from the SIGIR'20 paper is in the [`colbertv1` branch](https://github.com/stanford-futuredata/ColBERT/tree/colbertv1). See [here](#branches) for more information on other branches.
+## Installation
+ColBERT requires Python 3.7+ and Pytorch 1.9+ and uses the [Hugging Face Transformers](https://github.com/huggingface/transformers) library.
+We strongly recommend creating a conda environment using the commands below. (If you don't have conda, follow the official [conda installation guide](https://docs.anaconda.com/anaconda/install/linux/#installation).)
+We have also included a new environment file specifically for CPU-only environments (`conda_env_cpu.yml`), but note that if you are testing CPU execution on a machine that includes GPUs you might need to specify `CUDA_VISIBLE_DEVICES=""` as part of your command. Note that a GPU is required for training and indexing.
+```
+conda env create -f conda_env[_cpu].yml
+conda activate colbert
+```
+If you face any problems, please [open a new issue](https://github.com/stanford-futuredata/ColBERT/issues) and we'll help you promptly!
+## Overview
+Using ColBERT on a dataset typically involves the following steps.
+**Step 0: Preprocess your collection.** At its simplest, ColBERT works with tab-separated (TSV) files: a file (e.g., `collection.tsv`) will contain all passages and another (e.g., `queries.tsv`) will contain a set of queries for searching the collection.
+**Step 1: Download the [pre-trained ColBERTv2 checkpoint](https://downloads.cs.stanford.edu/nlp/data/colbert/colbertv2/colbertv2.0.tar.gz).** This checkpoint has been trained on the MS MARCO Passage Ranking task. You can also _optionally_ [train your own ColBERT model](#training).
+**Step 2: Index your collection.** Once you have a trained ColBERT model, you need to [index your collection](#indexing) to permit fast retrieval. This step encodes all passages into matrices, stores them on disk, and builds data structures for efficient search.
+**Step 3: Search the collection with your queries.** Given the model and index, you can [issue queries over the collection](#retrieval) to retrieve the top-k passages for each query.
+Below, we illustrate these steps via an example run on the MS MARCO Passage Ranking task.
+## API Usage Notebook
+**NEW**: We have an experimental notebook on [Google Colab](https://colab.research.google.com/github/stanford-futuredata/ColBERT/blob/main/docs/intro2new.ipynb) that you can use with free GPUs. Indexing 10,000 passages on the free Colab T4 GPU takes six minutes.
+The Jupyter notebook **[docs/intro.ipynb](docs/intro.ipynb)** illustrates using the key features of ColBERT with the new Python API.
+It includes how to download the ColBERTv2 model checkpoint trained on MS MARCO Passage Ranking and how to download our new LoTTE benchmark.
+## Data
+This repository works directly with a simple **tab-separated file** format to store queries, passages, and top-k ranked lists.
+* Queries: each line is `qid \t query text`.
+* Collection: each line is `pid \t passage text`.
+* Top-k Ranking: each line is `qid \t pid \t rank`.
+This works directly with the data format of the [MS MARCO Passage Ranking](https://github.com/microsoft/MSMARCO-Passage-Ranking) dataset. You will need the training triples (`triples.train.small.tar.gz`), the official top-1000 ranked lists for the dev set queries (`top1000.dev`), and the dev set relevant passages (`qrels.dev.small.tsv`). For indexing the full collection, you will also need the list of passages (`collection.tar.gz`).
+## Indexing
+For fast retrieval, indexing precomputes the ColBERT representations of passages.
+Example usage:
+```python
+from colbert.infra import Run, RunConfig, ColBERTConfig
+from colbert import Indexer
+if __name__=='__main__':
+    with Run().context(RunConfig(nranks=1, experiment="msmarco")):
+        config = ColBERTConfig(
+            nbits=2,
+            root="/path/to/experiments",
+        )
+        indexer = Indexer(checkpoint="/path/to/checkpoint", config=config)
+        indexer.index(name="msmarco.nbits=2", collection="/path/to/MSMARCO/collection.tsv")
+```
+## Retrieval
+We typically recommend that you use ColBERT for **end-to-end** retrieval, where it directly finds its top-k passages from the full collection:
+```python
+from colbert.data import Queries
+from colbert.infra import Run, RunConfig, ColBERTConfig
+from colbert import Searcher
+if __name__=='__main__':
+    with Run().context(RunConfig(nranks=1, experiment="msmarco")):
+        config = ColBERTConfig(
+            root="/path/to/experiments",
+        )
+        searcher = Searcher(index="msmarco.nbits=2", config=config)
+        queries = Queries("/path/to/MSMARCO/queries.dev.small.tsv")
+        ranking = searcher.search_all(queries, k=100)
+        ranking.save("msmarco.nbits=2.ranking.tsv")
+```
+You can optionally specify the `ncells`, `centroid_score_threshold`, and `ndocs` search hyperparameters to trade off between speed and result quality. Defaults for different values of `k` are listed in colbert/searcher.py.
+We can evaluate the MSMARCO rankings using the following command:
+```
+python -m utility.evaluate.msmarco_passages --ranking "/path/to/msmarco.nbits=2.ranking.tsv" --qrels "/path/to/MSMARCO/qrels.dev.small.tsv"
+```
+## Training
+We provide a [pre-trained model checkpoint](https://downloads.cs.stanford.edu/nlp/data/colbert/colbertv2/colbertv2.0.tar.gz), but we also detail how to train from scratch here.
+Note that this example demonstrates the ColBERTv1 style of training, but the provided checkpoint was trained with ColBERTv2.
+Training requires a JSONL triples file with a `[qid, pid+, pid-]` list per line. The query IDs and passage IDs correspond to the specified `queries.tsv` and `collection.tsv` files respectively.
+Example usage (training on 4 GPUs):
+```python
+from colbert.infra import Run, RunConfig, ColBERTConfig
+from colbert import Trainer
+if __name__=='__main__':
+    with Run().context(RunConfig(nranks=4, experiment="msmarco")):
+        config = ColBERTConfig(
+            bsize=32,
+            root="/path/to/experiments",
+        )
+        trainer = Trainer(
+            triples="/path/to/MSMARCO/triples.train.small.tsv",
+            queries="/path/to/MSMARCO/queries.train.small.tsv",
+            collection="/path/to/MSMARCO/collection.tsv",
+            config=config,
+        )
+        checkpoint_path = trainer.train()
+        print(f"Saved checkpoint to {checkpoint_path}...")
+```
+## Running a lightweight ColBERTv2 server
+We provide a script to run a lightweight server which serves k (up to 100) results in ranked order for a given search query, in JSON format. This script can be used to power DSP programs.
+To run the server, update the environment variables `INDEX_ROOT` and `INDEX_NAME` in the `.env` file to point to the appropriate ColBERT index. Then run the following command:
+```
+python server.py
+```
+A sample query:
+```
+http://localhost:8893/api/search?query=Who won the 2022 FIFA world cup&k=25
+```
+## Branches
+### Supported branches
+* [`main`](https://github.com/stanford-futuredata/ColBERT/tree/main): Stable branch with ColBERTv2 + PLAID.
+* [`colbertv1`](https://github.com/stanford-futuredata/ColBERT/tree/colbertv1): Legacy branch for ColBERTv1.
+### Deprecated branches
+* [`new_api`](https://github.com/stanford-futuredata/ColBERT/tree/new_api): Base ColBERTv2 implementation.
+* [`cpu_inference`](https://github.com/stanford-futuredata/ColBERT/tree/cpu_inference): ColBERTv2 implementation with CPU search support.
+* [`fast_search`](https://github.com/stanford-futuredata/ColBERT/tree/fast_search): ColBERTv2 implementation with PLAID.
+* [`binarization`](https://github.com/stanford-futuredata/ColBERT/tree/binarization): ColBERT with a baseline binarization-based compression strategy (as opposed to ColBERTv2's residual compression, which we found to be more robust).
+## Acknowledgments
+ColBERT logo designed by Chuyi Zhang.

colbertv2.0/artifact.metadata ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+    "nprobe": 2,
+    "ncandidates": 8192,
+    "index_path": null,
+    "nbits": 1,
+    "kmeans_niters": 20,
+    "similarity": "cosine",
+    "bsize": 8,
+    "accumsteps": 1,
+    "lr": 1e-5,
+    "maxsteps": 400000,
+    "save_every": null,
+    "resume": false,
+    "warmup": 20000,
+    "warmup_bert": null,
+    "relu": false,
+    "nway": 64,
+    "use_ib_negatives": true,
+    "query_maxlen": 32,
+    "attend_to_mask_tokens": false,
+    "dim": 128,
+    "doc_maxlen": 180,
+    "mask_punctuation": true,
+    "checkpoint": "\/future\/u\/okhattab\/root\/unit\/experiments\/2021.10\/none\/kld.v3.nway32.l2.ib\/checkpoints\/colbert-150000\/",
+    "triples": "\/future\/u\/okhattab\/root\/unit\/experiments\/2021.10\/downstream.distillation.round2.2_score\/round2.nway6.cosine.ib\/examples.64.json",
+    "collection": "\/future\/u\/okhattab\/data\/MSMARCO\/collection.tsv",
+    "queries": "\/future\/u\/okhattab\/data\/MSMARCO\/queries.train.tsv",
+    "index_name": null,
+    "overwrite": false,
+    "root": "\/future\/u\/okhattab\/root\/unit\/experiments",
+    "experiment": "2021.10",
+    "index_root": null,
+    "name": "kldR2.nway64.ib",
+    "rank": 0,
+    "nranks": 4,
+    "amp": true,
+    "gpus": 8,
+    "meta": {
+        "hostname": "future-hgx-1.stanford.edu",
+        "git_branch": "api_residual_compression3_KLD",
+        "git_hash": "62198f65e45bc7887eda4fdd57f9d3a103c72a7b",
+        "git_commit_datetime": "2021-10-29 07:05:32-07:00",
+        "current_datetime": "Oct 29, 2021 ;  9:05AM PDT (-0700)",
+        "cmd": "\/future\/u\/okhattab\/repos\/QolBERT2\/tests\/train_kldR2_nway64.py",
+        "version": "colbert-v0.4"
+    }
+}

colbertv2.0/config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "_name_or_path": "/future/u/okhattab/root/unit/experiments/2021.10/none/kld.v3.nway32.l2.ib/checkpoints/colbert-150000/",
+  "architectures": [
+    "HF_ColBERT"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.10.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

colbertv2.0/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26e4c2f9f95a3da4442252bb40d99e4fbfd098e733edac1d785c9937b8a278da
+size 438405935

colbertv2.0/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}

colbertv2.0/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

colbertv2.0/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/future/u/okhattab/root/unit/experiments/2021.10/none/kld.v3.nway32.l2.ib/checkpoints/colbert-150000/", "tokenizer_class": "BertTokenizer"}

colbertv2.0/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

colpali-v1.1/.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

colpali-v1.1/README.md ADDED Viewed

	@@ -0,0 +1,166 @@

+---
+license: mit
+library_name: colpali
+base_model: vidore/colpaligemma-3b-mix-448-base
+language:
+- en
+tags:
+- colpali
+- vidore
+new_version: vidore/colpali-v1.2
+---
+# ColPali: Visual Retriever based on PaliGemma-3B with ColBERT strategy
+ColPali is a model built on a novel architecture and training strategy that leverage Vision Language Models (VLMs) to efficiently index documents from their visual features.
+It is a [PaliGemma-3B](https://huggingface.co/google/paligemma-3b-mix-448) extension that generates [ColBERT](https://arxiv.org/abs/2004.12832)-style multi-vector representations of text and images.
+It was introduced in the paper [ColPali: Efficient Document Retrieval with Vision Language Models](https://arxiv.org/abs/2407.01449) and first released in [this repository](https://github.com/ManuelFay/colpali).
+<p align="center"><img width=800 src="https://github.com/illuin-tech/colpali/blob/main/assets/colpali_architecture.webp?raw=true"/></p>
+## Version specificity
+This version is trained with `colpali-engine==0.2.0`.
+Compared to `colpali`, this version is trained with right padding for queries to fix unwanted tokens in the query encoding.
+It also stems from the fixed `vidore/colpaligemma-3b-mix-448-base` to guarantee deterministic projection layer initialization.
+Data is the same as the ColPali data described in the paper.
+## Model Description
+This model is built iteratively starting from an off-the-shelf [SigLIP](https://huggingface.co/google/siglip-so400m-patch14-384) model.
+We finetuned it to create [BiSigLIP](https://huggingface.co/vidore/bisiglip) and fed the patch-embeddings output by SigLIP to an LLM, [PaliGemma-3B](https://huggingface.co/google/paligemma-3b-mix-448) to create [BiPali](https://huggingface.co/vidore/bipali).
+One benefit of inputting image patch embeddings through a language model is that they are natively mapped to a latent space similar to textual input (query).
+This enables leveraging the [ColBERT](https://arxiv.org/abs/2004.12832) strategy to compute interactions between text tokens and image patches, which enables a step-change improvement in performance compared to BiPali.
+## Model Training
+### Dataset
+Our training dataset of 127,460 query-page pairs is comprised of train sets of openly available academic datasets (63%) and a synthetic dataset made up of pages from web-crawled PDF documents and augmented with VLM-generated (Claude-3 Sonnet) pseudo-questions (37%).
+Our training set is fully English by design, enabling us to study zero-shot generalization to non-English languages. We explicitly verify that no multi-page PDF document is used both in [*ViDoRe*](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d) and in the train set, to prevent evaluation contamination.
+A validation set is created with 2% of the samples to tune hyperparameters.
+*Note: Multilingual data is present in the pretraining corpus of the language model (Gemma-2B) and potentially occurs during PaliGemma-3B's multimodal training.*
+### Parameters
+All models are trained for 1 epoch on the train set. Unless specified otherwise, we train models in `bfloat16` format, use low-rank adapters ([LoRA](https://arxiv.org/abs/2106.09685))
+with `alpha=32`  and `r=32` on the transformer layers from the language model,
+as well as the final randomly initialized projection layer, and use a `paged_adamw_8bit` optimizer.
+We train on an 8 GPU setup with data parallelism, a learning rate of 5e-5 with linear decay with 2.5% warmup steps, and a batch size of 32.
+## Usage
+```python
+import torch
+import typer
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+from transformers import AutoProcessor
+from PIL import Image
+from colpali_engine.models.paligemma_colbert_architecture import ColPali
+from colpali_engine.trainer.retrieval_evaluator import CustomEvaluator
+from colpali_engine.utils.colpali_processing_utils import process_images, process_queries
+from colpali_engine.utils.image_from_page_utils import load_from_dataset
+def main() -> None:
+    """Example script to run inference with ColPali"""
+    # Load model
+    model_name = "vidore/colpali-v1.1"
+    model = ColPali.from_pretrained("vidore/colpaligemma-3b-mix-448-base", torch_dtype=torch.bfloat16, device_map="cuda").eval()
+    model.load_adapter(model_name)
+    model = model.eval()
+    processor = AutoProcessor.from_pretrained(model_name)
+    # select images -> load_from_pdf(<pdf_path>),  load_from_image_urls(["<url_1>"]), load_from_dataset(<path>)
+    images = load_from_dataset("vidore/docvqa_test_subsampled")
+    queries = ["From which university does James V. Fiorca come ?", "Who is the japanese prime minister?"]
+    # run inference - docs
+    dataloader = DataLoader(
+        images,
+        batch_size=4,
+        shuffle=False,
+        collate_fn=lambda x: process_images(processor, x),
+    )
+    ds = []
+    for batch_doc in tqdm(dataloader):
+        with torch.no_grad():
+            batch_doc = {k: v.to(model.device) for k, v in batch_doc.items()}
+            embeddings_doc = model(**batch_doc)
+        ds.extend(list(torch.unbind(embeddings_doc.to("cpu"))))
+    # run inference - queries
+    dataloader = DataLoader(
+        queries,
+        batch_size=4,
+        shuffle=False,
+        collate_fn=lambda x: process_queries(processor, x, Image.new("RGB", (448, 448), (255, 255, 255))),
+    )
+    qs = []
+    for batch_query in dataloader:
+        with torch.no_grad():
+            batch_query = {k: v.to(model.device) for k, v in batch_query.items()}
+            embeddings_query = model(**batch_query)
+        qs.extend(list(torch.unbind(embeddings_query.to("cpu"))))
+    # run evaluation
+    retriever_evaluator = CustomEvaluator(is_multi_vector=True)
+    scores = retriever_evaluator.evaluate(qs, ds)
+    print(scores.argmax(axis=1))
+if __name__ == "__main__":
+    typer.run(main)
+```
+**Note:** If you need to further train ColPali from this adapter, you should run:
+```python
+lora_config = LoraConfig.from_pretrained("vidore/colpali-v1.1")
+lora_config.inference_mode = False  # force training mode for fine-tuning
+model = get_peft_model(model, lora_config)
+print("after")
+model.print_trainable_parameters()
+```
+## Limitations
+ - **Focus**: The model primarily focuses on PDF-type documents and high-resource languages, potentially limiting its generalization to other document types or less represented languages.
+ - **Support**: The model relies on multi-vector retrieval derived from the ColBERT late interaction mechanism, which may require engineering efforts to adapt to widely used vector retrieval frameworks that lack native multi-vector support.
+## License
+ColPali's vision language backbone model (PaliGemma) is under `gemma` license as specified in its [model card](https://huggingface.co/google/paligemma-3b-mix-448). The adapters attached to the model are under MIT license.
+## Contact
+- Manuel Faysse: [email protected]
+- Hugues Sibille: [email protected]
+- Tony Wu: [email protected]
+## Citation
+If you use any datasets or models from this organization in your research, please cite the original dataset as follows:
+```bibtex
+@misc{faysse2024colpaliefficientdocumentretrieval,
+  title={ColPali: Efficient Document Retrieval with Vision Language Models},
+  author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
+  year={2024},
+  eprint={2407.01449},
+  archivePrefix={arXiv},
+  primaryClass={cs.IR},
+  url={https://arxiv.org/abs/2407.01449},
+}
+```

colpali-v1.1/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "checkpoint/colpaligemma-3b-mix-448-base",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": "(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)",
+  "task_type": "FEATURE_EXTRACTION",
+  "use_dora": false,
+  "use_rslora": false
+}

colpali-v1.1/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0cbdb6c0ebcb00260da3b1f6130f4a8892268f719b2d7be7e7ad07115abc1102
+size 78625112

colpali-v1.1/checkpoint-3500/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "./models/colpaligemma-3b-mix-448-base",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": "(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)",
+  "task_type": "FEATURE_EXTRACTION",
+  "use_dora": false,
+  "use_rslora": false
+}

colpali-v1.1/checkpoint-3500/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:948ed5e57c2eb866701430f59e28b3d8de670812cfcd932e1394b744031f1f26
+size 78625112

colpali-v1.1/checkpoint-3500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bea68c5eabf40ec234adc42a4173115af50a1716e1827015da07d9088f71f052
+size 157385722

colpali-v1.1/checkpoint-3500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0db386b438469df3ebf9750bd7c478ec4af19326ab81182cad3bd6f8baa775cd
+size 14244

colpali-v1.1/checkpoint-3500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dec2a32bee5157051265785df8f5c2b01ebf99f431525907c20aae270b809aea
+size 1064

colpali-v1.1/checkpoint-3500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,3043 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9474824038982134,
+  "eval_steps": 50,
+  "global_step": 3500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0027070925825663237,
+      "grad_norm": 0.81640625,
+      "learning_rate": 5e-06,
+      "loss": 0.7309,
+      "step": 10
+    },
+    {
+      "epoch": 0.005414185165132647,
+      "grad_norm": 1.2578125,
+      "learning_rate": 1e-05,
+      "loss": 0.7211,
+      "step": 20
+    },
+    {
+      "epoch": 0.008121277747698972,
+      "grad_norm": 0.7265625,
+      "learning_rate": 1.5e-05,
+      "loss": 0.7223,
+      "step": 30
+    },
+    {
+      "epoch": 0.010828370330265295,
+      "grad_norm": 0.640625,
+      "learning_rate": 2e-05,
+      "loss": 0.7158,
+      "step": 40
+    },
+    {
+      "epoch": 0.01353546291283162,
+      "grad_norm": 0.427734375,
+      "learning_rate": 2.5e-05,
+      "loss": 0.7123,
+      "step": 50
+    },
+    {
+      "epoch": 0.01353546291283162,
+      "eval_loss": 0.711525022983551,
+      "eval_runtime": 105.29,
+      "eval_samples_per_second": 4.749,
+      "eval_steps_per_second": 0.152,
+      "step": 50
+    },
+    {
+      "epoch": 0.016242555495397944,
+      "grad_norm": 0.8125,
+      "learning_rate": 3e-05,
+      "loss": 0.7142,
+      "step": 60
+    },
+    {
+      "epoch": 0.018949648077964266,
+      "grad_norm": 0.58203125,
+      "learning_rate": 3.5e-05,
+      "loss": 0.7052,
+      "step": 70
+    },
+    {
+      "epoch": 0.02165674066053059,
+      "grad_norm": 0.62109375,
+      "learning_rate": 4e-05,
+      "loss": 0.7083,
+      "step": 80
+    },
+    {
+      "epoch": 0.024363833243096916,
+      "grad_norm": 0.98046875,
+      "learning_rate": 4.5e-05,
+      "loss": 0.7036,
+      "step": 90
+    },
+    {
+      "epoch": 0.02707092582566324,
+      "grad_norm": 0.6640625,
+      "learning_rate": 5e-05,
+      "loss": 0.6903,
+      "step": 100
+    },
+    {
+      "epoch": 0.02707092582566324,
+      "eval_loss": 0.6922177672386169,
+      "eval_runtime": 63.6777,
+      "eval_samples_per_second": 7.852,
+      "eval_steps_per_second": 0.251,
+      "step": 100
+    },
+    {
+      "epoch": 0.02977801840822956,
+      "grad_norm": 0.96875,
+      "learning_rate": 4.986087924318308e-05,
+      "loss": 0.6695,
+      "step": 110
+    },
+    {
+      "epoch": 0.03248511099079589,
+      "grad_norm": 1.6484375,
+      "learning_rate": 4.972175848636616e-05,
+      "loss": 0.6261,
+      "step": 120
+    },
+    {
+      "epoch": 0.03519220357336221,
+      "grad_norm": 1.453125,
+      "learning_rate": 4.958263772954925e-05,
+      "loss": 0.5779,
+      "step": 130
+    },
+    {
+      "epoch": 0.03789929615592853,
+      "grad_norm": 3.09375,
+      "learning_rate": 4.944351697273234e-05,
+      "loss": 0.5185,
+      "step": 140
+    },
+    {
+      "epoch": 0.040606388738494856,
+      "grad_norm": 2.375,
+      "learning_rate": 4.930439621591542e-05,
+      "loss": 0.4695,
+      "step": 150
+    },
+    {
+      "epoch": 0.040606388738494856,
+      "eval_loss": 0.45527857542037964,
+      "eval_runtime": 64.9387,
+      "eval_samples_per_second": 7.7,
+      "eval_steps_per_second": 0.246,
+      "step": 150
+    },
+    {
+      "epoch": 0.04331348132106118,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.91652754590985e-05,
+      "loss": 0.4536,
+      "step": 160
+    },
+    {
+      "epoch": 0.0460205739036275,
+      "grad_norm": 3.671875,
+      "learning_rate": 4.9026154702281585e-05,
+      "loss": 0.4113,
+      "step": 170
+    },
+    {
+      "epoch": 0.04872766648619383,
+      "grad_norm": 2.71875,
+      "learning_rate": 4.8887033945464666e-05,
+      "loss": 0.3874,
+      "step": 180
+    },
+    {
+      "epoch": 0.051434759068760154,
+      "grad_norm": 1.6640625,
+      "learning_rate": 4.8747913188647746e-05,
+      "loss": 0.3714,
+      "step": 190
+    },
+    {
+      "epoch": 0.05414185165132648,
+      "grad_norm": 4.1875,
+      "learning_rate": 4.8608792431830826e-05,
+      "loss": 0.294,
+      "step": 200
+    },
+    {
+      "epoch": 0.05414185165132648,
+      "eval_loss": 0.3422486186027527,
+      "eval_runtime": 64.2742,
+      "eval_samples_per_second": 7.779,
+      "eval_steps_per_second": 0.249,
+      "step": 200
+    },
+    {
+      "epoch": 0.0568489442338928,
+      "grad_norm": 3.625,
+      "learning_rate": 4.8469671675013914e-05,
+      "loss": 0.4054,
+      "step": 210
+    },
+    {
+      "epoch": 0.05955603681645912,
+      "grad_norm": 2.984375,
+      "learning_rate": 4.8330550918197e-05,
+      "loss": 0.3213,
+      "step": 220
+    },
+    {
+      "epoch": 0.062263129399025445,
+      "grad_norm": 3.46875,
+      "learning_rate": 4.819143016138008e-05,
+      "loss": 0.3261,
+      "step": 230
+    },
+    {
+      "epoch": 0.06497022198159177,
+      "grad_norm": 2.328125,
+      "learning_rate": 4.805230940456316e-05,
+      "loss": 0.2817,
+      "step": 240
+    },
+    {
+      "epoch": 0.0676773145641581,
+      "grad_norm": 2.59375,
+      "learning_rate": 4.791318864774624e-05,
+      "loss": 0.2502,
+      "step": 250
+    },
+    {
+      "epoch": 0.0676773145641581,
+      "eval_loss": 0.2796708643436432,
+      "eval_runtime": 63.6818,
+      "eval_samples_per_second": 7.852,
+      "eval_steps_per_second": 0.251,
+      "step": 250
+    },
+    {
+      "epoch": 0.07038440714672442,
+      "grad_norm": 3.765625,
+      "learning_rate": 4.777406789092933e-05,
+      "loss": 0.2768,
+      "step": 260
+    },
+    {
+      "epoch": 0.07309149972929074,
+      "grad_norm": 6.375,
+      "learning_rate": 4.763494713411241e-05,
+      "loss": 0.2615,
+      "step": 270
+    },
+    {
+      "epoch": 0.07579859231185707,
+      "grad_norm": 3.328125,
+      "learning_rate": 4.7495826377295496e-05,
+      "loss": 0.248,
+      "step": 280
+    },
+    {
+      "epoch": 0.07850568489442339,
+      "grad_norm": 2.109375,
+      "learning_rate": 4.735670562047858e-05,
+      "loss": 0.1904,
+      "step": 290
+    },
+    {
+      "epoch": 0.08121277747698971,
+      "grad_norm": 1.7734375,
+      "learning_rate": 4.7217584863661664e-05,
+      "loss": 0.2066,
+      "step": 300
+    },
+    {
+      "epoch": 0.08121277747698971,
+      "eval_loss": 0.24386852979660034,
+      "eval_runtime": 63.7131,
+      "eval_samples_per_second": 7.848,
+      "eval_steps_per_second": 0.251,
+      "step": 300
+    },
+    {
+      "epoch": 0.08391987005955603,
+      "grad_norm": 2.515625,
+      "learning_rate": 4.7078464106844744e-05,
+      "loss": 0.2251,
+      "step": 310
+    },
+    {
+      "epoch": 0.08662696264212236,
+      "grad_norm": 1.9453125,
+      "learning_rate": 4.6939343350027825e-05,
+      "loss": 0.2066,
+      "step": 320
+    },
+    {
+      "epoch": 0.08933405522468868,
+      "grad_norm": 1.8515625,
+      "learning_rate": 4.6800222593210905e-05,
+      "loss": 0.1942,
+      "step": 330
+    },
+    {
+      "epoch": 0.092041147807255,
+      "grad_norm": 2.3125,
+      "learning_rate": 4.666110183639399e-05,
+      "loss": 0.2184,
+      "step": 340
+    },
+    {
+      "epoch": 0.09474824038982133,
+      "grad_norm": 2.265625,
+      "learning_rate": 4.652198107957708e-05,
+      "loss": 0.2343,
+      "step": 350
+    },
+    {
+      "epoch": 0.09474824038982133,
+      "eval_loss": 0.23647841811180115,
+      "eval_runtime": 63.9433,
+      "eval_samples_per_second": 7.819,
+      "eval_steps_per_second": 0.25,
+      "step": 350
+    },
+    {
+      "epoch": 0.09745533297238766,
+      "grad_norm": 2.09375,
+      "learning_rate": 4.638286032276016e-05,
+      "loss": 0.2554,
+      "step": 360
+    },
+    {
+      "epoch": 0.10016242555495398,
+      "grad_norm": 1.828125,
+      "learning_rate": 4.624373956594324e-05,
+      "loss": 0.2288,
+      "step": 370
+    },
+    {
+      "epoch": 0.10286951813752031,
+      "grad_norm": 2.328125,
+      "learning_rate": 4.610461880912633e-05,
+      "loss": 0.2159,
+      "step": 380
+    },
+    {
+      "epoch": 0.10557661072008663,
+      "grad_norm": 2.1875,
+      "learning_rate": 4.596549805230941e-05,
+      "loss": 0.1825,
+      "step": 390
+    },
+    {
+      "epoch": 0.10828370330265295,
+      "grad_norm": 2.4375,
+      "learning_rate": 4.582637729549249e-05,
+      "loss": 0.2525,
+      "step": 400
+    },
+    {
+      "epoch": 0.10828370330265295,
+      "eval_loss": 0.21003711223602295,
+      "eval_runtime": 64.3464,
+      "eval_samples_per_second": 7.77,
+      "eval_steps_per_second": 0.249,
+      "step": 400
+    },
+    {
+      "epoch": 0.11099079588521928,
+      "grad_norm": 2.4375,
+      "learning_rate": 4.568725653867557e-05,
+      "loss": 0.1708,
+      "step": 410
+    },
+    {
+      "epoch": 0.1136978884677856,
+      "grad_norm": 3.140625,
+      "learning_rate": 4.5548135781858655e-05,
+      "loss": 0.1939,
+      "step": 420
+    },
+    {
+      "epoch": 0.11640498105035192,
+      "grad_norm": 2.5625,
+      "learning_rate": 4.540901502504174e-05,
+      "loss": 0.1732,
+      "step": 430
+    },
+    {
+      "epoch": 0.11911207363291824,
+      "grad_norm": 1.3984375,
+      "learning_rate": 4.526989426822482e-05,
+      "loss": 0.2078,
+      "step": 440
+    },
+    {
+      "epoch": 0.12181916621548457,
+      "grad_norm": 7.5,
+      "learning_rate": 4.51307735114079e-05,
+      "loss": 0.1984,
+      "step": 450
+    },
+    {
+      "epoch": 0.12181916621548457,
+      "eval_loss": 0.2016001045703888,
+      "eval_runtime": 98.9371,
+      "eval_samples_per_second": 5.054,
+      "eval_steps_per_second": 0.162,
+      "step": 450
+    },
+    {
+      "epoch": 0.12452625879805089,
+      "grad_norm": 2.78125,
+      "learning_rate": 4.4991652754590984e-05,
+      "loss": 0.1846,
+      "step": 460
+    },
+    {
+      "epoch": 0.12723335138061723,
+      "grad_norm": 1.0,
+      "learning_rate": 4.485253199777407e-05,
+      "loss": 0.1628,
+      "step": 470
+    },
+    {
+      "epoch": 0.12994044396318355,
+      "grad_norm": 0.65625,
+      "learning_rate": 4.471341124095715e-05,
+      "loss": 0.1627,
+      "step": 480
+    },
+    {
+      "epoch": 0.13264753654574987,
+      "grad_norm": 1.1953125,
+      "learning_rate": 4.457429048414024e-05,
+      "loss": 0.1694,
+      "step": 490
+    },
+    {
+      "epoch": 0.1353546291283162,
+      "grad_norm": 1.5078125,
+      "learning_rate": 4.443516972732332e-05,
+      "loss": 0.2156,
+      "step": 500
+    },
+    {
+      "epoch": 0.1353546291283162,
+      "eval_loss": 0.19197307527065277,
+      "eval_runtime": 72.4725,
+      "eval_samples_per_second": 6.899,
+      "eval_steps_per_second": 0.221,
+      "step": 500
+    },
+    {
+      "epoch": 0.13806172171088252,
+      "grad_norm": 3.140625,
+      "learning_rate": 4.4296048970506406e-05,
+      "loss": 0.1641,
+      "step": 510
+    },
+    {
+      "epoch": 0.14076881429344884,
+      "grad_norm": 2.359375,
+      "learning_rate": 4.4156928213689486e-05,
+      "loss": 0.1925,
+      "step": 520
+    },
+    {
+      "epoch": 0.14347590687601516,
+      "grad_norm": 1.375,
+      "learning_rate": 4.4017807456872566e-05,
+      "loss": 0.1794,
+      "step": 530
+    },
+    {
+      "epoch": 0.1461829994585815,
+      "grad_norm": 4.96875,
+      "learning_rate": 4.387868670005565e-05,
+      "loss": 0.197,
+      "step": 540
+    },
+    {
+      "epoch": 0.1488900920411478,
+      "grad_norm": 1.1015625,
+      "learning_rate": 4.373956594323873e-05,
+      "loss": 0.1423,
+      "step": 550
+    },
+    {
+      "epoch": 0.1488900920411478,
+      "eval_loss": 0.19690875709056854,
+      "eval_runtime": 62.7419,
+      "eval_samples_per_second": 7.969,
+      "eval_steps_per_second": 0.255,
+      "step": 550
+    },
+    {
+      "epoch": 0.15159718462371413,
+      "grad_norm": 1.5234375,
+      "learning_rate": 4.360044518642182e-05,
+      "loss": 0.1976,
+      "step": 560
+    },
+    {
+      "epoch": 0.15430427720628045,
+      "grad_norm": 0.57421875,
+      "learning_rate": 4.34613244296049e-05,
+      "loss": 0.14,
+      "step": 570
+    },
+    {
+      "epoch": 0.15701136978884678,
+      "grad_norm": 2.890625,
+      "learning_rate": 4.332220367278798e-05,
+      "loss": 0.2063,
+      "step": 580
+    },
+    {
+      "epoch": 0.1597184623714131,
+      "grad_norm": 2.15625,
+      "learning_rate": 4.318308291597106e-05,
+      "loss": 0.2194,
+      "step": 590
+    },
+    {
+      "epoch": 0.16242555495397942,
+      "grad_norm": 1.40625,
+      "learning_rate": 4.304396215915415e-05,
+      "loss": 0.1308,
+      "step": 600
+    },
+    {
+      "epoch": 0.16242555495397942,
+      "eval_loss": 0.1980336606502533,
+      "eval_runtime": 62.7479,
+      "eval_samples_per_second": 7.968,
+      "eval_steps_per_second": 0.255,
+      "step": 600
+    },
+    {
+      "epoch": 0.16513264753654575,
+      "grad_norm": 0.8046875,
+      "learning_rate": 4.290484140233723e-05,
+      "loss": 0.1557,
+      "step": 610
+    },
+    {
+      "epoch": 0.16783974011911207,
+      "grad_norm": 2.21875,
+      "learning_rate": 4.276572064552031e-05,
+      "loss": 0.1923,
+      "step": 620
+    },
+    {
+      "epoch": 0.1705468327016784,
+      "grad_norm": 0.73828125,
+      "learning_rate": 4.26265998887034e-05,
+      "loss": 0.1714,
+      "step": 630
+    },
+    {
+      "epoch": 0.17325392528424471,
+      "grad_norm": 3.578125,
+      "learning_rate": 4.2487479131886484e-05,
+      "loss": 0.184,
+      "step": 640
+    },
+    {
+      "epoch": 0.17596101786681104,
+      "grad_norm": 1.8125,
+      "learning_rate": 4.2348358375069565e-05,
+      "loss": 0.2067,
+      "step": 650
+    },
+    {
+      "epoch": 0.17596101786681104,
+      "eval_loss": 0.19718270003795624,
+      "eval_runtime": 62.7349,
+      "eval_samples_per_second": 7.97,
+      "eval_steps_per_second": 0.255,
+      "step": 650
+    },
+    {
+      "epoch": 0.17866811044937736,
+      "grad_norm": 2.734375,
+      "learning_rate": 4.2209237618252645e-05,
+      "loss": 0.156,
+      "step": 660
+    },
+    {
+      "epoch": 0.18137520303194368,
+      "grad_norm": 1.6328125,
+      "learning_rate": 4.2070116861435725e-05,
+      "loss": 0.1309,
+      "step": 670
+    },
+    {
+      "epoch": 0.18408229561451,
+      "grad_norm": 3.28125,
+      "learning_rate": 4.193099610461881e-05,
+      "loss": 0.1983,
+      "step": 680
+    },
+    {
+      "epoch": 0.18678938819707633,
+      "grad_norm": 1.765625,
+      "learning_rate": 4.179187534780189e-05,
+      "loss": 0.159,
+      "step": 690
+    },
+    {
+      "epoch": 0.18949648077964265,
+      "grad_norm": 0.62109375,
+      "learning_rate": 4.165275459098498e-05,
+      "loss": 0.1481,
+      "step": 700
+    },
+    {
+      "epoch": 0.18949648077964265,
+      "eval_loss": 0.1895207166671753,
+      "eval_runtime": 62.6131,
+      "eval_samples_per_second": 7.986,
+      "eval_steps_per_second": 0.256,
+      "step": 700
+    },
+    {
+      "epoch": 0.19220357336220897,
+      "grad_norm": 1.984375,
+      "learning_rate": 4.151363383416806e-05,
+      "loss": 0.2162,
+      "step": 710
+    },
+    {
+      "epoch": 0.19491066594477532,
+      "grad_norm": 1.1875,
+      "learning_rate": 4.137451307735114e-05,
+      "loss": 0.1278,
+      "step": 720
+    },
+    {
+      "epoch": 0.19761775852734165,
+      "grad_norm": 0.7734375,
+      "learning_rate": 4.123539232053423e-05,
+      "loss": 0.1714,
+      "step": 730
+    },
+    {
+      "epoch": 0.20032485110990797,
+      "grad_norm": 1.90625,
+      "learning_rate": 4.109627156371731e-05,
+      "loss": 0.1764,
+      "step": 740
+    },
+    {
+      "epoch": 0.2030319436924743,
+      "grad_norm": 1.9765625,
+      "learning_rate": 4.095715080690039e-05,
+      "loss": 0.161,
+      "step": 750
+    },
+    {
+      "epoch": 0.2030319436924743,
+      "eval_loss": 0.18931728601455688,
+      "eval_runtime": 63.0862,
+      "eval_samples_per_second": 7.926,
+      "eval_steps_per_second": 0.254,
+      "step": 750
+    },
+    {
+      "epoch": 0.20573903627504062,
+      "grad_norm": 0.859375,
+      "learning_rate": 4.081803005008347e-05,
+      "loss": 0.1913,
+      "step": 760
+    },
+    {
+      "epoch": 0.20844612885760694,
+      "grad_norm": 1.0234375,
+      "learning_rate": 4.0678909293266556e-05,
+      "loss": 0.1447,
+      "step": 770
+    },
+    {
+      "epoch": 0.21115322144017326,
+      "grad_norm": 1.265625,
+      "learning_rate": 4.053978853644964e-05,
+      "loss": 0.1265,
+      "step": 780
+    },
+    {
+      "epoch": 0.21386031402273958,
+      "grad_norm": 3.5,
+      "learning_rate": 4.0400667779632724e-05,
+      "loss": 0.1812,
+      "step": 790
+    },
+    {
+      "epoch": 0.2165674066053059,
+      "grad_norm": 1.484375,
+      "learning_rate": 4.0261547022815804e-05,
+      "loss": 0.1352,
+      "step": 800
+    },
+    {
+      "epoch": 0.2165674066053059,
+      "eval_loss": 0.1901209056377411,
+      "eval_runtime": 63.1622,
+      "eval_samples_per_second": 7.916,
+      "eval_steps_per_second": 0.253,
+      "step": 800
+    },
+    {
+      "epoch": 0.21927449918787223,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.012242626599889e-05,
+      "loss": 0.1266,
+      "step": 810
+    },
+    {
+      "epoch": 0.22198159177043855,
+      "grad_norm": 2.71875,
+      "learning_rate": 3.998330550918197e-05,
+      "loss": 0.1408,
+      "step": 820
+    },
+    {
+      "epoch": 0.22468868435300487,
+      "grad_norm": 1.6875,
+      "learning_rate": 3.984418475236505e-05,
+      "loss": 0.1268,
+      "step": 830
+    },
+    {
+      "epoch": 0.2273957769355712,
+      "grad_norm": 1.5234375,
+      "learning_rate": 3.970506399554814e-05,
+      "loss": 0.1397,
+      "step": 840
+    },
+    {
+      "epoch": 0.23010286951813752,
+      "grad_norm": 1.421875,
+      "learning_rate": 3.956594323873122e-05,
+      "loss": 0.2107,
+      "step": 850
+    },
+    {
+      "epoch": 0.23010286951813752,
+      "eval_loss": 0.18407949805259705,
+      "eval_runtime": 62.4768,
+      "eval_samples_per_second": 8.003,
+      "eval_steps_per_second": 0.256,
+      "step": 850
+    },
+    {
+      "epoch": 0.23280996210070384,
+      "grad_norm": 2.515625,
+      "learning_rate": 3.9426822481914307e-05,
+      "loss": 0.1519,
+      "step": 860
+    },
+    {
+      "epoch": 0.23551705468327017,
+      "grad_norm": 2.3125,
+      "learning_rate": 3.928770172509739e-05,
+      "loss": 0.1877,
+      "step": 870
+    },
+    {
+      "epoch": 0.2382241472658365,
+      "grad_norm": 1.453125,
+      "learning_rate": 3.914858096828047e-05,
+      "loss": 0.099,
+      "step": 880
+    },
+    {
+      "epoch": 0.2409312398484028,
+      "grad_norm": 1.171875,
+      "learning_rate": 3.900946021146355e-05,
+      "loss": 0.1565,
+      "step": 890
+    },
+    {
+      "epoch": 0.24363833243096913,
+      "grad_norm": 0.44921875,
+      "learning_rate": 3.8870339454646635e-05,
+      "loss": 0.1352,
+      "step": 900
+    },
+    {
+      "epoch": 0.24363833243096913,
+      "eval_loss": 0.18001192808151245,
+      "eval_runtime": 62.7885,
+      "eval_samples_per_second": 7.963,
+      "eval_steps_per_second": 0.255,
+      "step": 900
+    },
+    {
+      "epoch": 0.24634542501353546,
+      "grad_norm": 2.140625,
+      "learning_rate": 3.873121869782972e-05,
+      "loss": 0.1469,
+      "step": 910
+    },
+    {
+      "epoch": 0.24905251759610178,
+      "grad_norm": 1.453125,
+      "learning_rate": 3.85920979410128e-05,
+      "loss": 0.1918,
+      "step": 920
+    },
+    {
+      "epoch": 0.2517596101786681,
+      "grad_norm": 1.4609375,
+      "learning_rate": 3.845297718419588e-05,
+      "loss": 0.1836,
+      "step": 930
+    },
+    {
+      "epoch": 0.25446670276123445,
+      "grad_norm": 2.625,
+      "learning_rate": 3.831385642737897e-05,
+      "loss": 0.1588,
+      "step": 940
+    },
+    {
+      "epoch": 0.25717379534380075,
+      "grad_norm": 0.6953125,
+      "learning_rate": 3.817473567056205e-05,
+      "loss": 0.1503,
+      "step": 950
+    },
+    {
+      "epoch": 0.25717379534380075,
+      "eval_loss": 0.17602640390396118,
+      "eval_runtime": 63.175,
+      "eval_samples_per_second": 7.915,
+      "eval_steps_per_second": 0.253,
+      "step": 950
+    },
+    {
+      "epoch": 0.2598808879263671,
+      "grad_norm": 2.0,
+      "learning_rate": 3.803561491374513e-05,
+      "loss": 0.1356,
+      "step": 960
+    },
+    {
+      "epoch": 0.2625879805089334,
+      "grad_norm": 0.9375,
+      "learning_rate": 3.789649415692821e-05,
+      "loss": 0.1702,
+      "step": 970
+    },
+    {
+      "epoch": 0.26529507309149974,
+      "grad_norm": 0.92578125,
+      "learning_rate": 3.77573734001113e-05,
+      "loss": 0.1545,
+      "step": 980
+    },
+    {
+      "epoch": 0.26800216567406604,
+      "grad_norm": 0.6640625,
+      "learning_rate": 3.7618252643294385e-05,
+      "loss": 0.1421,
+      "step": 990
+    },
+    {
+      "epoch": 0.2707092582566324,
+      "grad_norm": 3.96875,
+      "learning_rate": 3.7479131886477466e-05,
+      "loss": 0.1255,
+      "step": 1000
+    },
+    {
+      "epoch": 0.2707092582566324,
+      "eval_loss": 0.18262001872062683,
+      "eval_runtime": 78.6472,
+      "eval_samples_per_second": 6.358,
+      "eval_steps_per_second": 0.203,
+      "step": 1000
+    },
+    {
+      "epoch": 0.2734163508391987,
+      "grad_norm": 1.2734375,
+      "learning_rate": 3.7340011129660546e-05,
+      "loss": 0.144,
+      "step": 1010
+    },
+    {
+      "epoch": 0.27612344342176504,
+      "grad_norm": 1.7109375,
+      "learning_rate": 3.7200890372843626e-05,
+      "loss": 0.108,
+      "step": 1020
+    },
+    {
+      "epoch": 0.27883053600433133,
+      "grad_norm": 2.9375,
+      "learning_rate": 3.7061769616026713e-05,
+      "loss": 0.1255,
+      "step": 1030
+    },
+    {
+      "epoch": 0.2815376285868977,
+      "grad_norm": 1.25,
+      "learning_rate": 3.6922648859209794e-05,
+      "loss": 0.1421,
+      "step": 1040
+    },
+    {
+      "epoch": 0.284244721169464,
+      "grad_norm": 4.625,
+      "learning_rate": 3.678352810239288e-05,
+      "loss": 0.1686,
+      "step": 1050
+    },
+    {
+      "epoch": 0.284244721169464,
+      "eval_loss": 0.18178632855415344,
+      "eval_runtime": 62.9144,
+      "eval_samples_per_second": 7.947,
+      "eval_steps_per_second": 0.254,
+      "step": 1050
+    },
+    {
+      "epoch": 0.2869518137520303,
+      "grad_norm": 1.1328125,
+      "learning_rate": 3.664440734557596e-05,
+      "loss": 0.1147,
+      "step": 1060
+    },
+    {
+      "epoch": 0.2896589063345966,
+      "grad_norm": 1.703125,
+      "learning_rate": 3.650528658875905e-05,
+      "loss": 0.1517,
+      "step": 1070
+    },
+    {
+      "epoch": 0.292365998917163,
+      "grad_norm": 2.328125,
+      "learning_rate": 3.636616583194213e-05,
+      "loss": 0.1545,
+      "step": 1080
+    },
+    {
+      "epoch": 0.29507309149972927,
+      "grad_norm": 1.984375,
+      "learning_rate": 3.622704507512521e-05,
+      "loss": 0.1471,
+      "step": 1090
+    },
+    {
+      "epoch": 0.2977801840822956,
+      "grad_norm": 2.515625,
+      "learning_rate": 3.608792431830829e-05,
+      "loss": 0.1522,
+      "step": 1100
+    },
+    {
+      "epoch": 0.2977801840822956,
+      "eval_loss": 0.17702646553516388,
+      "eval_runtime": 63.2535,
+      "eval_samples_per_second": 7.905,
+      "eval_steps_per_second": 0.253,
+      "step": 1100
+    },
+    {
+      "epoch": 0.3004872766648619,
+      "grad_norm": 1.5859375,
+      "learning_rate": 3.594880356149138e-05,
+      "loss": 0.1772,
+      "step": 1110
+    },
+    {
+      "epoch": 0.30319436924742826,
+      "grad_norm": 0.77734375,
+      "learning_rate": 3.580968280467446e-05,
+      "loss": 0.1361,
+      "step": 1120
+    },
+    {
+      "epoch": 0.30590146182999456,
+      "grad_norm": 0.427734375,
+      "learning_rate": 3.5670562047857544e-05,
+      "loss": 0.128,
+      "step": 1130
+    },
+    {
+      "epoch": 0.3086085544125609,
+      "grad_norm": 2.328125,
+      "learning_rate": 3.5531441291040625e-05,
+      "loss": 0.1313,
+      "step": 1140
+    },
+    {
+      "epoch": 0.31131564699512726,
+      "grad_norm": 1.34375,
+      "learning_rate": 3.5392320534223705e-05,
+      "loss": 0.1238,
+      "step": 1150
+    },
+    {
+      "epoch": 0.31131564699512726,
+      "eval_loss": 0.17454275488853455,
+      "eval_runtime": 62.5874,
+      "eval_samples_per_second": 7.989,
+      "eval_steps_per_second": 0.256,
+      "step": 1150
+    },
+    {
+      "epoch": 0.31402273957769355,
+      "grad_norm": 0.451171875,
+      "learning_rate": 3.525319977740679e-05,
+      "loss": 0.122,
+      "step": 1160
+    },
+    {
+      "epoch": 0.3167298321602599,
+      "grad_norm": 2.59375,
+      "learning_rate": 3.511407902058987e-05,
+      "loss": 0.174,
+      "step": 1170
+    },
+    {
+      "epoch": 0.3194369247428262,
+      "grad_norm": 1.8671875,
+      "learning_rate": 3.497495826377295e-05,
+      "loss": 0.1741,
+      "step": 1180
+    },
+    {
+      "epoch": 0.32214401732539255,
+      "grad_norm": 1.5,
+      "learning_rate": 3.483583750695604e-05,
+      "loss": 0.1116,
+      "step": 1190
+    },
+    {
+      "epoch": 0.32485110990795885,
+      "grad_norm": 3.875,
+      "learning_rate": 3.469671675013913e-05,
+      "loss": 0.1179,
+      "step": 1200
+    },
+    {
+      "epoch": 0.32485110990795885,
+      "eval_loss": 0.17608264088630676,
+      "eval_runtime": 62.6246,
+      "eval_samples_per_second": 7.984,
+      "eval_steps_per_second": 0.255,
+      "step": 1200
+    },
+    {
+      "epoch": 0.3275582024905252,
+      "grad_norm": 1.7421875,
+      "learning_rate": 3.455759599332221e-05,
+      "loss": 0.1332,
+      "step": 1210
+    },
+    {
+      "epoch": 0.3302652950730915,
+      "grad_norm": 2.578125,
+      "learning_rate": 3.441847523650529e-05,
+      "loss": 0.1425,
+      "step": 1220
+    },
+    {
+      "epoch": 0.33297238765565784,
+      "grad_norm": 2.734375,
+      "learning_rate": 3.427935447968837e-05,
+      "loss": 0.2023,
+      "step": 1230
+    },
+    {
+      "epoch": 0.33567948023822414,
+      "grad_norm": 0.84375,
+      "learning_rate": 3.4140233722871455e-05,
+      "loss": 0.1373,
+      "step": 1240
+    },
+    {
+      "epoch": 0.3383865728207905,
+      "grad_norm": 1.203125,
+      "learning_rate": 3.4001112966054536e-05,
+      "loss": 0.123,
+      "step": 1250
+    },
+    {
+      "epoch": 0.3383865728207905,
+      "eval_loss": 0.16774284839630127,
+      "eval_runtime": 62.784,
+      "eval_samples_per_second": 7.964,
+      "eval_steps_per_second": 0.255,
+      "step": 1250
+    },
+    {
+      "epoch": 0.3410936654033568,
+      "grad_norm": 2.6875,
+      "learning_rate": 3.386199220923762e-05,
+      "loss": 0.1289,
+      "step": 1260
+    },
+    {
+      "epoch": 0.34380075798592313,
+      "grad_norm": 3.28125,
+      "learning_rate": 3.37228714524207e-05,
+      "loss": 0.1533,
+      "step": 1270
+    },
+    {
+      "epoch": 0.34650785056848943,
+      "grad_norm": 1.359375,
+      "learning_rate": 3.358375069560379e-05,
+      "loss": 0.1829,
+      "step": 1280
+    },
+    {
+      "epoch": 0.3492149431510558,
+      "grad_norm": 2.15625,
+      "learning_rate": 3.344462993878687e-05,
+      "loss": 0.128,
+      "step": 1290
+    },
+    {
+      "epoch": 0.3519220357336221,
+      "grad_norm": 2.21875,
+      "learning_rate": 3.330550918196995e-05,
+      "loss": 0.1807,
+      "step": 1300
+    },
+    {
+      "epoch": 0.3519220357336221,
+      "eval_loss": 0.15718665719032288,
+      "eval_runtime": 62.4512,
+      "eval_samples_per_second": 8.006,
+      "eval_steps_per_second": 0.256,
+      "step": 1300
+    },
+    {
+      "epoch": 0.3546291283161884,
+      "grad_norm": 0.5390625,
+      "learning_rate": 3.316638842515303e-05,
+      "loss": 0.1276,
+      "step": 1310
+    },
+    {
+      "epoch": 0.3573362208987547,
+      "grad_norm": 1.7265625,
+      "learning_rate": 3.302726766833611e-05,
+      "loss": 0.1422,
+      "step": 1320
+    },
+    {
+      "epoch": 0.36004331348132107,
+      "grad_norm": 3.125,
+      "learning_rate": 3.28881469115192e-05,
+      "loss": 0.2032,
+      "step": 1330
+    },
+    {
+      "epoch": 0.36275040606388737,
+      "grad_norm": 1.8515625,
+      "learning_rate": 3.2749026154702286e-05,
+      "loss": 0.1726,
+      "step": 1340
+    },
+    {
+      "epoch": 0.3654574986464537,
+      "grad_norm": 1.0625,
+      "learning_rate": 3.2609905397885366e-05,
+      "loss": 0.1432,
+      "step": 1350
+    },
+    {
+      "epoch": 0.3654574986464537,
+      "eval_loss": 0.15765319764614105,
+      "eval_runtime": 63.3444,
+      "eval_samples_per_second": 7.893,
+      "eval_steps_per_second": 0.253,
+      "step": 1350
+    },
+    {
+      "epoch": 0.36816459122902,
+      "grad_norm": 1.125,
+      "learning_rate": 3.247078464106845e-05,
+      "loss": 0.1008,
+      "step": 1360
+    },
+    {
+      "epoch": 0.37087168381158636,
+      "grad_norm": 1.734375,
+      "learning_rate": 3.2331663884251534e-05,
+      "loss": 0.1397,
+      "step": 1370
+    },
+    {
+      "epoch": 0.37357877639415266,
+      "grad_norm": 1.6484375,
+      "learning_rate": 3.2192543127434614e-05,
+      "loss": 0.1342,
+      "step": 1380
+    },
+    {
+      "epoch": 0.376285868976719,
+      "grad_norm": 1.0859375,
+      "learning_rate": 3.2053422370617695e-05,
+      "loss": 0.1528,
+      "step": 1390
+    },
+    {
+      "epoch": 0.3789929615592853,
+      "grad_norm": 2.421875,
+      "learning_rate": 3.191430161380078e-05,
+      "loss": 0.1313,
+      "step": 1400
+    },
+    {
+      "epoch": 0.3789929615592853,
+      "eval_loss": 0.16292478144168854,
+      "eval_runtime": 64.15,
+      "eval_samples_per_second": 7.794,
+      "eval_steps_per_second": 0.249,
+      "step": 1400
+    },
+    {
+      "epoch": 0.38170005414185165,
+      "grad_norm": 2.09375,
+      "learning_rate": 3.177518085698387e-05,
+      "loss": 0.1358,
+      "step": 1410
+    },
+    {
+      "epoch": 0.38440714672441795,
+      "grad_norm": 3.0,
+      "learning_rate": 3.163606010016695e-05,
+      "loss": 0.1683,
+      "step": 1420
+    },
+    {
+      "epoch": 0.3871142393069843,
+      "grad_norm": 2.109375,
+      "learning_rate": 3.149693934335003e-05,
+      "loss": 0.136,
+      "step": 1430
+    },
+    {
+      "epoch": 0.38982133188955065,
+      "grad_norm": 2.15625,
+      "learning_rate": 3.135781858653311e-05,
+      "loss": 0.1435,
+      "step": 1440
+    },
+    {
+      "epoch": 0.39252842447211694,
+      "grad_norm": 2.046875,
+      "learning_rate": 3.121869782971619e-05,
+      "loss": 0.1711,
+      "step": 1450
+    },
+    {
+      "epoch": 0.39252842447211694,
+      "eval_loss": 0.15946637094020844,
+      "eval_runtime": 65.0021,
+      "eval_samples_per_second": 7.692,
+      "eval_steps_per_second": 0.246,
+      "step": 1450
+    },
+    {
+      "epoch": 0.3952355170546833,
+      "grad_norm": 0.80078125,
+      "learning_rate": 3.107957707289928e-05,
+      "loss": 0.1672,
+      "step": 1460
+    },
+    {
+      "epoch": 0.3979426096372496,
+      "grad_norm": 0.703125,
+      "learning_rate": 3.0940456316082365e-05,
+      "loss": 0.1152,
+      "step": 1470
+    },
+    {
+      "epoch": 0.40064970221981594,
+      "grad_norm": 2.203125,
+      "learning_rate": 3.0801335559265445e-05,
+      "loss": 0.1628,
+      "step": 1480
+    },
+    {
+      "epoch": 0.40335679480238223,
+      "grad_norm": 1.7734375,
+      "learning_rate": 3.0662214802448525e-05,
+      "loss": 0.1474,
+      "step": 1490
+    },
+    {
+      "epoch": 0.4060638873849486,
+      "grad_norm": 1.015625,
+      "learning_rate": 3.052309404563161e-05,
+      "loss": 0.1211,
+      "step": 1500
+    },
+    {
+      "epoch": 0.4060638873849486,
+      "eval_loss": 0.16578222811222076,
+      "eval_runtime": 77.637,
+      "eval_samples_per_second": 6.44,
+      "eval_steps_per_second": 0.206,
+      "step": 1500
+    },
+    {
+      "epoch": 0.4087709799675149,
+      "grad_norm": 2.0625,
+      "learning_rate": 3.0383973288814693e-05,
+      "loss": 0.1388,
+      "step": 1510
+    },
+    {
+      "epoch": 0.41147807255008123,
+      "grad_norm": 2.671875,
+      "learning_rate": 3.0244852531997773e-05,
+      "loss": 0.1518,
+      "step": 1520
+    },
+    {
+      "epoch": 0.4141851651326475,
+      "grad_norm": 2.046875,
+      "learning_rate": 3.0105731775180857e-05,
+      "loss": 0.1536,
+      "step": 1530
+    },
+    {
+      "epoch": 0.4168922577152139,
+      "grad_norm": 2.0625,
+      "learning_rate": 2.9966611018363944e-05,
+      "loss": 0.1464,
+      "step": 1540
+    },
+    {
+      "epoch": 0.41959935029778017,
+      "grad_norm": 1.9609375,
+      "learning_rate": 2.9827490261547024e-05,
+      "loss": 0.1585,
+      "step": 1550
+    },
+    {
+      "epoch": 0.41959935029778017,
+      "eval_loss": 0.16232775151729584,
+      "eval_runtime": 64.4195,
+      "eval_samples_per_second": 7.762,
+      "eval_steps_per_second": 0.248,
+      "step": 1550
+    },
+    {
+      "epoch": 0.4223064428803465,
+      "grad_norm": 1.1640625,
+      "learning_rate": 2.9688369504730108e-05,
+      "loss": 0.1078,
+      "step": 1560
+    },
+    {
+      "epoch": 0.4250135354629128,
+      "grad_norm": 0.9453125,
+      "learning_rate": 2.954924874791319e-05,
+      "loss": 0.1191,
+      "step": 1570
+    },
+    {
+      "epoch": 0.42772062804547917,
+      "grad_norm": 2.328125,
+      "learning_rate": 2.9410127991096276e-05,
+      "loss": 0.1366,
+      "step": 1580
+    },
+    {
+      "epoch": 0.43042772062804546,
+      "grad_norm": 1.0390625,
+      "learning_rate": 2.9271007234279356e-05,
+      "loss": 0.1382,
+      "step": 1590
+    },
+    {
+      "epoch": 0.4331348132106118,
+      "grad_norm": 1.03125,
+      "learning_rate": 2.913188647746244e-05,
+      "loss": 0.1545,
+      "step": 1600
+    },
+    {
+      "epoch": 0.4331348132106118,
+      "eval_loss": 0.16547426581382751,
+      "eval_runtime": 64.8149,
+      "eval_samples_per_second": 7.714,
+      "eval_steps_per_second": 0.247,
+      "step": 1600
+    },
+    {
+      "epoch": 0.4358419057931781,
+      "grad_norm": 1.0390625,
+      "learning_rate": 2.899276572064552e-05,
+      "loss": 0.1232,
+      "step": 1610
+    },
+    {
+      "epoch": 0.43854899837574446,
+      "grad_norm": 0.45703125,
+      "learning_rate": 2.88536449638286e-05,
+      "loss": 0.1371,
+      "step": 1620
+    },
+    {
+      "epoch": 0.44125609095831075,
+      "grad_norm": 3.28125,
+      "learning_rate": 2.8714524207011688e-05,
+      "loss": 0.1301,
+      "step": 1630
+    },
+    {
+      "epoch": 0.4439631835408771,
+      "grad_norm": 2.703125,
+      "learning_rate": 2.857540345019477e-05,
+      "loss": 0.122,
+      "step": 1640
+    },
+    {
+      "epoch": 0.4466702761234434,
+      "grad_norm": 1.203125,
+      "learning_rate": 2.8436282693377852e-05,
+      "loss": 0.1649,
+      "step": 1650
+    },
+    {
+      "epoch": 0.4466702761234434,
+      "eval_loss": 0.15991100668907166,
+      "eval_runtime": 64.6415,
+      "eval_samples_per_second": 7.735,
+      "eval_steps_per_second": 0.248,
+      "step": 1650
+    },
+    {
+      "epoch": 0.44937736870600975,
+      "grad_norm": 1.28125,
+      "learning_rate": 2.8297161936560936e-05,
+      "loss": 0.158,
+      "step": 1660
+    },
+    {
+      "epoch": 0.45208446128857604,
+      "grad_norm": 0.9375,
+      "learning_rate": 2.8158041179744023e-05,
+      "loss": 0.1427,
+      "step": 1670
+    },
+    {
+      "epoch": 0.4547915538711424,
+      "grad_norm": 1.6796875,
+      "learning_rate": 2.8018920422927103e-05,
+      "loss": 0.1528,
+      "step": 1680
+    },
+    {
+      "epoch": 0.4574986464537087,
+      "grad_norm": 0.78125,
+      "learning_rate": 2.7879799666110183e-05,
+      "loss": 0.1117,
+      "step": 1690
+    },
+    {
+      "epoch": 0.46020573903627504,
+      "grad_norm": 1.3203125,
+      "learning_rate": 2.7740678909293267e-05,
+      "loss": 0.1069,
+      "step": 1700
+    },
+    {
+      "epoch": 0.46020573903627504,
+      "eval_loss": 0.16090016067028046,
+      "eval_runtime": 64.8286,
+      "eval_samples_per_second": 7.713,
+      "eval_steps_per_second": 0.247,
+      "step": 1700
+    },
+    {
+      "epoch": 0.4629128316188414,
+      "grad_norm": 1.703125,
+      "learning_rate": 2.7601558152476354e-05,
+      "loss": 0.1165,
+      "step": 1710
+    },
+    {
+      "epoch": 0.4656199242014077,
+      "grad_norm": 2.015625,
+      "learning_rate": 2.7462437395659435e-05,
+      "loss": 0.1347,
+      "step": 1720
+    },
+    {
+      "epoch": 0.46832701678397404,
+      "grad_norm": 2.25,
+      "learning_rate": 2.7323316638842515e-05,
+      "loss": 0.2037,
+      "step": 1730
+    },
+    {
+      "epoch": 0.47103410936654033,
+      "grad_norm": 0.7890625,
+      "learning_rate": 2.71841958820256e-05,
+      "loss": 0.1338,
+      "step": 1740
+    },
+    {
+      "epoch": 0.4737412019491067,
+      "grad_norm": 1.21875,
+      "learning_rate": 2.704507512520868e-05,
+      "loss": 0.1245,
+      "step": 1750
+    },
+    {
+      "epoch": 0.4737412019491067,
+      "eval_loss": 0.16117550432682037,
+      "eval_runtime": 64.6119,
+      "eval_samples_per_second": 7.739,
+      "eval_steps_per_second": 0.248,
+      "step": 1750
+    },
+    {
+      "epoch": 0.476448294531673,
+      "grad_norm": 1.796875,
+      "learning_rate": 2.6905954368391766e-05,
+      "loss": 0.1182,
+      "step": 1760
+    },
+    {
+      "epoch": 0.47915538711423933,
+      "grad_norm": 2.78125,
+      "learning_rate": 2.676683361157485e-05,
+      "loss": 0.1662,
+      "step": 1770
+    },
+    {
+      "epoch": 0.4818624796968056,
+      "grad_norm": 1.0546875,
+      "learning_rate": 2.662771285475793e-05,
+      "loss": 0.1597,
+      "step": 1780
+    },
+    {
+      "epoch": 0.484569572279372,
+      "grad_norm": 1.5703125,
+      "learning_rate": 2.648859209794101e-05,
+      "loss": 0.104,
+      "step": 1790
+    },
+    {
+      "epoch": 0.48727666486193827,
+      "grad_norm": 3.109375,
+      "learning_rate": 2.6349471341124098e-05,
+      "loss": 0.1149,
+      "step": 1800
+    },
+    {
+      "epoch": 0.48727666486193827,
+      "eval_loss": 0.1653529703617096,
+      "eval_runtime": 65.0831,
+      "eval_samples_per_second": 7.682,
+      "eval_steps_per_second": 0.246,
+      "step": 1800
+    },
+    {
+      "epoch": 0.4899837574445046,
+      "grad_norm": 0.60546875,
+      "learning_rate": 2.6210350584307182e-05,
+      "loss": 0.121,
+      "step": 1810
+    },
+    {
+      "epoch": 0.4926908500270709,
+      "grad_norm": 2.625,
+      "learning_rate": 2.6071229827490262e-05,
+      "loss": 0.1797,
+      "step": 1820
+    },
+    {
+      "epoch": 0.49539794260963727,
+      "grad_norm": 1.9609375,
+      "learning_rate": 2.5932109070673342e-05,
+      "loss": 0.1159,
+      "step": 1830
+    },
+    {
+      "epoch": 0.49810503519220356,
+      "grad_norm": 1.546875,
+      "learning_rate": 2.579298831385643e-05,
+      "loss": 0.1119,
+      "step": 1840
+    },
+    {
+      "epoch": 0.5008121277747699,
+      "grad_norm": 2.90625,
+      "learning_rate": 2.5653867557039513e-05,
+      "loss": 0.1367,
+      "step": 1850
+    },
+    {
+      "epoch": 0.5008121277747699,
+      "eval_loss": 0.16627325117588043,
+      "eval_runtime": 64.4654,
+      "eval_samples_per_second": 7.756,
+      "eval_steps_per_second": 0.248,
+      "step": 1850
+    },
+    {
+      "epoch": 0.5035192203573362,
+      "grad_norm": 1.1640625,
+      "learning_rate": 2.5514746800222594e-05,
+      "loss": 0.1591,
+      "step": 1860
+    },
+    {
+      "epoch": 0.5062263129399025,
+      "grad_norm": 2.4375,
+      "learning_rate": 2.5375626043405677e-05,
+      "loss": 0.1047,
+      "step": 1870
+    },
+    {
+      "epoch": 0.5089334055224689,
+      "grad_norm": 1.78125,
+      "learning_rate": 2.5236505286588765e-05,
+      "loss": 0.1172,
+      "step": 1880
+    },
+    {
+      "epoch": 0.5116404981050352,
+      "grad_norm": 1.234375,
+      "learning_rate": 2.5097384529771845e-05,
+      "loss": 0.1211,
+      "step": 1890
+    },
+    {
+      "epoch": 0.5143475906876015,
+      "grad_norm": 3.0,
+      "learning_rate": 2.4958263772954925e-05,
+      "loss": 0.1294,
+      "step": 1900
+    },
+    {
+      "epoch": 0.5143475906876015,
+      "eval_loss": 0.1589047610759735,
+      "eval_runtime": 63.4959,
+      "eval_samples_per_second": 7.875,
+      "eval_steps_per_second": 0.252,
+      "step": 1900
+    },
+    {
+      "epoch": 0.5170546832701678,
+      "grad_norm": 1.8984375,
+      "learning_rate": 2.481914301613801e-05,
+      "loss": 0.1343,
+      "step": 1910
+    },
+    {
+      "epoch": 0.5197617758527342,
+      "grad_norm": 2.0,
+      "learning_rate": 2.4680022259321093e-05,
+      "loss": 0.1357,
+      "step": 1920
+    },
+    {
+      "epoch": 0.5224688684353005,
+      "grad_norm": 2.671875,
+      "learning_rate": 2.4540901502504173e-05,
+      "loss": 0.143,
+      "step": 1930
+    },
+    {
+      "epoch": 0.5251759610178668,
+      "grad_norm": 1.3828125,
+      "learning_rate": 2.4401780745687257e-05,
+      "loss": 0.1321,
+      "step": 1940
+    },
+    {
+      "epoch": 0.5278830536004331,
+      "grad_norm": 2.046875,
+      "learning_rate": 2.426265998887034e-05,
+      "loss": 0.1663,
+      "step": 1950
+    },
+    {
+      "epoch": 0.5278830536004331,
+      "eval_loss": 0.15560173988342285,
+      "eval_runtime": 63.7793,
+      "eval_samples_per_second": 7.84,
+      "eval_steps_per_second": 0.251,
+      "step": 1950
+    },
+    {
+      "epoch": 0.5305901461829995,
+      "grad_norm": 0.9296875,
+      "learning_rate": 2.4123539232053424e-05,
+      "loss": 0.0997,
+      "step": 1960
+    },
+    {
+      "epoch": 0.5332972387655658,
+      "grad_norm": 2.015625,
+      "learning_rate": 2.3984418475236505e-05,
+      "loss": 0.1283,
+      "step": 1970
+    },
+    {
+      "epoch": 0.5360043313481321,
+      "grad_norm": 0.89453125,
+      "learning_rate": 2.3845297718419592e-05,
+      "loss": 0.1407,
+      "step": 1980
+    },
+    {
+      "epoch": 0.5387114239306985,
+      "grad_norm": 0.8046875,
+      "learning_rate": 2.3706176961602672e-05,
+      "loss": 0.1171,
+      "step": 1990
+    },
+    {
+      "epoch": 0.5414185165132648,
+      "grad_norm": 1.296875,
+      "learning_rate": 2.3567056204785756e-05,
+      "loss": 0.1288,
+      "step": 2000
+    },
+    {
+      "epoch": 0.5414185165132648,
+      "eval_loss": 0.15494494140148163,
+      "eval_runtime": 79.2552,
+      "eval_samples_per_second": 6.309,
+      "eval_steps_per_second": 0.202,
+      "step": 2000
+    },
+    {
+      "epoch": 0.5441256090958311,
+      "grad_norm": 2.015625,
+      "learning_rate": 2.3427935447968836e-05,
+      "loss": 0.1055,
+      "step": 2010
+    },
+    {
+      "epoch": 0.5468327016783974,
+      "grad_norm": 1.140625,
+      "learning_rate": 2.3288814691151924e-05,
+      "loss": 0.1142,
+      "step": 2020
+    },
+    {
+      "epoch": 0.5495397942609638,
+      "grad_norm": 1.5703125,
+      "learning_rate": 2.3149693934335004e-05,
+      "loss": 0.1415,
+      "step": 2030
+    },
+    {
+      "epoch": 0.5522468868435301,
+      "grad_norm": 0.609375,
+      "learning_rate": 2.3010573177518084e-05,
+      "loss": 0.1086,
+      "step": 2040
+    },
+    {
+      "epoch": 0.5549539794260964,
+      "grad_norm": 2.921875,
+      "learning_rate": 2.287145242070117e-05,
+      "loss": 0.1028,
+      "step": 2050
+    },
+    {
+      "epoch": 0.5549539794260964,
+      "eval_loss": 0.1615983545780182,
+      "eval_runtime": 64.4361,
+      "eval_samples_per_second": 7.76,
+      "eval_steps_per_second": 0.248,
+      "step": 2050
+    },
+    {
+      "epoch": 0.5576610720086627,
+      "grad_norm": 1.6640625,
+      "learning_rate": 2.2732331663884252e-05,
+      "loss": 0.1268,
+      "step": 2060
+    },
+    {
+      "epoch": 0.5603681645912291,
+      "grad_norm": 0.55078125,
+      "learning_rate": 2.2593210907067336e-05,
+      "loss": 0.1187,
+      "step": 2070
+    },
+    {
+      "epoch": 0.5630752571737954,
+      "grad_norm": 1.703125,
+      "learning_rate": 2.2454090150250416e-05,
+      "loss": 0.1135,
+      "step": 2080
+    },
+    {
+      "epoch": 0.5657823497563617,
+      "grad_norm": 2.71875,
+      "learning_rate": 2.2314969393433503e-05,
+      "loss": 0.1625,
+      "step": 2090
+    },
+    {
+      "epoch": 0.568489442338928,
+      "grad_norm": 1.3125,
+      "learning_rate": 2.2175848636616583e-05,
+      "loss": 0.1095,
+      "step": 2100
+    },
+    {
+      "epoch": 0.568489442338928,
+      "eval_loss": 0.16485995054244995,
+      "eval_runtime": 63.0678,
+      "eval_samples_per_second": 7.928,
+      "eval_steps_per_second": 0.254,
+      "step": 2100
+    },
+    {
+      "epoch": 0.5711965349214944,
+      "grad_norm": 0.890625,
+      "learning_rate": 2.2036727879799667e-05,
+      "loss": 0.1131,
+      "step": 2110
+    },
+    {
+      "epoch": 0.5739036275040607,
+      "grad_norm": 2.296875,
+      "learning_rate": 2.189760712298275e-05,
+      "loss": 0.0784,
+      "step": 2120
+    },
+    {
+      "epoch": 0.576610720086627,
+      "grad_norm": 3.4375,
+      "learning_rate": 2.1758486366165835e-05,
+      "loss": 0.1548,
+      "step": 2130
+    },
+    {
+      "epoch": 0.5793178126691932,
+      "grad_norm": 0.671875,
+      "learning_rate": 2.1619365609348915e-05,
+      "loss": 0.1285,
+      "step": 2140
+    },
+    {
+      "epoch": 0.5820249052517596,
+      "grad_norm": 2.265625,
+      "learning_rate": 2.1480244852532e-05,
+      "loss": 0.1333,
+      "step": 2150
+    },
+    {
+      "epoch": 0.5820249052517596,
+      "eval_loss": 0.1646082103252411,
+      "eval_runtime": 64.2796,
+      "eval_samples_per_second": 7.779,
+      "eval_steps_per_second": 0.249,
+      "step": 2150
+    },
+    {
+      "epoch": 0.584731997834326,
+      "grad_norm": 1.71875,
+      "learning_rate": 2.1341124095715083e-05,
+      "loss": 0.1197,
+      "step": 2160
+    },
+    {
+      "epoch": 0.5874390904168922,
+      "grad_norm": 0.5703125,
+      "learning_rate": 2.1202003338898166e-05,
+      "loss": 0.151,
+      "step": 2170
+    },
+    {
+      "epoch": 0.5901461829994585,
+      "grad_norm": 1.828125,
+      "learning_rate": 2.1062882582081247e-05,
+      "loss": 0.0958,
+      "step": 2180
+    },
+    {
+      "epoch": 0.5928532755820249,
+      "grad_norm": 3.15625,
+      "learning_rate": 2.092376182526433e-05,
+      "loss": 0.1853,
+      "step": 2190
+    },
+    {
+      "epoch": 0.5955603681645912,
+      "grad_norm": 2.34375,
+      "learning_rate": 2.0784641068447414e-05,
+      "loss": 0.1199,
+      "step": 2200
+    },
+    {
+      "epoch": 0.5955603681645912,
+      "eval_loss": 0.162201389670372,
+      "eval_runtime": 64.0196,
+      "eval_samples_per_second": 7.81,
+      "eval_steps_per_second": 0.25,
+      "step": 2200
+    },
+    {
+      "epoch": 0.5982674607471575,
+      "grad_norm": 1.34375,
+      "learning_rate": 2.0645520311630495e-05,
+      "loss": 0.1661,
+      "step": 2210
+    },
+    {
+      "epoch": 0.6009745533297238,
+      "grad_norm": 1.5703125,
+      "learning_rate": 2.0506399554813578e-05,
+      "loss": 0.1106,
+      "step": 2220
+    },
+    {
+      "epoch": 0.6036816459122902,
+      "grad_norm": 3.65625,
+      "learning_rate": 2.0367278797996662e-05,
+      "loss": 0.1747,
+      "step": 2230
+    },
+    {
+      "epoch": 0.6063887384948565,
+      "grad_norm": 0.828125,
+      "learning_rate": 2.0228158041179746e-05,
+      "loss": 0.1247,
+      "step": 2240
+    },
+    {
+      "epoch": 0.6090958310774228,
+      "grad_norm": 0.7734375,
+      "learning_rate": 2.0089037284362826e-05,
+      "loss": 0.0886,
+      "step": 2250
+    },
+    {
+      "epoch": 0.6090958310774228,
+      "eval_loss": 0.15883322060108185,
+      "eval_runtime": 63.686,
+      "eval_samples_per_second": 7.851,
+      "eval_steps_per_second": 0.251,
+      "step": 2250
+    },
+    {
+      "epoch": 0.6118029236599891,
+      "grad_norm": 2.046875,
+      "learning_rate": 1.994991652754591e-05,
+      "loss": 0.1134,
+      "step": 2260
+    },
+    {
+      "epoch": 0.6145100162425555,
+      "grad_norm": 2.296875,
+      "learning_rate": 1.9810795770728994e-05,
+      "loss": 0.1273,
+      "step": 2270
+    },
+    {
+      "epoch": 0.6172171088251218,
+      "grad_norm": 1.3671875,
+      "learning_rate": 1.9671675013912077e-05,
+      "loss": 0.118,
+      "step": 2280
+    },
+    {
+      "epoch": 0.6199242014076881,
+      "grad_norm": 1.3203125,
+      "learning_rate": 1.9532554257095158e-05,
+      "loss": 0.1127,
+      "step": 2290
+    },
+    {
+      "epoch": 0.6226312939902545,
+      "grad_norm": 1.703125,
+      "learning_rate": 1.9393433500278245e-05,
+      "loss": 0.1319,
+      "step": 2300
+    },
+    {
+      "epoch": 0.6226312939902545,
+      "eval_loss": 0.1598823368549347,
+      "eval_runtime": 63.542,
+      "eval_samples_per_second": 7.869,
+      "eval_steps_per_second": 0.252,
+      "step": 2300
+    },
+    {
+      "epoch": 0.6253383865728208,
+      "grad_norm": 1.75,
+      "learning_rate": 1.9254312743461325e-05,
+      "loss": 0.1115,
+      "step": 2310
+    },
+    {
+      "epoch": 0.6280454791553871,
+      "grad_norm": 2.3125,
+      "learning_rate": 1.911519198664441e-05,
+      "loss": 0.1411,
+      "step": 2320
+    },
+    {
+      "epoch": 0.6307525717379534,
+      "grad_norm": 1.1484375,
+      "learning_rate": 1.8976071229827493e-05,
+      "loss": 0.1097,
+      "step": 2330
+    },
+    {
+      "epoch": 0.6334596643205198,
+      "grad_norm": 3.5,
+      "learning_rate": 1.8836950473010573e-05,
+      "loss": 0.1072,
+      "step": 2340
+    },
+    {
+      "epoch": 0.6361667569030861,
+      "grad_norm": 1.09375,
+      "learning_rate": 1.8697829716193657e-05,
+      "loss": 0.1215,
+      "step": 2350
+    },
+    {
+      "epoch": 0.6361667569030861,
+      "eval_loss": 0.15923769772052765,
+      "eval_runtime": 62.8745,
+      "eval_samples_per_second": 7.952,
+      "eval_steps_per_second": 0.254,
+      "step": 2350
+    },
+    {
+      "epoch": 0.6388738494856524,
+      "grad_norm": 2.78125,
+      "learning_rate": 1.8558708959376737e-05,
+      "loss": 0.0999,
+      "step": 2360
+    },
+    {
+      "epoch": 0.6415809420682187,
+      "grad_norm": 0.5390625,
+      "learning_rate": 1.8419588202559824e-05,
+      "loss": 0.134,
+      "step": 2370
+    },
+    {
+      "epoch": 0.6442880346507851,
+      "grad_norm": 3.703125,
+      "learning_rate": 1.8280467445742905e-05,
+      "loss": 0.1648,
+      "step": 2380
+    },
+    {
+      "epoch": 0.6469951272333514,
+      "grad_norm": 2.046875,
+      "learning_rate": 1.814134668892599e-05,
+      "loss": 0.1272,
+      "step": 2390
+    },
+    {
+      "epoch": 0.6497022198159177,
+      "grad_norm": 0.91796875,
+      "learning_rate": 1.8002225932109072e-05,
+      "loss": 0.1202,
+      "step": 2400
+    },
+    {
+      "epoch": 0.6497022198159177,
+      "eval_loss": 0.156494602560997,
+      "eval_runtime": 63.7775,
+      "eval_samples_per_second": 7.84,
+      "eval_steps_per_second": 0.251,
+      "step": 2400
+    },
+    {
+      "epoch": 0.652409312398484,
+      "grad_norm": 6.3125,
+      "learning_rate": 1.7863105175292156e-05,
+      "loss": 0.1491,
+      "step": 2410
+    },
+    {
+      "epoch": 0.6551164049810504,
+      "grad_norm": 1.65625,
+      "learning_rate": 1.7723984418475236e-05,
+      "loss": 0.1209,
+      "step": 2420
+    },
+    {
+      "epoch": 0.6578234975636167,
+      "grad_norm": 0.73828125,
+      "learning_rate": 1.758486366165832e-05,
+      "loss": 0.1295,
+      "step": 2430
+    },
+    {
+      "epoch": 0.660530590146183,
+      "grad_norm": 2.765625,
+      "learning_rate": 1.7445742904841404e-05,
+      "loss": 0.1013,
+      "step": 2440
+    },
+    {
+      "epoch": 0.6632376827287493,
+      "grad_norm": 0.53515625,
+      "learning_rate": 1.7306622148024488e-05,
+      "loss": 0.1537,
+      "step": 2450
+    },
+    {
+      "epoch": 0.6632376827287493,
+      "eval_loss": 0.15601959824562073,
+      "eval_runtime": 64.3267,
+      "eval_samples_per_second": 7.773,
+      "eval_steps_per_second": 0.249,
+      "step": 2450
+    },
+    {
+      "epoch": 0.6659447753113157,
+      "grad_norm": 2.984375,
+      "learning_rate": 1.7167501391207568e-05,
+      "loss": 0.1364,
+      "step": 2460
+    },
+    {
+      "epoch": 0.668651867893882,
+      "grad_norm": 1.8828125,
+      "learning_rate": 1.7028380634390652e-05,
+      "loss": 0.1121,
+      "step": 2470
+    },
+    {
+      "epoch": 0.6713589604764483,
+      "grad_norm": 1.40625,
+      "learning_rate": 1.6889259877573735e-05,
+      "loss": 0.135,
+      "step": 2480
+    },
+    {
+      "epoch": 0.6740660530590146,
+      "grad_norm": 2.609375,
+      "learning_rate": 1.6750139120756816e-05,
+      "loss": 0.1312,
+      "step": 2490
+    },
+    {
+      "epoch": 0.676773145641581,
+      "grad_norm": 1.1015625,
+      "learning_rate": 1.66110183639399e-05,
+      "loss": 0.1146,
+      "step": 2500
+    },
+    {
+      "epoch": 0.676773145641581,
+      "eval_loss": 0.15720410645008087,
+      "eval_runtime": 82.5469,
+      "eval_samples_per_second": 6.057,
+      "eval_steps_per_second": 0.194,
+      "step": 2500
+    },
+    {
+      "epoch": 0.6794802382241473,
+      "grad_norm": 2.921875,
+      "learning_rate": 1.6471897607122983e-05,
+      "loss": 0.132,
+      "step": 2510
+    },
+    {
+      "epoch": 0.6821873308067136,
+      "grad_norm": 1.109375,
+      "learning_rate": 1.6332776850306067e-05,
+      "loss": 0.1216,
+      "step": 2520
+    },
+    {
+      "epoch": 0.6848944233892799,
+      "grad_norm": 1.0625,
+      "learning_rate": 1.6193656093489147e-05,
+      "loss": 0.1181,
+      "step": 2530
+    },
+    {
+      "epoch": 0.6876015159718463,
+      "grad_norm": 0.236328125,
+      "learning_rate": 1.605453533667223e-05,
+      "loss": 0.1067,
+      "step": 2540
+    },
+    {
+      "epoch": 0.6903086085544126,
+      "grad_norm": 1.5,
+      "learning_rate": 1.5915414579855315e-05,
+      "loss": 0.1363,
+      "step": 2550
+    },
+    {
+      "epoch": 0.6903086085544126,
+      "eval_loss": 0.15796752274036407,
+      "eval_runtime": 64.1468,
+      "eval_samples_per_second": 7.795,
+      "eval_steps_per_second": 0.249,
+      "step": 2550
+    },
+    {
+      "epoch": 0.6930157011369789,
+      "grad_norm": 1.5703125,
+      "learning_rate": 1.57762938230384e-05,
+      "loss": 0.0929,
+      "step": 2560
+    },
+    {
+      "epoch": 0.6957227937195453,
+      "grad_norm": 0.94140625,
+      "learning_rate": 1.563717306622148e-05,
+      "loss": 0.1125,
+      "step": 2570
+    },
+    {
+      "epoch": 0.6984298863021116,
+      "grad_norm": 2.140625,
+      "learning_rate": 1.5498052309404566e-05,
+      "loss": 0.1053,
+      "step": 2580
+    },
+    {
+      "epoch": 0.7011369788846779,
+      "grad_norm": 2.578125,
+      "learning_rate": 1.5358931552587647e-05,
+      "loss": 0.111,
+      "step": 2590
+    },
+    {
+      "epoch": 0.7038440714672441,
+      "grad_norm": 2.484375,
+      "learning_rate": 1.521981079577073e-05,
+      "loss": 0.111,
+      "step": 2600
+    },
+    {
+      "epoch": 0.7038440714672441,
+      "eval_loss": 0.15660835802555084,
+      "eval_runtime": 64.7015,
+      "eval_samples_per_second": 7.728,
+      "eval_steps_per_second": 0.247,
+      "step": 2600
+    },
+    {
+      "epoch": 0.7065511640498106,
+      "grad_norm": 0.69921875,
+      "learning_rate": 1.5080690038953812e-05,
+      "loss": 0.1276,
+      "step": 2610
+    },
+    {
+      "epoch": 0.7092582566323768,
+      "grad_norm": 1.6328125,
+      "learning_rate": 1.4941569282136896e-05,
+      "loss": 0.1508,
+      "step": 2620
+    },
+    {
+      "epoch": 0.7119653492149431,
+      "grad_norm": 2.265625,
+      "learning_rate": 1.4802448525319978e-05,
+      "loss": 0.1717,
+      "step": 2630
+    },
+    {
+      "epoch": 0.7146724417975094,
+      "grad_norm": 1.9765625,
+      "learning_rate": 1.466332776850306e-05,
+      "loss": 0.1422,
+      "step": 2640
+    },
+    {
+      "epoch": 0.7173795343800758,
+      "grad_norm": 1.1796875,
+      "learning_rate": 1.4524207011686144e-05,
+      "loss": 0.1003,
+      "step": 2650
+    },
+    {
+      "epoch": 0.7173795343800758,
+      "eval_loss": 0.15460146963596344,
+      "eval_runtime": 65.1755,
+      "eval_samples_per_second": 7.672,
+      "eval_steps_per_second": 0.245,
+      "step": 2650
+    },
+    {
+      "epoch": 0.7200866269626421,
+      "grad_norm": 1.796875,
+      "learning_rate": 1.4385086254869226e-05,
+      "loss": 0.1366,
+      "step": 2660
+    },
+    {
+      "epoch": 0.7227937195452084,
+      "grad_norm": 1.59375,
+      "learning_rate": 1.424596549805231e-05,
+      "loss": 0.1316,
+      "step": 2670
+    },
+    {
+      "epoch": 0.7255008121277747,
+      "grad_norm": 2.671875,
+      "learning_rate": 1.4106844741235392e-05,
+      "loss": 0.1367,
+      "step": 2680
+    },
+    {
+      "epoch": 0.7282079047103411,
+      "grad_norm": 1.75,
+      "learning_rate": 1.3967723984418477e-05,
+      "loss": 0.13,
+      "step": 2690
+    },
+    {
+      "epoch": 0.7309149972929074,
+      "grad_norm": 2.96875,
+      "learning_rate": 1.3828603227601558e-05,
+      "loss": 0.1213,
+      "step": 2700
+    },
+    {
+      "epoch": 0.7309149972929074,
+      "eval_loss": 0.15699392557144165,
+      "eval_runtime": 64.8654,
+      "eval_samples_per_second": 7.708,
+      "eval_steps_per_second": 0.247,
+      "step": 2700
+    },
+    {
+      "epoch": 0.7336220898754737,
+      "grad_norm": 3.171875,
+      "learning_rate": 1.3689482470784643e-05,
+      "loss": 0.1303,
+      "step": 2710
+    },
+    {
+      "epoch": 0.73632918245804,
+      "grad_norm": 0.78125,
+      "learning_rate": 1.3550361713967724e-05,
+      "loss": 0.1496,
+      "step": 2720
+    },
+    {
+      "epoch": 0.7390362750406064,
+      "grad_norm": 1.2421875,
+      "learning_rate": 1.3411240957150809e-05,
+      "loss": 0.1129,
+      "step": 2730
+    },
+    {
+      "epoch": 0.7417433676231727,
+      "grad_norm": 2.703125,
+      "learning_rate": 1.327212020033389e-05,
+      "loss": 0.128,
+      "step": 2740
+    },
+    {
+      "epoch": 0.744450460205739,
+      "grad_norm": 2.328125,
+      "learning_rate": 1.3132999443516975e-05,
+      "loss": 0.1081,
+      "step": 2750
+    },
+    {
+      "epoch": 0.744450460205739,
+      "eval_loss": 0.1565748006105423,
+      "eval_runtime": 64.732,
+      "eval_samples_per_second": 7.724,
+      "eval_steps_per_second": 0.247,
+      "step": 2750
+    },
+    {
+      "epoch": 0.7471575527883053,
+      "grad_norm": 1.6015625,
+      "learning_rate": 1.2993878686700057e-05,
+      "loss": 0.1355,
+      "step": 2760
+    },
+    {
+      "epoch": 0.7498646453708717,
+      "grad_norm": 1.1015625,
+      "learning_rate": 1.285475792988314e-05,
+      "loss": 0.1341,
+      "step": 2770
+    },
+    {
+      "epoch": 0.752571737953438,
+      "grad_norm": 0.98828125,
+      "learning_rate": 1.2715637173066223e-05,
+      "loss": 0.1169,
+      "step": 2780
+    },
+    {
+      "epoch": 0.7552788305360043,
+      "grad_norm": 0.60546875,
+      "learning_rate": 1.2576516416249303e-05,
+      "loss": 0.0924,
+      "step": 2790
+    },
+    {
+      "epoch": 0.7579859231185706,
+      "grad_norm": 1.4921875,
+      "learning_rate": 1.2437395659432388e-05,
+      "loss": 0.1634,
+      "step": 2800
+    },
+    {
+      "epoch": 0.7579859231185706,
+      "eval_loss": 0.1555679589509964,
+      "eval_runtime": 65.0072,
+      "eval_samples_per_second": 7.691,
+      "eval_steps_per_second": 0.246,
+      "step": 2800
+    },
+    {
+      "epoch": 0.760693015701137,
+      "grad_norm": 1.6796875,
+      "learning_rate": 1.229827490261547e-05,
+      "loss": 0.1255,
+      "step": 2810
+    },
+    {
+      "epoch": 0.7634001082837033,
+      "grad_norm": 0.83984375,
+      "learning_rate": 1.2159154145798554e-05,
+      "loss": 0.0838,
+      "step": 2820
+    },
+    {
+      "epoch": 0.7661072008662696,
+      "grad_norm": 3.375,
+      "learning_rate": 1.2020033388981636e-05,
+      "loss": 0.1031,
+      "step": 2830
+    },
+    {
+      "epoch": 0.7688142934488359,
+      "grad_norm": 1.5,
+      "learning_rate": 1.188091263216472e-05,
+      "loss": 0.1125,
+      "step": 2840
+    },
+    {
+      "epoch": 0.7715213860314023,
+      "grad_norm": 1.7421875,
+      "learning_rate": 1.1741791875347802e-05,
+      "loss": 0.1427,
+      "step": 2850
+    },
+    {
+      "epoch": 0.7715213860314023,
+      "eval_loss": 0.15634971857070923,
+      "eval_runtime": 64.5468,
+      "eval_samples_per_second": 7.746,
+      "eval_steps_per_second": 0.248,
+      "step": 2850
+    },
+    {
+      "epoch": 0.7742284786139686,
+      "grad_norm": 2.484375,
+      "learning_rate": 1.1602671118530884e-05,
+      "loss": 0.1233,
+      "step": 2860
+    },
+    {
+      "epoch": 0.7769355711965349,
+      "grad_norm": 0.95703125,
+      "learning_rate": 1.1463550361713968e-05,
+      "loss": 0.1146,
+      "step": 2870
+    },
+    {
+      "epoch": 0.7796426637791013,
+      "grad_norm": 1.5703125,
+      "learning_rate": 1.132442960489705e-05,
+      "loss": 0.1301,
+      "step": 2880
+    },
+    {
+      "epoch": 0.7823497563616676,
+      "grad_norm": 1.515625,
+      "learning_rate": 1.1185308848080134e-05,
+      "loss": 0.1238,
+      "step": 2890
+    },
+    {
+      "epoch": 0.7850568489442339,
+      "grad_norm": 1.828125,
+      "learning_rate": 1.1046188091263218e-05,
+      "loss": 0.1161,
+      "step": 2900
+    },
+    {
+      "epoch": 0.7850568489442339,
+      "eval_loss": 0.1560017168521881,
+      "eval_runtime": 64.4442,
+      "eval_samples_per_second": 7.759,
+      "eval_steps_per_second": 0.248,
+      "step": 2900
+    },
+    {
+      "epoch": 0.7877639415268002,
+      "grad_norm": 1.03125,
+      "learning_rate": 1.09070673344463e-05,
+      "loss": 0.0932,
+      "step": 2910
+    },
+    {
+      "epoch": 0.7904710341093666,
+      "grad_norm": 1.1171875,
+      "learning_rate": 1.0767946577629383e-05,
+      "loss": 0.1132,
+      "step": 2920
+    },
+    {
+      "epoch": 0.7931781266919329,
+      "grad_norm": 1.3125,
+      "learning_rate": 1.0628825820812465e-05,
+      "loss": 0.1272,
+      "step": 2930
+    },
+    {
+      "epoch": 0.7958852192744992,
+      "grad_norm": 2.109375,
+      "learning_rate": 1.0489705063995549e-05,
+      "loss": 0.1251,
+      "step": 2940
+    },
+    {
+      "epoch": 0.7985923118570655,
+      "grad_norm": 1.7734375,
+      "learning_rate": 1.0350584307178631e-05,
+      "loss": 0.1188,
+      "step": 2950
+    },
+    {
+      "epoch": 0.7985923118570655,
+      "eval_loss": 0.1543656885623932,
+      "eval_runtime": 63.1528,
+      "eval_samples_per_second": 7.917,
+      "eval_steps_per_second": 0.253,
+      "step": 2950
+    },
+    {
+      "epoch": 0.8012994044396319,
+      "grad_norm": 1.796875,
+      "learning_rate": 1.0211463550361715e-05,
+      "loss": 0.1261,
+      "step": 2960
+    },
+    {
+      "epoch": 0.8040064970221982,
+      "grad_norm": 0.67578125,
+      "learning_rate": 1.0072342793544797e-05,
+      "loss": 0.1121,
+      "step": 2970
+    },
+    {
+      "epoch": 0.8067135896047645,
+      "grad_norm": 1.0390625,
+      "learning_rate": 9.93322203672788e-06,
+      "loss": 0.1267,
+      "step": 2980
+    },
+    {
+      "epoch": 0.8094206821873308,
+      "grad_norm": 1.8125,
+      "learning_rate": 9.794101279910965e-06,
+      "loss": 0.0998,
+      "step": 2990
+    },
+    {
+      "epoch": 0.8121277747698972,
+      "grad_norm": 1.8984375,
+      "learning_rate": 9.654980523094045e-06,
+      "loss": 0.1153,
+      "step": 3000
+    },
+    {
+      "epoch": 0.8121277747698972,
+      "eval_loss": 0.15634942054748535,
+      "eval_runtime": 78.3242,
+      "eval_samples_per_second": 6.384,
+      "eval_steps_per_second": 0.204,
+      "step": 3000
+    },
+    {
+      "epoch": 0.8148348673524635,
+      "grad_norm": 0.8515625,
+      "learning_rate": 9.515859766277129e-06,
+      "loss": 0.1116,
+      "step": 3010
+    },
+    {
+      "epoch": 0.8175419599350298,
+      "grad_norm": 1.5859375,
+      "learning_rate": 9.37673900946021e-06,
+      "loss": 0.1083,
+      "step": 3020
+    },
+    {
+      "epoch": 0.8202490525175961,
+      "grad_norm": 1.15625,
+      "learning_rate": 9.237618252643294e-06,
+      "loss": 0.1535,
+      "step": 3030
+    },
+    {
+      "epoch": 0.8229561451001625,
+      "grad_norm": 1.1796875,
+      "learning_rate": 9.098497495826378e-06,
+      "loss": 0.1179,
+      "step": 3040
+    },
+    {
+      "epoch": 0.8256632376827288,
+      "grad_norm": 1.1875,
+      "learning_rate": 8.95937673900946e-06,
+      "loss": 0.1154,
+      "step": 3050
+    },
+    {
+      "epoch": 0.8256632376827288,
+      "eval_loss": 0.15425148606300354,
+      "eval_runtime": 62.8908,
+      "eval_samples_per_second": 7.95,
+      "eval_steps_per_second": 0.254,
+      "step": 3050
+    },
+    {
+      "epoch": 0.828370330265295,
+      "grad_norm": 4.1875,
+      "learning_rate": 8.820255982192544e-06,
+      "loss": 0.1071,
+      "step": 3060
+    },
+    {
+      "epoch": 0.8310774228478613,
+      "grad_norm": 6.0625,
+      "learning_rate": 8.681135225375626e-06,
+      "loss": 0.1563,
+      "step": 3070
+    },
+    {
+      "epoch": 0.8337845154304278,
+      "grad_norm": 2.234375,
+      "learning_rate": 8.54201446855871e-06,
+      "loss": 0.1412,
+      "step": 3080
+    },
+    {
+      "epoch": 0.836491608012994,
+      "grad_norm": 1.2421875,
+      "learning_rate": 8.402893711741792e-06,
+      "loss": 0.0991,
+      "step": 3090
+    },
+    {
+      "epoch": 0.8391987005955603,
+      "grad_norm": 1.234375,
+      "learning_rate": 8.263772954924876e-06,
+      "loss": 0.1098,
+      "step": 3100
+    },
+    {
+      "epoch": 0.8391987005955603,
+      "eval_loss": 0.15568658709526062,
+      "eval_runtime": 63.402,
+      "eval_samples_per_second": 7.886,
+      "eval_steps_per_second": 0.252,
+      "step": 3100
+    },
+    {
+      "epoch": 0.8419057931781266,
+      "grad_norm": 2.015625,
+      "learning_rate": 8.124652198107958e-06,
+      "loss": 0.1556,
+      "step": 3110
+    },
+    {
+      "epoch": 0.844612885760693,
+      "grad_norm": 4.3125,
+      "learning_rate": 7.985531441291041e-06,
+      "loss": 0.1596,
+      "step": 3120
+    },
+    {
+      "epoch": 0.8473199783432593,
+      "grad_norm": 2.046875,
+      "learning_rate": 7.846410684474125e-06,
+      "loss": 0.1437,
+      "step": 3130
+    },
+    {
+      "epoch": 0.8500270709258256,
+      "grad_norm": 1.15625,
+      "learning_rate": 7.707289927657207e-06,
+      "loss": 0.1511,
+      "step": 3140
+    },
+    {
+      "epoch": 0.852734163508392,
+      "grad_norm": 1.9765625,
+      "learning_rate": 7.568169170840289e-06,
+      "loss": 0.1559,
+      "step": 3150
+    },
+    {
+      "epoch": 0.852734163508392,
+      "eval_loss": 0.15518584847450256,
+      "eval_runtime": 63.1606,
+      "eval_samples_per_second": 7.916,
+      "eval_steps_per_second": 0.253,
+      "step": 3150
+    },
+    {
+      "epoch": 0.8554412560909583,
+      "grad_norm": 1.9921875,
+      "learning_rate": 7.429048414023372e-06,
+      "loss": 0.1392,
+      "step": 3160
+    },
+    {
+      "epoch": 0.8581483486735246,
+      "grad_norm": 2.6875,
+      "learning_rate": 7.289927657206455e-06,
+      "loss": 0.1623,
+      "step": 3170
+    },
+    {
+      "epoch": 0.8608554412560909,
+      "grad_norm": 3.0625,
+      "learning_rate": 7.150806900389538e-06,
+      "loss": 0.1358,
+      "step": 3180
+    },
+    {
+      "epoch": 0.8635625338386573,
+      "grad_norm": 2.375,
+      "learning_rate": 7.011686143572621e-06,
+      "loss": 0.1483,
+      "step": 3190
+    },
+    {
+      "epoch": 0.8662696264212236,
+      "grad_norm": 2.578125,
+      "learning_rate": 6.872565386755704e-06,
+      "loss": 0.1207,
+      "step": 3200
+    },
+    {
+      "epoch": 0.8662696264212236,
+      "eval_loss": 0.15542198717594147,
+      "eval_runtime": 63.3581,
+      "eval_samples_per_second": 7.892,
+      "eval_steps_per_second": 0.253,
+      "step": 3200
+    },
+    {
+      "epoch": 0.8689767190037899,
+      "grad_norm": 1.078125,
+      "learning_rate": 6.733444629938787e-06,
+      "loss": 0.1102,
+      "step": 3210
+    },
+    {
+      "epoch": 0.8716838115863562,
+      "grad_norm": 1.2421875,
+      "learning_rate": 6.5943238731218705e-06,
+      "loss": 0.1394,
+      "step": 3220
+    },
+    {
+      "epoch": 0.8743909041689226,
+      "grad_norm": 2.0625,
+      "learning_rate": 6.455203116304953e-06,
+      "loss": 0.1495,
+      "step": 3230
+    },
+    {
+      "epoch": 0.8770979967514889,
+      "grad_norm": 0.7890625,
+      "learning_rate": 6.316082359488036e-06,
+      "loss": 0.1472,
+      "step": 3240
+    },
+    {
+      "epoch": 0.8798050893340552,
+      "grad_norm": 6.09375,
+      "learning_rate": 6.176961602671119e-06,
+      "loss": 0.1169,
+      "step": 3250
+    },
+    {
+      "epoch": 0.8798050893340552,
+      "eval_loss": 0.15612711012363434,
+      "eval_runtime": 62.8499,
+      "eval_samples_per_second": 7.955,
+      "eval_steps_per_second": 0.255,
+      "step": 3250
+    },
+    {
+      "epoch": 0.8825121819166215,
+      "grad_norm": 1.1328125,
+      "learning_rate": 6.037840845854201e-06,
+      "loss": 0.1208,
+      "step": 3260
+    },
+    {
+      "epoch": 0.8852192744991879,
+      "grad_norm": 1.78125,
+      "learning_rate": 5.898720089037284e-06,
+      "loss": 0.1145,
+      "step": 3270
+    },
+    {
+      "epoch": 0.8879263670817542,
+      "grad_norm": 3.21875,
+      "learning_rate": 5.759599332220367e-06,
+      "loss": 0.1341,
+      "step": 3280
+    },
+    {
+      "epoch": 0.8906334596643205,
+      "grad_norm": 2.265625,
+      "learning_rate": 5.620478575403451e-06,
+      "loss": 0.1278,
+      "step": 3290
+    },
+    {
+      "epoch": 0.8933405522468868,
+      "grad_norm": 2.65625,
+      "learning_rate": 5.481357818586534e-06,
+      "loss": 0.1932,
+      "step": 3300
+    },
+    {
+      "epoch": 0.8933405522468868,
+      "eval_loss": 0.15443623065948486,
+      "eval_runtime": 63.1624,
+      "eval_samples_per_second": 7.916,
+      "eval_steps_per_second": 0.253,
+      "step": 3300
+    },
+    {
+      "epoch": 0.8960476448294532,
+      "grad_norm": 2.3125,
+      "learning_rate": 5.342237061769617e-06,
+      "loss": 0.113,
+      "step": 3310
+    },
+    {
+      "epoch": 0.8987547374120195,
+      "grad_norm": 1.921875,
+      "learning_rate": 5.2031163049526995e-06,
+      "loss": 0.0939,
+      "step": 3320
+    },
+    {
+      "epoch": 0.9014618299945858,
+      "grad_norm": 3.125,
+      "learning_rate": 5.0639955481357824e-06,
+      "loss": 0.122,
+      "step": 3330
+    },
+    {
+      "epoch": 0.9041689225771521,
+      "grad_norm": 0.84765625,
+      "learning_rate": 4.9248747913188645e-06,
+      "loss": 0.0583,
+      "step": 3340
+    },
+    {
+      "epoch": 0.9068760151597185,
+      "grad_norm": 1.3125,
+      "learning_rate": 4.785754034501947e-06,
+      "loss": 0.0985,
+      "step": 3350
+    },
+    {
+      "epoch": 0.9068760151597185,
+      "eval_loss": 0.1550075113773346,
+      "eval_runtime": 63.4539,
+      "eval_samples_per_second": 7.88,
+      "eval_steps_per_second": 0.252,
+      "step": 3350
+    },
+    {
+      "epoch": 0.9095831077422848,
+      "grad_norm": 1.765625,
+      "learning_rate": 4.646633277685031e-06,
+      "loss": 0.1216,
+      "step": 3360
+    },
+    {
+      "epoch": 0.9122902003248511,
+      "grad_norm": 1.25,
+      "learning_rate": 4.507512520868114e-06,
+      "loss": 0.1355,
+      "step": 3370
+    },
+    {
+      "epoch": 0.9149972929074174,
+      "grad_norm": 0.2236328125,
+      "learning_rate": 4.368391764051197e-06,
+      "loss": 0.1101,
+      "step": 3380
+    },
+    {
+      "epoch": 0.9177043854899838,
+      "grad_norm": 1.7421875,
+      "learning_rate": 4.22927100723428e-06,
+      "loss": 0.0976,
+      "step": 3390
+    },
+    {
+      "epoch": 0.9204114780725501,
+      "grad_norm": 0.83984375,
+      "learning_rate": 4.090150250417363e-06,
+      "loss": 0.1194,
+      "step": 3400
+    },
+    {
+      "epoch": 0.9204114780725501,
+      "eval_loss": 0.15643475949764252,
+      "eval_runtime": 63.0287,
+      "eval_samples_per_second": 7.933,
+      "eval_steps_per_second": 0.254,
+      "step": 3400
+    },
+    {
+      "epoch": 0.9231185706551164,
+      "grad_norm": 3.0625,
+      "learning_rate": 3.951029493600445e-06,
+      "loss": 0.1401,
+      "step": 3410
+    },
+    {
+      "epoch": 0.9258256632376828,
+      "grad_norm": 1.2421875,
+      "learning_rate": 3.811908736783528e-06,
+      "loss": 0.1137,
+      "step": 3420
+    },
+    {
+      "epoch": 0.9285327558202491,
+      "grad_norm": 3.109375,
+      "learning_rate": 3.672787979966611e-06,
+      "loss": 0.1236,
+      "step": 3430
+    },
+    {
+      "epoch": 0.9312398484028154,
+      "grad_norm": 3.109375,
+      "learning_rate": 3.533667223149694e-06,
+      "loss": 0.1108,
+      "step": 3440
+    },
+    {
+      "epoch": 0.9339469409853817,
+      "grad_norm": 2.796875,
+      "learning_rate": 3.3945464663327773e-06,
+      "loss": 0.1309,
+      "step": 3450
+    },
+    {
+      "epoch": 0.9339469409853817,
+      "eval_loss": 0.1561814844608307,
+      "eval_runtime": 63.1871,
+      "eval_samples_per_second": 7.913,
+      "eval_steps_per_second": 0.253,
+      "step": 3450
+    },
+    {
+      "epoch": 0.9366540335679481,
+      "grad_norm": 2.265625,
+      "learning_rate": 3.25542570951586e-06,
+      "loss": 0.197,
+      "step": 3460
+    },
+    {
+      "epoch": 0.9393611261505144,
+      "grad_norm": 2.796875,
+      "learning_rate": 3.1163049526989427e-06,
+      "loss": 0.1253,
+      "step": 3470
+    },
+    {
+      "epoch": 0.9420682187330807,
+      "grad_norm": 2.125,
+      "learning_rate": 2.9771841958820256e-06,
+      "loss": 0.1318,
+      "step": 3480
+    },
+    {
+      "epoch": 0.944775311315647,
+      "grad_norm": 5.21875,
+      "learning_rate": 2.838063439065109e-06,
+      "loss": 0.1233,
+      "step": 3490
+    },
+    {
+      "epoch": 0.9474824038982134,
+      "grad_norm": 0.703125,
+      "learning_rate": 2.6989426822481914e-06,
+      "loss": 0.1058,
+      "step": 3500
+    },
+    {
+      "epoch": 0.9474824038982134,
+      "eval_loss": 0.1561388522386551,
+      "eval_runtime": 73.2191,
+      "eval_samples_per_second": 6.829,
+      "eval_steps_per_second": 0.219,
+      "step": 3500
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 3694,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.05068095973888e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

colpali-v1.1/checkpoint-3500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a581b995cbd1ceefb231054f009a9d0777044fc25ab0e59050e256a55069eded
+size 5048

colpali-v1.1/git_hash.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ e8348666a5a7fff4dd7d53d81411a46f7e011eba

colpali-v1.1/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "_valid_processor_keys": [
+    "images",
+    "do_resize",
+    "size",
+    "resample",
+    "do_rescale",
+    "rescale_factor",
+    "do_normalize",
+    "image_mean",
+    "image_std",
+    "return_tensors",
+    "data_format",
+    "input_data_format",
+    "do_convert_rgb"
+  ],
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_seq_length": 1024,
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "PaliGemmaProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 448,
+    "width": 448
+  }
+}

colpali-v1.1/results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"validation_set": {"ndcg_at_1": 0.782, "ndcg_at_3": 0.83869, "ndcg_at_5": 0.84945, "ndcg_at_10": 0.86122, "ndcg_at_20": 0.86639, "ndcg_at_100": 0.87296, "ndcg_at_1000": 0.87382, "map_at_1": 0.782, "map_at_3": 0.82567, "map_at_5": 0.83167, "map_at_10": 0.83659, "map_at_20": 0.83807, "map_at_100": 0.83894, "map_at_1000": 0.83899, "recall_at_1": 0.782, "recall_at_3": 0.876, "recall_at_5": 0.902, "recall_at_10": 0.938, "recall_at_20": 0.958, "recall_at_100": 0.994, "recall_at_1000": 1.0, "precision_at_1": 0.782, "precision_at_3": 0.292, "precision_at_5": 0.1804, "precision_at_10": 0.0938, "precision_at_20": 0.0479, "precision_at_100": 0.00994, "precision_at_1000": 0.001, "mrr_at_1": 0.786, "mrr_at_3": 0.8293333333333331, "mrr_at_5": 0.8353333333333332, "mrr_at_10": 0.8401857142857142, "mrr_at_20": 0.8413927173703489, "mrr_at_100": 0.842343017479603, "mrr_at_1000": 0.84237119907226, "naucs_at_1_max": 0.1287204083634169, "naucs_at_1_std": 0.04394670353021356, "naucs_at_1_diff1": 0.9147079015332307, "naucs_at_3_max": 0.15408207410154462, "naucs_at_3_std": 0.24171070115262258, "naucs_at_3_diff1": 0.8671703593832085, "naucs_at_5_max": 0.14837363516835303, "naucs_at_5_std": 0.31976600163875035, "naucs_at_5_diff1": 0.8631452581032408, "naucs_at_10_max": 0.1179633143579976, "naucs_at_10_std": 0.597316345893196, "naucs_at_10_diff1": 0.8860124695039325, "naucs_at_20_max": 0.12424969987995331, "naucs_at_20_std": 0.7891823396025082, "naucs_at_20_diff1": 0.9336401227157501, "naucs_at_100_max": -0.17133520074699127, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_energy": {"ndcg_at_1": 0.87, "ndcg_at_3": 0.91917, "ndcg_at_5": 0.92347, "ndcg_at_10": 0.92937, "ndcg_at_20": 0.92937, "ndcg_at_100": 0.93128, "ndcg_at_1000": 0.93277, "map_at_1": 0.87, "map_at_3": 0.90833, "map_at_5": 0.91083, "map_at_10": 0.91294, "map_at_20": 0.91294, "map_at_100": 0.91321, "map_at_1000": 0.91331, 
"recall_at_1": 0.87, "recall_at_3": 0.95, "recall_at_5": 0.96, "recall_at_10": 0.98, "recall_at_20": 0.98, "recall_at_100": 0.99, "recall_at_1000": 1.0, "precision_at_1": 0.87, "precision_at_3": 0.31667, "precision_at_5": 0.192, "precision_at_10": 0.098, "precision_at_20": 0.049, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.88, "mrr_at_3": 0.9133333333333333, "mrr_at_5": 0.9158333333333333, "mrr_at_10": 0.9183730158730158, "mrr_at_20": 0.9183730158730158, "mrr_at_100": 0.9187453269596126, "mrr_at_1000": 0.9187453269596126, "naucs_at_1_max": 0.24364955935718074, "naucs_at_1_std": -0.12800859068355205, "naucs_at_1_diff1": 0.9218692142486855, "naucs_at_3_max": 0.21577964519141157, "naucs_at_3_std": -0.2550887021475216, "naucs_at_3_diff1": 0.9477124183006521, "naucs_at_5_max": 0.05240429505135564, "naucs_at_5_std": -0.5361811391223089, "naucs_at_5_diff1": 0.9346405228758139, "naucs_at_10_max": 0.7957516339869297, "naucs_at_10_std": 0.07936507936508234, "naucs_at_10_diff1": 0.9346405228758136, "naucs_at_20_max": 0.7957516339869297, "naucs_at_20_std": 0.07936507936508234, "naucs_at_20_diff1": 0.9346405228758136, "naucs_at_100_max": 0.8692810457516374, "naucs_at_100_std": 0.7222222222222041, "naucs_at_100_diff1": 0.8692810457516374, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_healthcare_industry": {"ndcg_at_1": 0.92, "ndcg_at_3": 0.94893, "ndcg_at_5": 0.94893, "ndcg_at_10": 0.94893, "ndcg_at_20": 0.95163, "ndcg_at_100": 0.95554, "ndcg_at_1000": 0.95554, "map_at_1": 0.92, "map_at_3": 0.94167, "map_at_5": 0.94167, "map_at_10": 0.94167, "map_at_20": 0.9425, "map_at_100": 0.94313, "map_at_1000": 0.94313, "recall_at_1": 0.92, "recall_at_3": 0.97, "recall_at_5": 0.97, "recall_at_10": 0.97, "recall_at_20": 0.98, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.32333, "precision_at_5": 0.194, "precision_at_10": 0.097, "precision_at_20": 0.049, "precision_at_100": 
0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.92, "mrr_at_3": 0.9416666666666665, "mrr_at_5": 0.9416666666666665, "mrr_at_10": 0.9426666666666665, "mrr_at_20": 0.9426666666666665, "mrr_at_100": 0.9433152709359603, "mrr_at_1000": 0.9433152709359603, "naucs_at_1_max": 0.129493464052287, "naucs_at_1_std": -0.1989379084967342, "naucs_at_1_diff1": 0.8468137254901948, "naucs_at_3_max": -0.00964830376594965, "naucs_at_3_std": -0.7268907563025196, "naucs_at_3_diff1": 0.8638344226579548, "naucs_at_5_max": -0.009648303765951795, "naucs_at_5_std": -0.7268907563025226, "naucs_at_5_diff1": 0.8638344226579531, "naucs_at_10_max": -0.009648303765951795, "naucs_at_10_std": -0.7268907563025226, "naucs_at_10_diff1": 0.8638344226579531, "naucs_at_20_max": -0.5144724556489195, "naucs_at_20_std": -1.1517273576097127, "naucs_at_20_diff1": 0.9346405228758136, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.95, "ndcg_at_3": 0.97393, "ndcg_at_5": 0.97393, "ndcg_at_10": 0.97393, "ndcg_at_20": 0.97393, "ndcg_at_100": 0.97581, "ndcg_at_1000": 0.97581, "map_at_1": 0.95, "map_at_3": 0.96833, "map_at_5": 0.96833, "map_at_10": 0.96833, "map_at_20": 0.96833, "map_at_100": 0.96859, "map_at_1000": 0.96859, "recall_at_1": 0.95, "recall_at_3": 0.99, "recall_at_5": 0.99, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.95, "precision_at_3": 0.33, "precision_at_5": 0.198, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.95, "mrr_at_3": 0.9683333333333334, "mrr_at_5": 0.9683333333333334, "mrr_at_10": 0.9683333333333334, "mrr_at_20": 0.9683333333333334, "mrr_at_100": 0.9686111111111111, "mrr_at_1000": 0.9686111111111111, "naucs_at_1_max": -0.2407096171802055, "naucs_at_1_std": -0.8749766573295958, 
"naucs_at_1_diff1": 0.8846872082166202, "naucs_at_3_max": -1.7399626517274398, "naucs_at_3_std": -1.7399626517274398, "naucs_at_3_diff1": 0.5541549953314585, "naucs_at_5_max": -1.7399626517273863, "naucs_at_5_std": -1.7399626517273863, "naucs_at_5_diff1": 0.5541549953314738, "naucs_at_10_max": -1.7399626517273863, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 0.5541549953314738, "naucs_at_20_max": -1.7399626517273863, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 0.5541549953314738, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_government_reports": {"ndcg_at_1": 0.86, "ndcg_at_3": 0.91286, "ndcg_at_5": 0.92147, "ndcg_at_10": 0.92448, "ndcg_at_20": 0.92727, "ndcg_at_100": 0.92929, "ndcg_at_1000": 0.92929, "map_at_1": 0.86, "map_at_3": 0.9, "map_at_5": 0.905, "map_at_10": 0.90611, "map_at_20": 0.90702, "map_at_100": 0.90735, "map_at_1000": 0.90735, "recall_at_1": 0.86, "recall_at_3": 0.95, "recall_at_5": 0.97, "recall_at_10": 0.98, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.86, "precision_at_3": 0.31667, "precision_at_5": 0.194, "precision_at_10": 0.098, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.86, "mrr_at_3": 0.9, "mrr_at_5": 0.905, "mrr_at_10": 0.90625, "mrr_at_20": 0.907159090909091, "mrr_at_100": 0.9075039184952978, "mrr_at_1000": 0.9075039184952978, "naucs_at_1_max": 0.43501454898157155, "naucs_at_1_std": 0.2136275460717749, "naucs_at_1_diff1": 0.9035263960094214, "naucs_at_3_max": 0.6491129785247453, "naucs_at_3_std": 0.4804855275443501, "naucs_at_3_diff1": 0.9477124183006521, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 0.9128540305010848, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 0.9346405228758136, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, 
"naucs_at_20_diff1": 0.8692810457516413, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "infovqa_subsampled": {"ndcg_at_1": 0.732, "ndcg_at_3": 0.79395, "ndcg_at_5": 0.81366, "ndcg_at_10": 0.82475, "ndcg_at_20": 0.82906, "ndcg_at_100": 0.83691, "ndcg_at_1000": 0.83975, "map_at_1": 0.732, "map_at_3": 0.77933, "map_at_5": 0.79023, "map_at_10": 0.79487, "map_at_20": 0.79593, "map_at_100": 0.79706, "map_at_1000": 0.79717, "recall_at_1": 0.732, "recall_at_3": 0.836, "recall_at_5": 0.884, "recall_at_10": 0.918, "recall_at_20": 0.936, "recall_at_100": 0.978, "recall_at_1000": 1.0, "precision_at_1": 0.732, "precision_at_3": 0.27867, "precision_at_5": 0.1768, "precision_at_10": 0.0918, "precision_at_20": 0.0468, "precision_at_100": 0.00978, "precision_at_1000": 0.001, "mrr_at_1": 0.732, "mrr_at_3": 0.7786666666666665, "mrr_at_5": 0.7900666666666664, "mrr_at_10": 0.794449206349206, "mrr_at_20": 0.7953232247284874, "mrr_at_100": 0.7966255910510839, "mrr_at_1000": 0.7967190443508116, "naucs_at_1_max": 0.4347700535605503, "naucs_at_1_std": 0.0052057373871626685, "naucs_at_1_diff1": 0.865643715535724, "naucs_at_3_max": 0.6048031004775511, "naucs_at_3_std": 0.15614576084087275, "naucs_at_3_diff1": 0.8398166590674573, "naucs_at_5_max": 0.5930604282107613, "naucs_at_5_std": 0.1861700383619125, "naucs_at_5_diff1": 0.7956080527230405, "naucs_at_10_max": 0.6479242103345397, "naucs_at_10_std": 0.25604062763316404, "naucs_at_10_diff1": 0.7970326341918881, "naucs_at_20_max": 0.6982668067226916, "naucs_at_20_std": 0.40970763305321833, "naucs_at_20_diff1": 0.7613941409897302, "naucs_at_100_max": 0.9762329174093898, "naucs_at_100_std": 0.8936847466259251, "naucs_at_100_diff1": 0.80561921738392, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "docvqa_subsampled": {"ndcg_at_1": 0.458, "ndcg_at_3": 0.5249, "ndcg_at_5": 0.54987, "ndcg_at_10": 0.5691, 
"ndcg_at_20": 0.58534, "ndcg_at_100": 0.60898, "ndcg_at_1000": 0.62315, "map_at_1": 0.458, "map_at_3": 0.508, "map_at_5": 0.522, "map_at_10": 0.52983, "map_at_20": 0.53432, "map_at_100": 0.53744, "map_at_1000": 0.53795, "recall_at_1": 0.458, "recall_at_3": 0.574, "recall_at_5": 0.634, "recall_at_10": 0.694, "recall_at_20": 0.758, "recall_at_100": 0.888, "recall_at_1000": 1.0, "precision_at_1": 0.458, "precision_at_3": 0.19133, "precision_at_5": 0.1268, "precision_at_10": 0.0694, "precision_at_20": 0.0379, "precision_at_100": 0.00888, "precision_at_1000": 0.001, "mrr_at_1": 0.462, "mrr_at_3": 0.5103333333333334, "mrr_at_5": 0.5240333333333334, "mrr_at_10": 0.5330150793650792, "mrr_at_20": 0.5363530102594342, "mrr_at_100": 0.5398099695126392, "mrr_at_1000": 0.540360971784682, "naucs_at_1_max": 0.14478705160292352, "naucs_at_1_std": 0.3721140942241178, "naucs_at_1_diff1": 0.8556357724943732, "naucs_at_3_max": 0.0955052454658937, "naucs_at_3_std": 0.4342882512519065, "naucs_at_3_diff1": 0.7991501163256658, "naucs_at_5_max": 0.01343769437454414, "naucs_at_5_std": 0.48670912254002735, "naucs_at_5_diff1": 0.7732231206645599, "naucs_at_10_max": -0.024673527538943748, "naucs_at_10_std": 0.5356353895792051, "naucs_at_10_diff1": 0.7689667071014644, "naucs_at_20_max": -0.12562798953100113, "naucs_at_20_std": 0.5620597626854955, "naucs_at_20_diff1": 0.7722820785003025, "naucs_at_100_max": -0.23859408820346314, "naucs_at_100_std": 0.864870806277056, "naucs_at_100_diff1": 0.7339945211038957, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "arxivqa_subsampled": {"ndcg_at_1": 0.712, "ndcg_at_3": 0.76712, "ndcg_at_5": 0.78364, "ndcg_at_10": 0.80123, "ndcg_at_20": 0.812, "ndcg_at_100": 0.81995, "ndcg_at_1000": 0.82209, "map_at_1": 0.712, "map_at_3": 0.75367, "map_at_5": 0.76287, "map_at_10": 0.77019, "map_at_20": 0.77323, "map_at_100": 0.7744, "map_at_1000": 0.77449, "recall_at_1": 0.712, "recall_at_3": 0.806, "recall_at_5": 0.846, "recall_at_10": 
0.9, "recall_at_20": 0.942, "recall_at_100": 0.984, "recall_at_1000": 1.0, "precision_at_1": 0.712, "precision_at_3": 0.26867, "precision_at_5": 0.1692, "precision_at_10": 0.09, "precision_at_20": 0.0471, "precision_at_100": 0.00984, "precision_at_1000": 0.001, "mrr_at_1": 0.724, "mrr_at_3": 0.7626666666666667, "mrr_at_5": 0.7716666666666666, "mrr_at_10": 0.7785738095238096, "mrr_at_20": 0.7814459001782531, "mrr_at_100": 0.7824821791237915, "mrr_at_1000": 0.7825927236311448, "naucs_at_1_max": 0.5179261037469994, "naucs_at_1_std": 0.0715515827456125, "naucs_at_1_diff1": 0.8994028359700004, "naucs_at_3_max": 0.5866330274286543, "naucs_at_3_std": 0.15098605444623744, "naucs_at_3_diff1": 0.8370723586175715, "naucs_at_5_max": 0.6037655521242381, "naucs_at_5_std": 0.16178021782505292, "naucs_at_5_diff1": 0.8241259674406191, "naucs_at_10_max": 0.5842483660130722, "naucs_at_10_std": 0.20487394957983113, "naucs_at_10_diff1": 0.7742016806722692, "naucs_at_20_max": 0.735455101580865, "naucs_at_20_std": 0.3710035738433312, "naucs_at_20_diff1": 0.7694871051869058, "naucs_at_100_max": 0.8503151260504297, "naucs_at_100_std": 0.5717787114846009, "naucs_at_100_diff1": 0.8129668534080331, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "tabfquad_subsampled": {"ndcg_at_1": 0.76429, "ndcg_at_3": 0.83078, "ndcg_at_5": 0.84353, "ndcg_at_10": 0.85388, "ndcg_at_20": 0.86455, "ndcg_at_100": 0.86741, "ndcg_at_1000": 0.86741, "map_at_1": 0.76429, "map_at_3": 0.81429, "map_at_5": 0.82107, "map_at_10": 0.82532, "map_at_20": 0.82817, "map_at_100": 0.82864, "map_at_1000": 0.82864, "recall_at_1": 0.76429, "recall_at_3": 0.87857, "recall_at_5": 0.91071, "recall_at_10": 0.94286, "recall_at_20": 0.98571, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.76429, "precision_at_3": 0.29286, "precision_at_5": 0.18214, "precision_at_10": 0.09429, "precision_at_20": 0.04929, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.7571428571428571, 
"mrr_at_3": 0.8095238095238096, "mrr_at_5": 0.8179166666666666, "mrr_at_10": 0.8214625850340135, "mrr_at_20": 0.8241040305215449, "mrr_at_100": 0.8247231380884232, "mrr_at_1000": 0.8247231380884232, "naucs_at_1_max": 0.4032387013842082, "naucs_at_1_std": 0.20721575143829152, "naucs_at_1_diff1": 0.8371243592356286, "naucs_at_3_max": 0.37018609096531074, "naucs_at_3_std": 0.19961207623545132, "naucs_at_3_diff1": 0.7850677461067068, "naucs_at_5_max": 0.49086834733893503, "naucs_at_5_std": 0.26750700280112155, "naucs_at_5_diff1": 0.7712791783380023, "naucs_at_10_max": 0.5342845471521948, "naucs_at_10_std": 0.382148692810459, "naucs_at_10_diff1": 0.7389122315592906, "naucs_at_20_max": 0.42938842203549277, "naucs_at_20_std": 0.5764472455649069, "naucs_at_20_diff1": 0.8978758169934754, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "tatdqa": {"ndcg_at_1": 0.51714, "ndcg_at_3": 0.63347, "ndcg_at_5": 0.65997, "ndcg_at_10": 0.68432, "ndcg_at_20": 0.69776, "ndcg_at_100": 0.71265, "ndcg_at_1000": 0.7148, "map_at_1": 0.51714, "map_at_3": 0.60543, "map_at_5": 0.62013, "map_at_10": 0.63032, "map_at_20": 0.63413, "map_at_100": 0.63629, "map_at_1000": 0.63639, "recall_at_1": 0.51714, "recall_at_3": 0.71437, "recall_at_5": 0.77871, "recall_at_10": 0.85328, "recall_at_20": 0.90559, "recall_at_100": 0.98437, "recall_at_1000": 1.0, "precision_at_1": 0.51714, "precision_at_3": 0.23812, "precision_at_5": 0.15574, "precision_at_10": 0.08533, "precision_at_20": 0.04528, "precision_at_100": 0.00984, "precision_at_1000": 0.001, "mrr_at_1": 0.5189416716776909, "mrr_at_3": 0.6063339346562444, "mrr_at_5": 0.6203447584686321, "mrr_at_10": 0.6304701295230465, "mrr_at_20": 0.6343502614596628, "mrr_at_100": 0.6366034259317307, "mrr_at_1000": 0.6366958159270893, "naucs_at_1_max": 0.16652309459558007, "naucs_at_1_std": -0.06670433006927416, "naucs_at_1_diff1": 0.6953792910740597, 
"naucs_at_3_max": 0.16902492509679246, "naucs_at_3_std": -0.03701362337519723, "naucs_at_3_diff1": 0.5494053126321319, "naucs_at_5_max": 0.21105315084874532, "naucs_at_5_std": 0.04611791703673267, "naucs_at_5_diff1": 0.5162174552139975, "naucs_at_10_max": 0.23091913809374726, "naucs_at_10_std": 0.10830125135311366, "naucs_at_10_diff1": 0.4866871626269709, "naucs_at_20_max": 0.2725948129883982, "naucs_at_20_std": 0.24954038307560938, "naucs_at_20_diff1": 0.4588931923467883, "naucs_at_100_max": 0.6502319824989845, "naucs_at_100_std": 0.6011619544721175, "naucs_at_100_diff1": 0.7480994634253559, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "shift_project": {"ndcg_at_1": 0.56, "ndcg_at_3": 0.70095, "ndcg_at_5": 0.72934, "ndcg_at_10": 0.74318, "ndcg_at_20": 0.7537, "ndcg_at_100": 0.75912, "ndcg_at_1000": 0.76194, "map_at_1": 0.56, "map_at_3": 0.66667, "map_at_5": 0.68217, "map_at_10": 0.68842, "map_at_20": 0.69152, "map_at_100": 0.69222, "map_at_1000": 0.69237, "recall_at_1": 0.56, "recall_at_3": 0.8, "recall_at_5": 0.87, "recall_at_10": 0.91, "recall_at_20": 0.95, "recall_at_100": 0.98, "recall_at_1000": 1.0, "precision_at_1": 0.56, "precision_at_3": 0.26667, "precision_at_5": 0.174, "precision_at_10": 0.091, "precision_at_20": 0.0475, "precision_at_100": 0.0098, "precision_at_1000": 0.001, "mrr_at_1": 0.59, "mrr_at_3": 0.69, "mrr_at_5": 0.7055, "mrr_at_10": 0.7100833333333334, "mrr_at_20": 0.7131832236391059, "mrr_at_100": 0.7139141492175927, "mrr_at_1000": 0.7140687525760177, "naucs_at_1_max": -0.06823472632296203, "naucs_at_1_std": -0.21851578469225566, "naucs_at_1_diff1": 0.6402168975698381, "naucs_at_3_max": -0.5083416583416578, "naucs_at_3_std": -0.719330669330669, "naucs_at_3_diff1": 0.49285714285714205, "naucs_at_5_max": -0.40505813522920514, "naucs_at_5_std": -0.5592460934607096, "naucs_at_5_diff1": 0.3987632377990074, "naucs_at_10_max": -0.4162776221599734, "naucs_at_10_std": -0.47852474323062383, "naucs_at_10_diff1": 
0.4046062869592272, "naucs_at_20_max": -0.5808590102707625, "naucs_at_20_std": -0.5164332399626398, "naucs_at_20_diff1": 0.4697478991596656, "naucs_at_100_max": -1.4458450046685247, "naucs_at_100_std": -1.4458450046685247, "naucs_at_100_diff1": 0.6790382819794637, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}

colpali-v1.1/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<image>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

colpali-v1.1/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffd310e50986db7a039948ab83441d612689e7f989198e31b5c8984ca458adf6
+size 17763459

colpali-v1.1/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

colpali-v1.1/training_config.yml ADDED Viewed

	@@ -0,0 +1,40 @@

+config:
+  (): colpali_engine.utils.train_colpali_engine_models.ColModelTrainingConfig
+  output_dir: !path ../../../models/right_pad/train_colpali-3b-mix-448
+  processor:
+    () : colpali_engine.utils.wrapper.AutoProcessorWrapper
+    pretrained_model_name_or_path:  "./models/colpaligemma-3b-mix-448-base" # "./models/paligemma-3b-mix-448"
+    max_length: 50
+  model:
+    (): colpali_engine.utils.wrapper.AllPurposeWrapper
+    class_to_instanciate: !ext colpali_engine.models.paligemma_colbert_architecture.ColPali
+    pretrained_model_name_or_path: "./models/colpaligemma-3b-mix-448-base"
+    torch_dtype:  !ext torch.bfloat16
+#    device_map: "auto"
+#    quantization_config:
+#      (): transformers.BitsAndBytesConfig
+#      load_in_4bit: true
+#      bnb_4bit_quant_type: "nf4"
+#      bnb_4bit_compute_dtype:  "bfloat16"
+#      bnb_4bit_use_double_quant: true
+  dataset_loading_func: !ext colpali_engine.utils.dataset_transformation.load_train_set
+  eval_dataset_loader: !import ../data/test_data.yaml
+  max_length: 50
+  run_eval: true
+  add_suffix: true
+  loss_func:
+    (): colpali_engine.loss.colbert_loss.ColbertPairwiseCELoss
+  tr_args: !import ../tr_args/default_tr_args.yaml
+  peft_config:
+    (): peft.LoraConfig
+    r: 32
+    lora_alpha: 32
+    lora_dropout: 0.1
+    init_lora_weights: "gaussian"
+    bias: "none"
+    task_type: "FEATURE_EXTRACTION"
+    target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'
+    # target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'

colqwen2-v1.0/.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

colqwen2-v1.0/README.md ADDED Viewed

	@@ -0,0 +1,126 @@

+---
+license: apache-2.0
+library_name: colpali
+base_model: vidore/colqwen2-base
+language:
+- en
+tags:
+- colpali
+- vidore-experimental
+- vidore
+pipeline_tag: visual-document-retrieval
+---
+# ColQwen2: Visual Retriever based on Qwen2-VL-2B-Instruct with ColBERT strategy
+### This is the base version trained with batch_size 256 instead of 32 for 5 epochs and with the updated pad token
+ColQwen2 is a model based on a novel model architecture and training strategy based on Vision Language Models (VLMs) to efficiently index documents from their visual features.
+It is a [Qwen2-VL-2B](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) extension that generates [ColBERT](https://arxiv.org/abs/2004.12832)-style multi-vector representations of text and images.
+It was introduced in the paper [ColPali: Efficient Document Retrieval with Vision Language Models](https://arxiv.org/abs/2407.01449) and first released in [this repository](https://github.com/ManuelFay/colpali)
+<p align="center"><img width=800 src="https://github.com/illuin-tech/colpali/blob/main/assets/colpali_architecture.webp?raw=true"/></p>
+## Version specificity
+This model accepts input images at dynamic resolutions and does not resize them to a fixed size, so their aspect ratio is preserved (unlike ColPali, where resizing changes the aspect ratio).
+Maximal resolution is set so that 768 image patches are created at most. Experiments show clear improvements with larger amounts of image patches, at the cost of memory requirements.
+This version is trained with `colpali-engine==0.3.1`.
+Data is the same as the ColPali data described in the paper.
+## Model Training
+### Dataset
+Our training dataset of 127,460 query-page pairs is comprised of train sets of openly available academic datasets (63%) and a synthetic dataset made up of pages from web-crawled PDF documents and augmented with VLM-generated (Claude-3 Sonnet) pseudo-questions (37%).
+Our training set is fully English by design, enabling us to study zero-shot generalization to non-English languages. We explicitly verify that no multi-page PDF document is used both in [*ViDoRe*](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d) and in the train set, to prevent evaluation contamination.
+A validation set is created with 2% of the samples to tune hyperparameters.
+*Note: Multilingual data is present in the pretraining corpus of the language model and most probably in the multimodal training.*
+### Parameters
+All models are trained for 1 epoch on the train set. Unless specified otherwise, we train models in `bfloat16` format, use low-rank adapters ([LoRA](https://arxiv.org/abs/2106.09685))
+with `alpha=32`  and `r=32` on the transformer layers from the language model,
+as well as the final randomly initialized projection layer, and use a `paged_adamw_8bit` optimizer.
+We train on an 8 GPU setup with data parallelism, a learning rate of 5e-5 with linear decay with 2.5% warmup steps, and a batch size of 32.
+## Usage
+Make sure `colpali-engine` is installed from source or with a version greater than 0.3.4.
+`transformers` version must be > 4.46.1.
+```bash
+pip install git+https://github.com/illuin-tech/colpali
+```
+```python
+import torch
+from PIL import Image
+from transformers.utils.import_utils import is_flash_attn_2_available
+from colpali_engine.models import ColQwen2, ColQwen2Processor
+model = ColQwen2.from_pretrained(
+    "vidore/colqwen2-v1.0",
+    torch_dtype=torch.bfloat16,
+    device_map="cuda:0",  # or "mps" if on Apple Silicon
+    attn_implementation="flash_attention_2" if is_flash_attn_2_available() else None,
+).eval()
+processor = ColQwen2Processor.from_pretrained("vidore/colqwen2-v1.0")
+# Your inputs
+images = [
+    Image.new("RGB", (128, 128), color="white"),
+    Image.new("RGB", (64, 32), color="black"),
+]
+queries = [
+    "Is attention really all you need?",
+    "What is the amount of bananas farmed in Salvador?",
+]
+# Process the inputs
+batch_images = processor.process_images(images).to(model.device)
+batch_queries = processor.process_queries(queries).to(model.device)
+# Forward pass
+with torch.no_grad():
+    image_embeddings = model(**batch_images)
+    query_embeddings = model(**batch_queries)
+scores = processor.score_multi_vector(query_embeddings, image_embeddings)
+```
+## Limitations
+ - **Focus**: The model primarily focuses on PDF-type documents and high-resource languages, potentially limiting its generalization to other document types or less represented languages.
+ - **Support**: The model relies on multi-vector retrieval derived from the ColBERT late interaction mechanism, which may require engineering efforts to adapt to widely used vector retrieval frameworks that lack native multi-vector support.
+## License
+ColQwen2's vision language backbone model (Qwen2-VL) is under `apache2.0` license. The adapters attached to the model are under MIT license.
+## Contact
+- Manuel Faysse: [email protected]
+- Hugues Sibille: [email protected]
+- Tony Wu: [email protected]
+## Citation
+If you use any datasets or models from this organization in your research, please cite the original dataset as follows:
+```bibtex
+@misc{faysse2024colpaliefficientdocumentretrieval,
+  title={ColPali: Efficient Document Retrieval with Vision Language Models},
+  author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
+  year={2024},
+  eprint={2407.01449},
+  archivePrefix={arXiv},
+  primaryClass={cs.IR},
+  url={https://arxiv.org/abs/2407.01449},
+}
+```

colqwen2-v1.0/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "checkpoint/colqwen2-base",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)",
+  "task_type": "FEATURE_EXTRACTION",
+  "use_dora": false,
+  "use_rslora": false
+}

colqwen2-v1.0/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fc856312174dc99a4c7f88a2c54d9590a3b3f5b5a86e2728d7138c7f4758c6d5
+size 74018232

colqwen2-v1.0/added_tokens.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

colqwen2-v1.0/chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+}

colqwen2-v1.0/git_hash.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ 961d20526206465f9ebef9affd131d53a9f516e6

colqwen2-v1.0/handler.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import base64
+import io
+from PIL import Image
+from typing import Dict, List, Any
+from transformers.utils.import_utils import is_flash_attn_2_available
+from colpali_engine.models import ColQwen2, ColQwen2Processor
+import torch
+class EndpointHandler():
+    def __init__(self, path=""):
+        self.model = ColQwen2.from_pretrained(
+            path,
+            torch_dtype=torch.bfloat16,
+            device_map="cuda:0",  # or "mps" if on Apple Silicon
+            attn_implementation="flash_attention_2" if is_flash_attn_2_available() else None,
+        ).eval()
+        self.processor = ColQwen2Processor.from_pretrained(path) #, max_num_visual_tokens=8192) # temporary
+        # self.model = torch.compile(self.model)
+        print(f"Model and processor loaded {'with' if is_flash_attn_2_available() else 'without'} FA2")
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Expects data in one of the following formats in the "inputs" key:
+            {
+                "images": [
+                    "base64_encoded_image1",
+                    "base64_encoded_image2",
+                    ...
+                ]
+            }
+            xor
+            {
+                "queries": [
+                    "text1",
+                    "text2",
+                    ...
+                ]
+            }
+        Returns embeddings for the provided input type.
+        """
+        # Input validation
+        data = data.get("inputs", [])
+        input_keys = [key for key in ["images", "queries"] if key in data]
+        if len(input_keys) != 1:
+            return {"error": "Exactly one of 'images', 'queries' must be provided"}
+        input_type = input_keys[0]
+        inputs = data[input_type]
+        if input_type == "images":
+            if not isinstance(inputs, list):
+                inputs = [inputs]
+            if len(inputs) > 8:
+                return {"message": "Send a maximum of 8 images at once. We recommend sending one by one to improve load balancing."}
+            # Decode each image from base64 and convert to a PIL Image
+            decoded_images = []
+            for img_str in inputs:
+                try:
+                    img_data = base64.b64decode(img_str)
+                    image = Image.open(io.BytesIO(img_data)).convert("RGB")
+                    decoded_images.append(image)
+                except Exception as e:
+                    return {"error": f"Error decoding image: {str(e)}"}
+            # Process the images using the processor
+            batch = self.processor.process_images(decoded_images).to(self.model.device)
+        # elif input_type == "processed_images":
+        #     try:
+        #         buffer = io.BytesIO(base64.b64decode(inputs))
+        #         batch = torch.load(buffer, map_location=self.model.device)
+        #     except Exception as e:
+        #         return {"error": f"Error processing preprocessed images: {str(e)}"}
+        else:  # text
+            if not isinstance(inputs, list):
+                inputs = [inputs]
+            try:
+                batch = self.processor.process_queries(inputs).to(self.model.device)
+            except Exception as e:
+                return {"error": f"Error processing text: {str(e)}"}
+        # Forward pass through the model
+        with torch.inference_mode():
+            embeddings = self.model(**batch).tolist()
+        return {"embeddings": embeddings}

colqwen2-v1.0/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

colqwen2-v1.0/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2VLImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "max_pixels": 602112,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "patch_size": 14,
+  "processor_class": "Qwen2VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "max_pixels": 602112,
+    "min_pixels": 3136,
+    "longest_edge": 602112,
+    "shortest_edge": 3136
+  },
+  "temporal_patch_size": 2
+}

colqwen2-v1.0/requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ colpali-engine==0.3.9
2	+ flash-attn

colqwen2-v1.0/results.json ADDED Viewed

	@@ -0,0 +1,465 @@

+{"vidore/restaurant_esg_reports_beir": {"ndcg_at_1": 0.62179, "ndcg_at_3": 0.57934, "ndcg_at_5": 0.62234, "ndcg_at_10": 0.65262, "ndcg_at_20": 0.67475, "ndcg_at_50": 0.70087, "ndcg_at_100": 0.71211, "map_at_1": 0.43526, "map_at_3": 0.50267, "map_at_5": 0.55008, "map_at_10": 0.57335, "map_at_20": 0.58435, "map_at_50": 0.59308, "map_at_100": 0.5954, "recall_at_1": 0.43526, "recall_at_3": 0.53333, "recall_at_5": 0.66204, "recall_at_10": 0.75262, "recall_at_20": 0.81833, "recall_at_50": 0.91518, "recall_at_100": 0.96614, "precision_at_1": 0.63462, "precision_at_3": 0.30769, "precision_at_5": 0.24615, "precision_at_10": 0.14808, "precision_at_20": 0.08654, "precision_at_50": 0.04115, "precision_at_100": 0.02212, "mrr_at_1": 0.5961538461538461, "mrr_at_3": 0.6442307692307693, "mrr_at_5": 0.6721153846153847, "mrr_at_10": 0.678792735042735, "mrr_at_20": 0.6830509768009766, "mrr_at_50": 0.6843387515262513, "mrr_at_100": 0.6846592643467642, "naucs_at_1_max": 0.007278669575223614, "naucs_at_1_std": -0.16862511436532815, "naucs_at_1_diff1": 0.37195886708166814, "naucs_at_3_max": -0.11224357092950424, "naucs_at_3_std": -0.13484611004600808, "naucs_at_3_diff1": 0.12647962031083498, "naucs_at_5_max": -0.08235536599427677, "naucs_at_5_std": 0.029280898603991155, "naucs_at_5_diff1": 0.03850576951146517, "naucs_at_10_max": -0.05329392017057177, "naucs_at_10_std": 0.07795337723139084, "naucs_at_10_diff1": -0.13170232588923755, "naucs_at_20_max": -0.13017358757688524, "naucs_at_20_std": -0.008088492748358699, "naucs_at_20_diff1": -0.2193370407928556, "naucs_at_50_max": -0.09015721409307421, "naucs_at_50_std": 0.022247428759607314, "naucs_at_50_diff1": -0.2736453927338279, "naucs_at_100_max": -0.14696693886931617, "naucs_at_100_std": -0.010945474748335252, "naucs_at_100_diff1": -0.3271662812920136},
+        "vidore/synthetics_economics_macro_economy_2024_filtered_v1.0_multilingual": {
+            "ndcg_at_1": 0.55603,
+            "ndcg_at_3": 0.54807,
+            "ndcg_at_5": 0.53157,
+            "ndcg_at_10": 0.5146,
+            "ndcg_at_20": 0.54585,
+            "ndcg_at_50": 0.61451,
+            "ndcg_at_100": 0.6553,
+            "map_at_1": 0.0783,
+            "map_at_3": 0.16664,
+            "map_at_5": 0.21874,
+            "map_at_10": 0.28014,
+            "map_at_20": 0.3378,
+            "map_at_50": 0.39242,
+            "map_at_100": 0.42099,
+            "recall_at_1": 0.0783,
+            "recall_at_3": 0.20216,
+            "recall_at_5": 0.28814,
+            "recall_at_10": 0.41158,
+            "recall_at_20": 0.55969,
+            "recall_at_50": 0.76372,
+            "recall_at_100": 0.88571,
+            "precision_at_1": 0.55603,
+            "precision_at_3": 0.51293,
+            "precision_at_5": 0.47328,
+            "precision_at_10": 0.38103,
+            "precision_at_20": 0.29483,
+            "precision_at_50": 0.18716,
+            "precision_at_100": 0.1244,
+            "mrr_at_1": 0.5474137931034483,
+            "mrr_at_3": 0.6666666666666666,
+            "mrr_at_5": 0.6847701149425288,
+            "mrr_at_10": 0.6911022167487685,
+            "mrr_at_20": 0.6938394796748926,
+            "mrr_at_50": 0.6948904461025456,
+            "mrr_at_100": 0.6948904461025456,
+            "naucs_at_1_max": 0.07994091110523227,
+            "naucs_at_1_std": 0.12444987153144782,
+            "naucs_at_1_diff1": 0.09424460432630058,
+            "naucs_at_3_max": 0.16923654602805172,
+            "naucs_at_3_std": 0.2045449370458442,
+            "naucs_at_3_diff1": 0.09926647816044282,
+            "naucs_at_5_max": 0.17728557191005953,
+            "naucs_at_5_std": 0.2565334951047491,
+            "naucs_at_5_diff1": 0.07759845826151261,
+            "naucs_at_10_max": 0.13245740814133947,
+            "naucs_at_10_std": 0.21617752015273486,
+            "naucs_at_10_diff1": 0.023098575182872085,
+            "naucs_at_20_max": 0.0673142638543018,
+            "naucs_at_20_std": 0.19206721484700626,
+            "naucs_at_20_diff1": -0.007872307371572145,
+            "naucs_at_50_max": -0.02207912884522245,
+            "naucs_at_50_std": 0.14587302006330063,
+            "naucs_at_50_diff1": -0.048901378247186766,
+            "naucs_at_100_max": -0.07381834254515347,
+            "naucs_at_100_std": 0.10309591458261987,
+            "naucs_at_100_diff1": -0.0369817902657747
+        },
+        "vidore/synthetic_axa_filtered_v1.0_multilingual": {
+            "ndcg_at_1": 0.55556,
+            "ndcg_at_3": 0.57286,
+            "ndcg_at_5": 0.57184,
+            "ndcg_at_10": 0.57514,
+            "ndcg_at_20": 0.60015,
+            "ndcg_at_50": 0.64497,
+            "ndcg_at_100": 0.67229,
+            "map_at_1": 0.27024,
+            "map_at_3": 0.35733,
+            "map_at_5": 0.40728,
+            "map_at_10": 0.4556,
+            "map_at_20": 0.48059,
+            "map_at_50": 0.50401,
+            "map_at_100": 0.51134,
+            "recall_at_1": 0.27024,
+            "recall_at_3": 0.42572,
+            "recall_at_5": 0.50846,
+            "recall_at_10": 0.61625,
+            "recall_at_20": 0.70808,
+            "recall_at_50": 0.81979,
+            "recall_at_100": 0.92724,
+            "precision_at_1": 0.55556,
+            "precision_at_3": 0.40741,
+            "precision_at_5": 0.34444,
+            "precision_at_10": 0.24028,
+            "precision_at_20": 0.15,
+            "precision_at_50": 0.07972,
+            "precision_at_100": 0.04542,
+            "mrr_at_1": 0.5138888888888888,
+            "mrr_at_3": 0.6388888888888888,
+            "mrr_at_5": 0.6486111111111111,
+            "mrr_at_10": 0.6532407407407407,
+            "mrr_at_20": 0.6550035612535612,
+            "mrr_at_50": 0.6554515899273964,
+            "mrr_at_100": 0.6562765438177584,
+            "naucs_at_1_max": 0.06394991291872823,
+            "naucs_at_1_std": 0.2339109450954795,
+            "naucs_at_1_diff1": 0.30591875421589343,
+            "naucs_at_3_max": -0.17975083964563082,
+            "naucs_at_3_std": 0.027577428119795747,
+            "naucs_at_3_diff1": 0.09852366590105074,
+            "naucs_at_5_max": -0.23673003232880171,
+            "naucs_at_5_std": -0.01855706256438857,
+            "naucs_at_5_diff1": 0.03504088887398939,
+            "naucs_at_10_max": -0.32103926214321127,
+            "naucs_at_10_std": -0.06117912492226144,
+            "naucs_at_10_diff1": -0.14080922308311572,
+            "naucs_at_20_max": -0.34694147283910304,
+            "naucs_at_20_std": -0.04490360729046198,
+            "naucs_at_20_diff1": -0.17565284792297425,
+            "naucs_at_50_max": -0.3890920945387956,
+            "naucs_at_50_std": -0.06731905750861354,
+            "naucs_at_50_diff1": -0.19417578723003429,
+            "naucs_at_100_max": -0.41189447174112953,
+            "naucs_at_100_std": -0.0755751130681716,
+            "naucs_at_100_diff1": -0.21048470142462247
+        },
+        "vidore/synthetic_mit_biomedical_tissue_interactions_unfiltered": {
+            "ndcg_at_1": 0.59375,
+            "ndcg_at_3": 0.59937,
+            "ndcg_at_5": 0.6176,
+            "ndcg_at_10": 0.65125,
+            "ndcg_at_20": 0.68031,
+            "ndcg_at_50": 0.70414,
+            "ndcg_at_100": 0.7118,
+            "map_at_1": 0.35678,
+            "map_at_3": 0.48409,
+            "map_at_5": 0.52133,
+            "map_at_10": 0.55746,
+            "map_at_20": 0.57311,
+            "map_at_50": 0.582,
+            "map_at_100": 0.58404,
+            "recall_at_1": 0.35678,
+            "recall_at_3": 0.5553,
+            "recall_at_5": 0.6424,
+            "recall_at_10": 0.74954,
+            "recall_at_20": 0.83273,
+            "recall_at_50": 0.90356,
+            "recall_at_100": 0.92956,
+            "precision_at_1": 0.59375,
+            "precision_at_3": 0.36875,
+            "precision_at_5": 0.2775,
+            "precision_at_10": 0.18,
+            "precision_at_20": 0.10656,
+            "precision_at_50": 0.05138,
+            "precision_at_100": 0.02756,
+            "mrr_at_1": 0.5875,
+            "mrr_at_3": 0.6791666666666667,
+            "mrr_at_5": 0.6979166666666667,
+            "mrr_at_10": 0.7059871031746032,
+            "mrr_at_20": 0.7082808189563704,
+            "mrr_at_50": 0.7090456790962306,
+            "mrr_at_100": 0.7091433353462306,
+            "naucs_at_1_max": 0.249983882109141,
+            "naucs_at_1_std": -0.041280221045360686,
+            "naucs_at_1_diff1": 0.4059037531660143,
+            "naucs_at_3_max": 0.01878691845110894,
+            "naucs_at_3_std": -0.06513153750357396,
+            "naucs_at_3_diff1": -0.04117906448220925,
+            "naucs_at_5_max": -0.03604209381453121,
+            "naucs_at_5_std": -0.026999419446342467,
+            "naucs_at_5_diff1": -0.2004747312183539,
+            "naucs_at_10_max": -0.15546557639975261,
+            "naucs_at_10_std": -0.12249727214166432,
+            "naucs_at_10_diff1": -0.28313723428030524,
+            "naucs_at_20_max": -0.230360039337395,
+            "naucs_at_20_std": -0.11871250920639782,
+            "naucs_at_20_diff1": -0.35374918579255926,
+            "naucs_at_50_max": -0.21935608877014798,
+            "naucs_at_50_std": -0.04506401935703416,
+            "naucs_at_50_diff1": -0.34518311048379996,
+            "naucs_at_100_max": -0.22834972168166315,
+            "naucs_at_100_std": -0.04987710526052404,
+            "naucs_at_100_diff1": -0.33362168437999984
+        },
+        "vidore/synthetic_rse_restaurant_filtered_v1.0": {
+            "ndcg_at_1": 0.52632,
+            "ndcg_at_3": 0.48702,
+            "ndcg_at_5": 0.53392,
+            "ndcg_at_10": 0.5786,
+            "ndcg_at_20": 0.62288,
+            "ndcg_at_50": 0.64776,
+            "ndcg_at_100": 0.66238,
+            "map_at_1": 0.23981,
+            "map_at_3": 0.34792,
+            "map_at_5": 0.408,
+            "map_at_10": 0.4528,
+            "map_at_20": 0.48189,
+            "map_at_50": 0.49681,
+            "map_at_100": 0.50309,
+            "recall_at_1": 0.23981,
+            "recall_at_3": 0.41261,
+            "recall_at_5": 0.55802,
+            "recall_at_10": 0.71472,
+            "recall_at_20": 0.85539,
+            "recall_at_50": 0.9246,
+            "recall_at_100": 0.97368,
+            "precision_at_1": 0.52632,
+            "precision_at_3": 0.35088,
+            "precision_at_5": 0.29474,
+            "precision_at_10": 0.2,
+            "precision_at_20": 0.13158,
+            "precision_at_50": 0.06877,
+            "precision_at_100": 0.0386,
+            "mrr_at_1": 0.5087719298245614,
+            "mrr_at_3": 0.5877192982456141,
+            "mrr_at_5": 0.6149122807017543,
+            "mrr_at_10": 0.6263575605680868,
+            "mrr_at_20": 0.6303891040733146,
+            "mrr_at_50": 0.6303891040733146,
+            "mrr_at_100": 0.6305757408780925,
+            "naucs_at_1_max": -0.08333796614169701,
+            "naucs_at_1_std": 0.02230843203992766,
+            "naucs_at_1_diff1": 0.05161927720353975,
+            "naucs_at_3_max": -0.03209945113922174,
+            "naucs_at_3_std": 0.04489217019394448,
+            "naucs_at_3_diff1": -0.03516139834577249,
+            "naucs_at_5_max": -0.08062920691129684,
+            "naucs_at_5_std": 0.05137467110356768,
+            "naucs_at_5_diff1": -0.11589678137851257,
+            "naucs_at_10_max": -0.19710514484783043,
+            "naucs_at_10_std": -0.019483171394852755,
+            "naucs_at_10_diff1": -0.19216047549364354,
+            "naucs_at_20_max": -0.2492809947138442,
+            "naucs_at_20_std": -0.019880591668848344,
+            "naucs_at_20_diff1": -0.13918128050450226,
+            "naucs_at_50_max": -0.297692866142236,
+            "naucs_at_50_std": 0.015944137484135215,
+            "naucs_at_50_diff1": -0.10169694250184191,
+            "naucs_at_100_max": -0.3263966784104191,
+            "naucs_at_100_std": -0.004102727593586926,
+            "naucs_at_100_diff1": -0.06847320679987662
+        },
+        "vidore/synthetic_rse_restaurant_filtered_v1.0_multilingual": {
+            "ndcg_at_1": 0.51754,
+            "ndcg_at_3": 0.50334,
+            "ndcg_at_5": 0.54222,
+            "ndcg_at_10": 0.58819,
+            "ndcg_at_20": 0.6317,
+            "ndcg_at_50": 0.65775,
+            "ndcg_at_100": 0.67204,
+            "map_at_1": 0.24814,
+            "map_at_3": 0.36856,
+            "map_at_5": 0.42073,
+            "map_at_10": 0.4663,
+            "map_at_20": 0.49471,
+            "map_at_50": 0.50962,
+            "map_at_100": 0.51599,
+            "recall_at_1": 0.24814,
+            "recall_at_3": 0.43973,
+            "recall_at_5": 0.56658,
+            "recall_at_10": 0.71523,
+            "recall_at_20": 0.85844,
+            "recall_at_50": 0.93536,
+            "recall_at_100": 0.97971,
+            "precision_at_1": 0.51754,
+            "precision_at_3": 0.3538,
+            "precision_at_5": 0.29123,
+            "precision_at_10": 0.20219,
+            "precision_at_20": 0.13246,
+            "precision_at_50": 0.06825,
+            "precision_at_100": 0.03855,
+            "mrr_at_1": 0.5263157894736842,
+            "mrr_at_3": 0.6016081871345029,
+            "mrr_at_5": 0.6288011695906434,
+            "mrr_at_10": 0.6403230297967141,
+            "mrr_at_20": 0.6438437895094242,
+            "mrr_at_50": 0.6443535683849544,
+            "mrr_at_100": 0.644453714963128,
+            "naucs_at_1_max": 0.01943705593605114,
+            "naucs_at_1_std": 0.10857657836709796,
+            "naucs_at_1_diff1": 0.31024815892610824,
+            "naucs_at_3_max": -0.026602088034168622,
+            "naucs_at_3_std": 0.06693038369249117,
+            "naucs_at_3_diff1": 0.06536575914252513,
+            "naucs_at_5_max": -0.10400806660437836,
+            "naucs_at_5_std": 0.011805949215176312,
+            "naucs_at_5_diff1": -0.040514684440300354,
+            "naucs_at_10_max": -0.16246054552831016,
+            "naucs_at_10_std": -0.004963201865618914,
+            "naucs_at_10_diff1": -0.14424024163286125,
+            "naucs_at_20_max": -0.2068822330886405,
+            "naucs_at_20_std": 0.01182183229908038,
+            "naucs_at_20_diff1": -0.14776288061245282,
+            "naucs_at_50_max": -0.2382304805368312,
+            "naucs_at_50_std": 0.06100699728024982,
+            "naucs_at_50_diff1": -0.16038145767725714,
+            "naucs_at_100_max": -0.2526736911992193,
+            "naucs_at_100_std": 0.07538174516910437,
+            "naucs_at_100_diff1": -0.1425829565807366
+        },
+        "vidore/synthetic_mit_biomedical_tissue_interactions_unfiltered_multilingual": {
+            "ndcg_at_1": 0.55,
+            "ndcg_at_3": 0.54747,
+            "ndcg_at_5": 0.56523,
+            "ndcg_at_10": 0.60236,
+            "ndcg_at_20": 0.63192,
+            "ndcg_at_50": 0.65894,
+            "ndcg_at_100": 0.66916,
+            "map_at_1": 0.32887,
+            "map_at_3": 0.43891,
+            "map_at_5": 0.47556,
+            "map_at_10": 0.51112,
+            "map_at_20": 0.52597,
+            "map_at_50": 0.53502,
+            "map_at_100": 0.53747,
+            "recall_at_1": 0.32887,
+            "recall_at_3": 0.50621,
+            "recall_at_5": 0.59078,
+            "recall_at_10": 0.7021,
+            "recall_at_20": 0.78896,
+            "recall_at_50": 0.87545,
+            "recall_at_100": 0.91215,
+            "precision_at_1": 0.55,
+            "precision_at_3": 0.33698,
+            "precision_at_5": 0.25625,
+            "precision_at_10": 0.16797,
+            "precision_at_20": 0.10016,
+            "precision_at_50": 0.04875,
+            "precision_at_100": 0.02653,
+            "mrr_at_1": 0.5421875,
+            "mrr_at_3": 0.6315104166666667,
+            "mrr_at_5": 0.6464322916666666,
+            "mrr_at_10": 0.6572829861111111,
+            "mrr_at_20": 0.6611296462086782,
+            "mrr_at_50": 0.6623597304996574,
+            "mrr_at_100": 0.6624612115027679,
+            "naucs_at_1_max": 0.21639796742259326,
+            "naucs_at_1_std": -0.03628975637155085,
+            "naucs_at_1_diff1": 0.4459120269606122,
+            "naucs_at_3_max": 0.03669633870346066,
+            "naucs_at_3_std": -0.08520389607094239,
+            "naucs_at_3_diff1": 0.040408263382322174,
+            "naucs_at_5_max": -0.04548934564318856,
+            "naucs_at_5_std": -0.07038321737921435,
+            "naucs_at_5_diff1": -0.10616437737784175,
+            "naucs_at_10_max": -0.1278578682315184,
+            "naucs_at_10_std": -0.10363742277920715,
+            "naucs_at_10_diff1": -0.1945365842241381,
+            "naucs_at_20_max": -0.1602047364202233,
+            "naucs_at_20_std": -0.09160204736420194,
+            "naucs_at_20_diff1": -0.2459548568724758,
+            "naucs_at_50_max": -0.16751679776268727,
+            "naucs_at_50_std": -0.029600561994848625,
+            "naucs_at_50_diff1": -0.28501697067776877,
+            "naucs_at_100_max": -0.1840412781009732,
+            "naucs_at_100_std": -0.029290750955906976,
+            "naucs_at_100_diff1": -0.27754478203216754
+        },
+        "vidore/synthetic_axa_filtered_v1.0": {
+            "ndcg_at_1": 0.61111,
+            "ndcg_at_3": 0.63835,
+            "ndcg_at_5": 0.65099,
+            "ndcg_at_10": 0.66413,
+            "ndcg_at_20": 0.67711,
+            "ndcg_at_50": 0.72121,
+            "ndcg_at_100": 0.73402,
+            "map_at_1": 0.29544,
+            "map_at_3": 0.39626,
+            "map_at_5": 0.4688,
+            "map_at_10": 0.53459,
+            "map_at_20": 0.55814,
+            "map_at_50": 0.5817,
+            "map_at_100": 0.58632,
+            "recall_at_1": 0.29544,
+            "recall_at_3": 0.49889,
+            "recall_at_5": 0.59938,
+            "recall_at_10": 0.74741,
+            "recall_at_20": 0.80396,
+            "recall_at_50": 0.93358,
+            "recall_at_100": 0.97068,
+            "precision_at_1": 0.61111,
+            "precision_at_3": 0.46296,
+            "precision_at_5": 0.41111,
+            "precision_at_10": 0.28889,
+            "precision_at_20": 0.17222,
+            "precision_at_50": 0.08556,
+            "precision_at_100": 0.04611,
+            "mrr_at_1": 0.6111111111111112,
+            "mrr_at_3": 0.712962962962963,
+            "mrr_at_5": 0.712962962962963,
+            "mrr_at_10": 0.7222222222222222,
+            "mrr_at_20": 0.7222222222222222,
+            "mrr_at_50": 0.7239057239057238,
+            "mrr_at_100": 0.7239057239057238,
+            "naucs_at_1_max": 0.021377128555080525,
+            "naucs_at_1_std": -0.0024330874521297486,
+            "naucs_at_1_diff1": 0.6766922062442021,
+            "naucs_at_3_max": -0.5386403640030516,
+            "naucs_at_3_std": -0.339024827473691,
+            "naucs_at_3_diff1": -0.13999146573412896,
+            "naucs_at_5_max": -0.5536345449068457,
+            "naucs_at_5_std": -0.32978315319997686,
+            "naucs_at_5_diff1": -0.1515416702953352,
+            "naucs_at_10_max": -0.6913115857640982,
+            "naucs_at_10_std": -0.3590368655225717,
+            "naucs_at_10_diff1": -0.4262436564575138,
+            "naucs_at_20_max": -0.6790472085176197,
+            "naucs_at_20_std": -0.2636829434018275,
+            "naucs_at_20_diff1": -0.46059572736900606,
+            "naucs_at_50_max": -0.6436105950460144,
+            "naucs_at_50_std": -0.17446418069026137,
+            "naucs_at_50_diff1": -0.5059059487831842,
+            "naucs_at_100_max": -0.6463006925980164,
+            "naucs_at_100_std": -0.1497578630779427,
+            "naucs_at_100_diff1": -0.5112033641662738
+        },
+        "vidore/synthetic_economics_macro_economy_2024_filtered_v1.0": {
+            "ndcg_at_1": 0.65517,
+            "ndcg_at_3": 0.63819,
+            "ndcg_at_5": 0.61521,
+            "ndcg_at_10": 0.5906,
+            "ndcg_at_20": 0.60536,
+            "ndcg_at_50": 0.66285,
+            "ndcg_at_100": 0.70251,
+            "map_at_1": 0.06921,
+            "map_at_3": 0.18532,
+            "map_at_5": 0.25479,
+            "map_at_10": 0.33604,
+            "map_at_20": 0.39502,
+            "map_at_50": 0.44809,
+            "map_at_100": 0.47767,
+            "recall_at_1": 0.06921,
+            "recall_at_3": 0.23206,
+            "recall_at_5": 0.33701,
+            "recall_at_10": 0.4769,
+            "recall_at_20": 0.60612,
+            "recall_at_50": 0.78689,
+            "recall_at_100": 0.90662,
+            "precision_at_1": 0.65517,
+            "precision_at_3": 0.6092,
+            "precision_at_5": 0.55862,
+            "precision_at_10": 0.4431,
+            "precision_at_20": 0.32241,
+            "precision_at_50": 0.19414,
+            "precision_at_100": 0.12759,
+            "mrr_at_1": 0.6206896551724138,
+            "mrr_at_3": 0.735632183908046,
+            "mrr_at_5": 0.7468390804597702,
+            "mrr_at_10": 0.7525862068965516,
+            "mrr_at_20": 0.753448275862069,
+            "mrr_at_50": 0.7547113805734496,
+            "mrr_at_100": 0.7547113805734496,
+            "naucs_at_1_max": 0.3467227583809364,
+            "naucs_at_1_std": 0.46598561053805343,
+            "naucs_at_1_diff1": 0.2918337389920708,
+            "naucs_at_3_max": 0.2797595385222247,
+            "naucs_at_3_std": 0.2864952959792759,
+            "naucs_at_3_diff1": 0.3913734193294228,
+            "naucs_at_5_max": 0.25393027831323856,
+            "naucs_at_5_std": 0.3095747235773235,
+            "naucs_at_5_diff1": 0.3645542505594328,
+            "naucs_at_10_max": 0.2700390408547681,
+            "naucs_at_10_std": 0.32232824677308325,
+            "naucs_at_10_diff1": 0.2680868235374596,
+            "naucs_at_20_max": 0.17701498139167485,
+            "naucs_at_20_std": 0.2949738297548142,
+            "naucs_at_20_diff1": 0.22778015683462374,
+            "naucs_at_50_max": 0.005866671306298476,
+            "naucs_at_50_std": 0.18617183360877715,
+            "naucs_at_50_diff1": 0.2543543566117715,
+            "naucs_at_100_max": -0.051660558691400224,
+            "naucs_at_100_std": 0.12365631128720926,
+            "naucs_at_100_diff1": 0.27211571635112025
+        }, "../colpali/data_dir/eval_vidore/tatdqa_test": {"ndcg_at_1": 0.70109, "ndcg_at_3": 0.7923, "ndcg_at_5": 0.81432, "ndcg_at_10": 0.82714, "ndcg_at_20": 0.83121, "ndcg_at_50": 0.83523, "ndcg_at_100": 0.83688, "map_at_1": 0.70109, "map_at_3": 0.77005, "map_at_5": 0.78241, "map_at_10": 0.78785, "map_at_20": 0.78901, "map_at_50": 0.78967, "map_at_100": 0.78981, "recall_at_1": 0.70109, "recall_at_3": 0.85662, "recall_at_5": 0.90948, "recall_at_10": 0.94836, "recall_at_20": 0.96416, "recall_at_50": 0.9842, "recall_at_100": 0.99453, "precision_at_1": 0.70109, "precision_at_3": 0.28554, "precision_at_5": 0.1819, "precision_at_10": 0.09484, "precision_at_20": 0.04821, "precision_at_50": 0.01968, "precision_at_100": 0.00995, "mrr_at_1": 0.7017010935601458, "mrr_at_3": 0.7709599027946537, "mrr_at_5": 0.7835965978128797, "mrr_at_10": 0.7887963123686088, "mrr_at_20": 0.7900069324299046, "mrr_at_50": 0.7906871013277383, "mrr_at_100": 0.7908122337289102, "naucs_at_1_max": 0.31323987398365244, "naucs_at_1_std": -0.06973458951296707, "naucs_at_1_diff1": 0.8257166838058789, "naucs_at_3_max": 0.3926659737473996, "naucs_at_3_std": 0.06571917292636736, "naucs_at_3_diff1": 0.7756394870292411, "naucs_at_5_max": 0.397380358594201, "naucs_at_5_std": 0.13868537387746546, "naucs_at_5_diff1": 0.7396051349982516, "naucs_at_10_max": 0.501090641931301, "naucs_at_10_std": 0.36694999105347903, "naucs_at_10_diff1": 0.7174193926609567, "naucs_at_20_max": 0.4765529924218107, "naucs_at_20_std": 0.3737660736114713, "naucs_at_20_diff1": 0.6900626633277733, "naucs_at_50_max": 0.6429809235278162, "naucs_at_50_std": 0.5965680225588305, "naucs_at_50_diff1": 0.7246754428536203, "naucs_at_100_max": 0.6364067564396122, "naucs_at_100_std": 0.5448683368190432, "naucs_at_100_diff1": 0.6812946217425379}, "../colpali/data_dir/eval_vidore/shiftproject_test": {"ndcg_at_1": 0.81, "ndcg_at_3": 0.8894, "ndcg_at_5": 0.90663, "ndcg_at_10": 0.90952, "ndcg_at_20": 0.90952, "ndcg_at_50": 0.9115, "ndcg_at_100": 
0.9115, "map_at_1": 0.81, "map_at_3": 0.87167, "map_at_5": 0.88167, "map_at_10": 0.88267, "map_at_20": 0.88267, "map_at_50": 0.88298, "map_at_100": 0.88298, "recall_at_1": 0.81, "recall_at_3": 0.94, "recall_at_5": 0.98, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.81, "precision_at_3": 0.31333, "precision_at_5": 0.196, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.82, "mrr_at_3": 0.8766666666666667, "mrr_at_5": 0.8866666666666667, "mrr_at_10": 0.8877777777777777, "mrr_at_20": 0.8877777777777777, "mrr_at_50": 0.8880902777777777, "mrr_at_100": 0.8880902777777777, "naucs_at_1_max": 0.0649588768400645, "naucs_at_1_std": -0.2740845513122743, "naucs_at_1_diff1": 0.6852637644716851, "naucs_at_3_max": 0.3546529723000309, "naucs_at_3_std": -0.11009959539371503, "naucs_at_3_diff1": 0.5325241207594116, "naucs_at_5_max": 0.722222222222224, "naucs_at_5_std": 0.3384687208216692, "naucs_at_5_diff1": 0.42250233426704475, "naucs_at_10_max": 0.7222222222222276, "naucs_at_10_std": 0.5541549953314738, "naucs_at_10_diff1": 0.7222222222222276, "naucs_at_20_max": 0.7222222222222276, "naucs_at_20_std": 0.5541549953314738, "naucs_at_20_diff1": 0.7222222222222276, "naucs_at_50_max": NaN, "naucs_at_50_std": NaN, "naucs_at_50_diff1": NaN, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN}, "../colpali/data_dir/eval_vidore/syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.99, "ndcg_at_3": 0.99, "ndcg_at_5": 0.99387, "ndcg_at_10": 0.99387, "ndcg_at_20": 0.99387, "ndcg_at_50": 0.99387, "ndcg_at_100": 0.99387, "map_at_1": 0.99, "map_at_3": 0.99, "map_at_5": 0.992, "map_at_10": 0.992, "map_at_20": 0.992, "map_at_50": 0.992, "map_at_100": 0.992, "recall_at_1": 0.99, "recall_at_3": 0.99, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.99, 
"precision_at_3": 0.33, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.99, "mrr_at_3": 0.99, "mrr_at_5": 0.992, "mrr_at_10": 0.992, "mrr_at_20": 0.992, "mrr_at_50": 0.992, "mrr_at_100": 0.992, "naucs_at_1_max": 0.8692810457516276, "naucs_at_1_std": -0.5634920634920657, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 0.8692810457516356, "naucs_at_3_std": -0.5634920634921204, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": NaN, "naucs_at_50_std": NaN, "naucs_at_50_diff1": NaN, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN}, "../colpali/data_dir/eval_vidore/syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.91, "ndcg_at_3": 0.95417, "ndcg_at_5": 0.96278, "ndcg_at_10": 0.96278, "ndcg_at_20": 0.96278, "ndcg_at_50": 0.96278, "ndcg_at_100": 0.96278, "map_at_1": 0.91, "map_at_3": 0.945, "map_at_5": 0.95, "map_at_10": 0.95, "map_at_20": 0.95, "map_at_50": 0.95, "map_at_100": 0.95, "recall_at_1": 0.91, "recall_at_3": 0.98, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.91, "precision_at_3": 0.32667, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.92, "mrr_at_3": 0.9566666666666667, "mrr_at_5": 0.9566666666666667, "mrr_at_10": 0.9566666666666667, "mrr_at_20": 0.9566666666666667, "mrr_at_50": 0.9566666666666667, "mrr_at_100": 0.9566666666666667, "naucs_at_1_max": 0.6380329909741665, "naucs_at_1_std": 0.3872289656603365, "naucs_at_1_diff1": 0.9419026870007259, "naucs_at_3_max": 0.8611111111111119, "naucs_at_3_std": 0.5401493930905577, "naucs_at_3_diff1": 0.9346405228758099, 
"naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": NaN, "naucs_at_50_std": NaN, "naucs_at_50_diff1": NaN, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN}, "../colpali/data_dir/eval_vidore/arxivqa_test_subsampled": {"ndcg_at_1": 0.824, "ndcg_at_3": 0.86576, "ndcg_at_5": 0.88065, "ndcg_at_10": 0.88761, "ndcg_at_20": 0.89161, "ndcg_at_50": 0.89571, "ndcg_at_100": 0.89671, "map_at_1": 0.824, "map_at_3": 0.85533, "map_at_5": 0.86363, "map_at_10": 0.86642, "map_at_20": 0.86749, "map_at_50": 0.8682, "map_at_100": 0.8683, "recall_at_1": 0.824, "recall_at_3": 0.896, "recall_at_5": 0.932, "recall_at_10": 0.954, "recall_at_20": 0.97, "recall_at_50": 0.99, "recall_at_100": 0.996, "precision_at_1": 0.824, "precision_at_3": 0.29867, "precision_at_5": 0.1864, "precision_at_10": 0.0954, "precision_at_20": 0.0485, "precision_at_50": 0.0198, "precision_at_100": 0.00996, "mrr_at_1": 0.826, "mrr_at_3": 0.8566666666666666, "mrr_at_5": 0.8647666666666667, "mrr_at_10": 0.8674404761904762, "mrr_at_20": 0.868713893416525, "mrr_at_50": 0.8694424481678229, "mrr_at_100": 0.8695414304104477, "naucs_at_1_max": 0.7663278157610652, "naucs_at_1_std": -0.09768014573587591, "naucs_at_1_diff1": 0.9187263853904275, "naucs_at_3_max": 0.8138309391467555, "naucs_at_3_std": -0.023388435717895166, "naucs_at_3_diff1": 0.8748014870425184, "naucs_at_5_max": 0.8622919756137744, "naucs_at_5_std": 0.09893172955456826, "naucs_at_5_diff1": 0.8950952930191681, "naucs_at_10_max": 0.8887671010433149, "naucs_at_10_std": 0.18117971826411405, "naucs_at_10_diff1": 0.9102829537612158, "naucs_at_20_max": 0.9081854964207933, "naucs_at_20_std": 0.31092436974789345, "naucs_at_20_diff1": 0.9193899782135069, "naucs_at_50_max": 1.0, "naucs_at_50_std": 0.7428571428571219, "naucs_at_50_diff1": 0.9215686274509768, 
"naucs_at_100_max": 1.0, "naucs_at_100_std": 0.5613912231558791, "naucs_at_100_diff1": 0.9346405228758466}, "../colpali/data_dir/eval_vidore/docvqa_test_subsampled": {"ndcg_at_1": 0.52993, "ndcg_at_3": 0.58573, "ndcg_at_5": 0.60558, "ndcg_at_10": 0.62266, "ndcg_at_20": 0.63582, "ndcg_at_50": 0.64908, "ndcg_at_100": 0.6581, "map_at_1": 0.52993, "map_at_3": 0.57206, "map_at_5": 0.58293, "map_at_10": 0.58991, "map_at_20": 0.59366, "map_at_50": 0.59581, "map_at_100": 0.59662, "recall_at_1": 0.52993, "recall_at_3": 0.62528, "recall_at_5": 0.67406, "recall_at_10": 0.72727, "recall_at_20": 0.77827, "recall_at_50": 0.84479, "recall_at_100": 0.90022, "precision_at_1": 0.52993, "precision_at_3": 0.20843, "precision_at_5": 0.13481, "precision_at_10": 0.07273, "precision_at_20": 0.03891, "precision_at_50": 0.0169, "precision_at_100": 0.009, "mrr_at_1": 0.5232815964523282, "mrr_at_3": 0.5728011825572801, "mrr_at_5": 0.5826681448632669, "mrr_at_10": 0.5891273360785556, "mrr_at_20": 0.5925723253011941, "mrr_at_50": 0.5943296372892853, "mrr_at_100": 0.59515893309537, "naucs_at_1_max": 0.1489445922678588, "naucs_at_1_std": 0.5944168012150516, "naucs_at_1_diff1": 0.9184631698098695, "naucs_at_3_max": 0.01752251361569101, "naucs_at_3_std": 0.7203256897672715, "naucs_at_3_diff1": 0.8894280781702324, "naucs_at_5_max": -0.04548428009159777, "naucs_at_5_std": 0.7690453232976177, "naucs_at_5_diff1": 0.8644439246273121, "naucs_at_10_max": -0.13892914655317037, "naucs_at_10_std": 0.8207534993760452, "naucs_at_10_diff1": 0.8438993391008719, "naucs_at_20_max": -0.27349363756086625, "naucs_at_20_std": 0.8575543278988474, "naucs_at_20_diff1": 0.8389957258659689, "naucs_at_50_max": -0.43528705588098854, "naucs_at_50_std": 0.9002887869267794, "naucs_at_50_diff1": 0.8407727389585528, "naucs_at_100_max": -0.4873376707138829, "naucs_at_100_std": 0.8779309480426964, "naucs_at_100_diff1": 0.8126388341509826}, "../colpali/data_dir/eval_vidore/syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 
0.96, "ndcg_at_3": 0.97762, "ndcg_at_5": 0.98149, "ndcg_at_10": 0.98149, "ndcg_at_20": 0.98149, "ndcg_at_50": 0.98149, "ndcg_at_100": 0.98149, "map_at_1": 0.96, "map_at_3": 0.97333, "map_at_5": 0.97533, "map_at_10": 0.97533, "map_at_20": 0.97533, "map_at_50": 0.97533, "map_at_100": 0.97533, "recall_at_1": 0.96, "recall_at_3": 0.99, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.96, "precision_at_3": 0.33, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.96, "mrr_at_3": 0.9733333333333333, "mrr_at_5": 0.9753333333333333, "mrr_at_10": 0.9753333333333333, "mrr_at_20": 0.9753333333333333, "mrr_at_50": 0.9753333333333333, "mrr_at_100": 0.9753333333333333, "naucs_at_1_max": 0.6785714285714297, "naucs_at_1_std": -0.529178338001864, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.35807656395889226, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": NaN, "naucs_at_50_std": NaN, "naucs_at_50_diff1": NaN, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN}, "../colpali/data_dir/eval_vidore/infovqa_test_subsampled": {"ndcg_at_1": 0.89069, "ndcg_at_3": 0.91896, "ndcg_at_5": 0.92636, "ndcg_at_10": 0.93041, "ndcg_at_20": 0.93306, "ndcg_at_50": 0.93549, "ndcg_at_100": 0.93583, "map_at_1": 0.89069, "map_at_3": 0.91194, "map_at_5": 0.91599, "map_at_10": 0.91774, "map_at_20": 0.91851, "map_at_50": 0.91891, "map_at_100": 0.91894, "recall_at_1": 0.89069, "recall_at_3": 0.93927, "recall_at_5": 0.95749, "recall_at_10": 0.96964, "recall_at_20": 0.97976, "recall_at_50": 0.9919, "recall_at_100": 0.99393, "precision_at_1": 0.89069, "precision_at_3": 0.31309, "precision_at_5": 
0.1915, "precision_at_10": 0.09696, "precision_at_20": 0.04899, "precision_at_50": 0.01984, "precision_at_100": 0.00994, "mrr_at_1": 0.888663967611336, "mrr_at_3": 0.9102564102564102, "mrr_at_5": 0.9148110661268556, "mrr_at_10": 0.9165558126084441, "mrr_at_20": 0.9173312497615903, "mrr_at_50": 0.917719246204226, "mrr_at_100": 0.917752984395859, "naucs_at_1_max": 0.6452597879999299, "naucs_at_1_std": -0.1583967206391628, "naucs_at_1_diff1": 0.9382939669972173, "naucs_at_3_max": 0.6792175380169426, "naucs_at_3_std": -0.07352824072374452, "naucs_at_3_diff1": 0.9330460900047705, "naucs_at_5_max": 0.8390629595590892, "naucs_at_5_std": 0.22116664077349757, "naucs_at_5_diff1": 0.9626852988386374, "naucs_at_10_max": 0.8657206615100057, "naucs_at_10_std": 0.2744376241740944, "naucs_at_10_diff1": 0.9564661819784096, "naucs_at_20_max": 0.9183327616354467, "naucs_at_20_std": 0.6804227380555364, "naucs_at_20_diff1": 0.9477594183740975, "naucs_at_50_max": 0.8979159520443043, "naucs_at_50_std": 0.674633185026432, "naucs_at_50_diff1": 0.9346992729676393, "naucs_at_100_max": 0.8638879360590712, "naucs_at_100_std": 0.5661775800352391, "naucs_at_100_diff1": 0.9129323639568517}, "../colpali/data_dir/eval_vidore/syntheticDocQA_energy_test": {"ndcg_at_1": 0.94, "ndcg_at_3": 0.95893, "ndcg_at_5": 0.95893, "ndcg_at_10": 0.96542, "ndcg_at_20": 0.96542, "ndcg_at_50": 0.9676, "ndcg_at_100": 0.9676, "map_at_1": 0.94, "map_at_3": 0.955, "map_at_5": 0.955, "map_at_10": 0.95768, "map_at_20": 0.95768, "map_at_50": 0.95811, "map_at_100": 0.95811, "recall_at_1": 0.94, "recall_at_3": 0.97, "recall_at_5": 0.97, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.94, "precision_at_3": 0.32333, "precision_at_5": 0.194, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.95, "mrr_at_3": 0.96, "mrr_at_5": 0.96, "mrr_at_10": 0.9625396825396826, "mrr_at_20": 0.9630659983291562, 
"mrr_at_50": 0.9630659983291562, "mrr_at_100": 0.9630659983291562, "naucs_at_1_max": 0.06847183317771653, "naucs_at_1_std": -0.9000933706816028, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 0.6150015561780285, "naucs_at_3_std": -1.21708683473389, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 0.6150015561780299, "naucs_at_5_std": -1.2170868347338937, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.7222222222222276, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 0.7222222222222276, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": NaN, "naucs_at_50_std": NaN, "naucs_at_50_diff1": NaN, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN}, "../colpali/data_dir/eval_vidore/tabfquad_test_subsampled": {"ndcg_at_1": 0.84286, "ndcg_at_3": 0.88737, "ndcg_at_5": 0.89459, "ndcg_at_10": 0.89794, "ndcg_at_20": 0.90691, "ndcg_at_50": 0.90985, "ndcg_at_100": 0.91045, "map_at_1": 0.84286, "map_at_3": 0.87679, "map_at_5": 0.88071, "map_at_10": 0.88203, "map_at_20": 0.88445, "map_at_50": 0.88496, "map_at_100": 0.88502, "recall_at_1": 0.84286, "recall_at_3": 0.91786, "recall_at_5": 0.93571, "recall_at_10": 0.94643, "recall_at_20": 0.98214, "recall_at_50": 0.99643, "recall_at_100": 1.0, "precision_at_1": 0.84286, "precision_at_3": 0.30595, "precision_at_5": 0.18714, "precision_at_10": 0.09464, "precision_at_20": 0.04911, "precision_at_50": 0.01993, "precision_at_100": 0.01, "mrr_at_1": 0.8392857142857143, "mrr_at_3": 0.8744047619047619, "mrr_at_5": 0.8783333333333333, "mrr_at_10": 0.8797108843537415, "mrr_at_20": 0.8821667166283763, "mrr_at_50": 0.8826871907663073, "mrr_at_100": 0.8827487671209872, "naucs_at_1_max": 0.5016709740664825, "naucs_at_1_std": 0.19134881026646522, "naucs_at_1_diff1": 0.9055454059353004, "naucs_at_3_max": 0.6889741403807901, "naucs_at_3_std": 0.34459870904883405, "naucs_at_3_diff1": 0.8472780416514442, "naucs_at_5_max": 0.6857039111941077, "naucs_at_5_std": 
0.40758896151053337, "naucs_at_5_diff1": 0.8275495383338527, "naucs_at_10_max": 0.6228446934329287, "naucs_at_10_std": 0.31633986928104846, "naucs_at_10_diff1": 0.8017740429505158, "naucs_at_20_max": 0.9183006535947714, "naucs_at_20_std": 0.7605042016806759, "naucs_at_20_diff1": 0.9183006535947714, "naucs_at_50_max": 1.0, "naucs_at_50_std": 1.0, "naucs_at_50_diff1": 0.8692810457515607, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0}}