Add data collator for cell classification and example for cell classification

Files changed (6) hide show

examples/cell_classification.ipynb +1954 -0
examples/pretrain_geneformer_w_deepspeed.py +2 -2
geneformer/__init__.py +8 -0
geneformer/collator_for_cell_classification.py +581 -0
geneformer/{trainer.py → pretrainer.py} +3 -3
geneformer/tokenizer.py +3 -3

examples/cell_classification.ipynb ADDED Viewed

	@@ -0,0 +1,1954 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "234afff3",
+   "metadata": {},
+   "source": [
+    "## Geneformer Fine-Tuning for Cell Annotation Application"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "1cbe6178-ea4d-478a-80a8-65ffaa4c1820",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "GPU_NUMBER = [0]\n",
+    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \",\".join([str(s) for s in GPU_NUMBER])\n",
+    "os.environ[\"NCCL_DEBUG\"] = \"INFO\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "a9885d9f-00ac-4c84-b6a3-b7b648a90f0f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# imports\n",
+    "from collections import Counter\n",
+    "import datetime\n",
+    "import pickle\n",
+    "import subprocess\n",
+    "import seaborn as sns; sns.set()\n",
+    "from datasets import load_from_disk\n",
+    "from sklearn.metrics import accuracy_score, f1_score\n",
+    "from transformers import BertForSequenceClassification\n",
+    "from transformers import Trainer\n",
+    "from transformers.training_args import TrainingArguments\n",
+    "\n",
+    "from geneformer import DataCollatorForCellClassification"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "68bd3b98-5409-4105-b7af-f1ff64ea6a72",
+   "metadata": {},
+   "source": [
+    "## Prepare training and evaluation datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "5735f1b7-7595-4a02-be17-2c5b970ad81a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load train dataset (includes all tissues)\n",
+    "train_dataset=load_from_disk(\"/path/to/cell_type_train_data.dataset\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "60eb8b0b-03ba-4065-98e3-0e424a9174ad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load evaluation dataset (includes all tissues)\n",
+    "eval_dataset=load_from_disk(\"/path/to/cell_type_test_data.dataset\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a4297a02-4c4c-434c-ae55-3387a0b239b5",
+   "metadata": {
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "dataset_list = []\n",
+    "evalset_list = []\n",
+    "organ_list = []\n",
+    "target_dict_list = []\n",
+    "\n",
+    "for organ in Counter(train_dataset[\"organ_major\"]).keys():\n",
+    "    # collect list of tissues for fine-tuning (immune and bone marrow are included together)\n",
+    "    if organ in [\"bone_marrow\"]:  \n",
+    "        continue\n",
+    "    elif organ==\"immune\":\n",
+    "        organ_ids = [\"immune\",\"bone_marrow\"]\n",
+    "        organ_list += [\"immune\"]\n",
+    "    else:\n",
+    "        organ_ids = [organ]\n",
+    "        organ_list += [organ]\n",
+    "    \n",
+    "    print(organ)\n",
+    "    \n",
+    "    # filter datasets for given organ\n",
+    "    def if_organ(example):\n",
+    "        return example[\"organ_major\"] in organ_ids\n",
+    "    trainset_organ = train_dataset.filter(if_organ, num_proc=16)\n",
+    "    \n",
+    "    # per scDeepsort published method, drop cell types representing <0.5% of cells\n",
+    "    celltype_counter = Counter(trainset_organ[\"cell_type\"])\n",
+    "    total_cells = sum(celltype_counter.values())\n",
+    "    cells_to_keep = [k for k,v in celltype_counter.items() if v>(0.005*total_cells)]\n",
+    "    def if_not_rare_celltype(example):\n",
+    "        return example[\"cell_type\"] in cells_to_keep\n",
+    "    trainset_organ_subset = trainset_organ.filter(if_not_rare_celltype, num_proc=16)\n",
+    "      \n",
+    "    # shuffle dataset, rename cell_type column to label, and drop organ_major column\n",
+    "    trainset_organ_shuffled = trainset_organ_subset.shuffle(seed=42)\n",
+    "    trainset_organ_shuffled = trainset_organ_shuffled.rename_column(\"cell_type\",\"label\")\n",
+    "    trainset_organ_shuffled = trainset_organ_shuffled.remove_columns(\"organ_major\")\n",
+    "    \n",
+    "    # create dictionary of cell types : label ids\n",
+    "    target_names = list(Counter(trainset_organ_shuffled[\"label\"]).keys())\n",
+    "    target_name_id_dict = dict(zip(target_names,[i for i in range(len(target_names))]))\n",
+    "    target_dict_list += [target_name_id_dict]\n",
+    "    \n",
+    "    # change labels to numerical ids\n",
+    "    def classes_to_ids(example):\n",
+    "        example[\"label\"] = target_name_id_dict[example[\"label\"]]\n",
+    "        return example\n",
+    "    labeled_trainset = trainset_organ_shuffled.map(classes_to_ids, num_proc=16)\n",
+    "    \n",
+    "    # create 80/20 train/eval splits\n",
+    "    labeled_train_split = labeled_trainset.select([i for i in range(0,round(len(labeled_trainset)*0.8))])\n",
+    "    labeled_eval_split = labeled_trainset.select([i for i in range(round(len(labeled_trainset)*0.8),len(labeled_trainset))])\n",
+    "    \n",
+    "    # filter eval split to keep only cell types present in the corresponding training split\n",
+    "    trained_labels = list(Counter(labeled_train_split[\"label\"]).keys())\n",
+    "    def if_trained_label(example):\n",
+    "        return example[\"label\"] in trained_labels\n",
+    "    labeled_eval_split_subset = labeled_eval_split.filter(if_trained_label, num_proc=16)\n",
+    "\n",
+    "    dataset_list += [labeled_train_split]\n",
+    "    evalset_list += [labeled_eval_split_subset]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "83e20521-597a-4c54-897b-c4d42ea622c2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trainset_dict = dict(zip(organ_list,dataset_list))\n",
+    "traintargetdict_dict = dict(zip(organ_list,target_dict_list))\n",
+    "\n",
+    "evalset_dict = dict(zip(organ_list,evalset_list))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "10eb110d-ba43-4efc-bc43-1815d6912647",
+   "metadata": {},
+   "source": [
+    "## Fine-Tune With Cell Classification Learning Objective and Quantify Predictive Performance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "cd7b1cfb-f5cb-460e-ae77-769522ece054",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_metrics(pred):\n",
+    "    labels = pred.label_ids\n",
+    "    preds = pred.predictions.argmax(-1)\n",
+    "    # calculate accuracy and macro F1 using sklearn's metric functions\n",
+    "    acc = accuracy_score(labels, preds)\n",
+    "    macro_f1 = f1_score(labels, preds, average='macro')\n",
+    "    return {\n",
+    "      'accuracy': acc,\n",
+    "      'macro_f1': macro_f1\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "d24e1ab7-0131-44bd-b458-1ce5ba31853e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set model parameters\n",
+    "# max input size\n",
+    "max_input_size = 2 ** 11  # 2048\n",
+    "\n",
+    "# set training parameters\n",
+    "# max learning rate\n",
+    "max_lr = 5e-5\n",
+    "# how many pretrained layers to freeze\n",
+    "freeze_layers = 0\n",
+    "# number of GPUs\n",
+    "num_gpus = 1\n",
+    "# number of CPU cores\n",
+    "num_proc = 16\n",
+    "# batch size for training and eval\n",
+    "geneformer_batch_size = 12\n",
+    "# learning schedule\n",
+    "lr_schedule_fn = \"linear\"\n",
+    "# warmup steps\n",
+    "warmup_steps = 500\n",
+    "# number of epochs\n",
+    "epochs = 10\n",
+    "# optimizer\n",
+    "optimizer = \"adamw\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "05164c24-5fbf-4372-b26c-a43f3777a88d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
+      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "spleen\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='10280' max='10280' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [10280/10280 13:33, Epoch 10/10]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>Accuracy</th>\n",
+       "      <th>Macro F1</th>\n",
+       "      <th>Weighted F1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>0.087000</td>\n",
+       "      <td>0.068067</td>\n",
+       "      <td>0.985404</td>\n",
+       "      <td>0.956839</td>\n",
+       "      <td>0.985483</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>0.044400</td>\n",
+       "      <td>0.075289</td>\n",
+       "      <td>0.985079</td>\n",
+       "      <td>0.955069</td>\n",
+       "      <td>0.984898</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>0.066700</td>\n",
+       "      <td>0.078703</td>\n",
+       "      <td>0.983782</td>\n",
+       "      <td>0.953240</td>\n",
+       "      <td>0.983959</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>0.037400</td>\n",
+       "      <td>0.057132</td>\n",
+       "      <td>0.989945</td>\n",
+       "      <td>0.970619</td>\n",
+       "      <td>0.989883</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>0.025000</td>\n",
+       "      <td>0.061644</td>\n",
+       "      <td>0.988323</td>\n",
+       "      <td>0.961126</td>\n",
+       "      <td>0.988211</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>0.022400</td>\n",
+       "      <td>0.065323</td>\n",
+       "      <td>0.989296</td>\n",
+       "      <td>0.969737</td>\n",
+       "      <td>0.989362</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>0.018600</td>\n",
+       "      <td>0.063710</td>\n",
+       "      <td>0.989620</td>\n",
+       "      <td>0.969436</td>\n",
+       "      <td>0.989579</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>0.039800</td>\n",
+       "      <td>0.065919</td>\n",
+       "      <td>0.989945</td>\n",
+       "      <td>0.968065</td>\n",
+       "      <td>0.989802</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>0.030200</td>\n",
+       "      <td>0.061359</td>\n",
+       "      <td>0.990269</td>\n",
+       "      <td>0.971700</td>\n",
+       "      <td>0.990314</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.013400</td>\n",
+       "      <td>0.059181</td>\n",
+       "      <td>0.991567</td>\n",
+       "      <td>0.974599</td>\n",
+       "      <td>0.991552</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='257' max='257' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [257/257 00:07]\n",
+       "    </div>\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
+      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "kidney\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='29340' max='29340' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [29340/29340 45:43, Epoch 10/10]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>Accuracy</th>\n",
+       "      <th>Macro F1</th>\n",
+       "      <th>Weighted F1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>0.326900</td>\n",
+       "      <td>0.299193</td>\n",
+       "      <td>0.912500</td>\n",
+       "      <td>0.823067</td>\n",
+       "      <td>0.909627</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>0.224200</td>\n",
+       "      <td>0.239580</td>\n",
+       "      <td>0.926477</td>\n",
+       "      <td>0.850237</td>\n",
+       "      <td>0.923902</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>0.221600</td>\n",
+       "      <td>0.242810</td>\n",
+       "      <td>0.930227</td>\n",
+       "      <td>0.878553</td>\n",
+       "      <td>0.930349</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>0.166100</td>\n",
+       "      <td>0.264178</td>\n",
+       "      <td>0.933409</td>\n",
+       "      <td>0.884759</td>\n",
+       "      <td>0.933031</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>0.144100</td>\n",
+       "      <td>0.279282</td>\n",
+       "      <td>0.935000</td>\n",
+       "      <td>0.887659</td>\n",
+       "      <td>0.934987</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>0.112800</td>\n",
+       "      <td>0.307647</td>\n",
+       "      <td>0.935909</td>\n",
+       "      <td>0.889239</td>\n",
+       "      <td>0.935365</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>0.084600</td>\n",
+       "      <td>0.326399</td>\n",
+       "      <td>0.932841</td>\n",
+       "      <td>0.892447</td>\n",
+       "      <td>0.933191</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>0.068300</td>\n",
+       "      <td>0.332626</td>\n",
+       "      <td>0.936591</td>\n",
+       "      <td>0.891629</td>\n",
+       "      <td>0.936354</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>0.065500</td>\n",
+       "      <td>0.348174</td>\n",
+       "      <td>0.935227</td>\n",
+       "      <td>0.889484</td>\n",
+       "      <td>0.935040</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.046100</td>\n",
+       "      <td>0.355350</td>\n",
+       "      <td>0.935000</td>\n",
+       "      <td>0.894578</td>\n",
+       "      <td>0.934971</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='734' max='734' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [734/734 00:27]\n",
+       "    </div>\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
+      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "lung\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='21750' max='21750' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [21750/21750 30:32, Epoch 10/10]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>Accuracy</th>\n",
+       "      <th>Macro F1</th>\n",
+       "      <th>Weighted F1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>0.337600</td>\n",
+       "      <td>0.341523</td>\n",
+       "      <td>0.906360</td>\n",
+       "      <td>0.759979</td>\n",
+       "      <td>0.899310</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>0.211900</td>\n",
+       "      <td>0.258954</td>\n",
+       "      <td>0.928429</td>\n",
+       "      <td>0.835534</td>\n",
+       "      <td>0.925903</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>0.208600</td>\n",
+       "      <td>0.282081</td>\n",
+       "      <td>0.930421</td>\n",
+       "      <td>0.842786</td>\n",
+       "      <td>0.928013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>0.144400</td>\n",
+       "      <td>0.253047</td>\n",
+       "      <td>0.935479</td>\n",
+       "      <td>0.871712</td>\n",
+       "      <td>0.935234</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>0.109200</td>\n",
+       "      <td>0.268833</td>\n",
+       "      <td>0.939464</td>\n",
+       "      <td>0.876173</td>\n",
+       "      <td>0.938870</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>0.132700</td>\n",
+       "      <td>0.282697</td>\n",
+       "      <td>0.940536</td>\n",
+       "      <td>0.883271</td>\n",
+       "      <td>0.940191</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>0.081800</td>\n",
+       "      <td>0.295864</td>\n",
+       "      <td>0.940843</td>\n",
+       "      <td>0.884201</td>\n",
+       "      <td>0.940170</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>0.035900</td>\n",
+       "      <td>0.306600</td>\n",
+       "      <td>0.941916</td>\n",
+       "      <td>0.884777</td>\n",
+       "      <td>0.941578</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>0.050800</td>\n",
+       "      <td>0.311677</td>\n",
+       "      <td>0.940536</td>\n",
+       "      <td>0.883437</td>\n",
+       "      <td>0.940294</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.035800</td>\n",
+       "      <td>0.315360</td>\n",
+       "      <td>0.940843</td>\n",
+       "      <td>0.883551</td>\n",
+       "      <td>0.940612</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='544' max='544' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [544/544 00:19]\n",
+       "    </div>\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
+      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "brain\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='8880' max='8880' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [8880/8880 11:14, Epoch 10/10]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>Accuracy</th>\n",
+       "      <th>Macro F1</th>\n",
+       "      <th>Weighted F1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>0.163100</td>\n",
+       "      <td>0.156640</td>\n",
+       "      <td>0.970345</td>\n",
+       "      <td>0.736455</td>\n",
+       "      <td>0.960714</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>0.149800</td>\n",
+       "      <td>0.134897</td>\n",
+       "      <td>0.968844</td>\n",
+       "      <td>0.747114</td>\n",
+       "      <td>0.960726</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>0.105600</td>\n",
+       "      <td>0.115354</td>\n",
+       "      <td>0.972222</td>\n",
+       "      <td>0.775271</td>\n",
+       "      <td>0.964932</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>0.086900</td>\n",
+       "      <td>0.207918</td>\n",
+       "      <td>0.968844</td>\n",
+       "      <td>0.707927</td>\n",
+       "      <td>0.958257</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>0.056400</td>\n",
+       "      <td>0.106548</td>\n",
+       "      <td>0.974099</td>\n",
+       "      <td>0.839838</td>\n",
+       "      <td>0.971611</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>0.037600</td>\n",
+       "      <td>0.117437</td>\n",
+       "      <td>0.978228</td>\n",
+       "      <td>0.856578</td>\n",
+       "      <td>0.975665</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>0.030500</td>\n",
+       "      <td>0.127885</td>\n",
+       "      <td>0.974474</td>\n",
+       "      <td>0.856296</td>\n",
+       "      <td>0.973531</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>0.019300</td>\n",
+       "      <td>0.143203</td>\n",
+       "      <td>0.977853</td>\n",
+       "      <td>0.859362</td>\n",
+       "      <td>0.975776</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>0.007400</td>\n",
+       "      <td>0.153758</td>\n",
+       "      <td>0.972598</td>\n",
+       "      <td>0.852835</td>\n",
+       "      <td>0.972314</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.017200</td>\n",
+       "      <td>0.153911</td>\n",
+       "      <td>0.975976</td>\n",
+       "      <td>0.858196</td>\n",
+       "      <td>0.974498</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='222' max='222' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [222/222 00:04]\n",
+       "    </div>\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
+      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "placenta\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='6180' max='6180' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [6180/6180 10:28, Epoch 10/10]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>Accuracy</th>\n",
+       "      <th>Macro F1</th>\n",
+       "      <th>Weighted F1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>0.128700</td>\n",
+       "      <td>0.125175</td>\n",
+       "      <td>0.960626</td>\n",
+       "      <td>0.935752</td>\n",
+       "      <td>0.959463</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>0.064000</td>\n",
+       "      <td>0.215607</td>\n",
+       "      <td>0.951456</td>\n",
+       "      <td>0.920579</td>\n",
+       "      <td>0.949828</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>0.051300</td>\n",
+       "      <td>0.203044</td>\n",
+       "      <td>0.961165</td>\n",
+       "      <td>0.934195</td>\n",
+       "      <td>0.959470</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>0.045300</td>\n",
+       "      <td>0.115701</td>\n",
+       "      <td>0.978964</td>\n",
+       "      <td>0.966387</td>\n",
+       "      <td>0.978788</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>0.048200</td>\n",
+       "      <td>0.149484</td>\n",
+       "      <td>0.973571</td>\n",
+       "      <td>0.958927</td>\n",
+       "      <td>0.973305</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>0.040900</td>\n",
+       "      <td>0.134339</td>\n",
+       "      <td>0.978964</td>\n",
+       "      <td>0.967466</td>\n",
+       "      <td>0.978899</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>0.001600</td>\n",
+       "      <td>0.159900</td>\n",
+       "      <td>0.978425</td>\n",
+       "      <td>0.966713</td>\n",
+       "      <td>0.978211</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>0.002400</td>\n",
+       "      <td>0.125351</td>\n",
+       "      <td>0.979504</td>\n",
+       "      <td>0.968064</td>\n",
+       "      <td>0.979428</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>0.009400</td>\n",
+       "      <td>0.120132</td>\n",
+       "      <td>0.980583</td>\n",
+       "      <td>0.969631</td>\n",
+       "      <td>0.980506</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.001500</td>\n",
+       "      <td>0.137864</td>\n",
+       "      <td>0.978964</td>\n",
+       "      <td>0.967180</td>\n",
+       "      <td>0.978825</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='155' max='155' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [155/155 00:05]\n",
+       "    </div>\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
+      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "immune\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='17140' max='17140' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [17140/17140 22:02, Epoch 10/10]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>Accuracy</th>\n",
+       "      <th>Macro F1</th>\n",
+       "      <th>Weighted F1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>0.288900</td>\n",
+       "      <td>0.231582</td>\n",
+       "      <td>0.936770</td>\n",
+       "      <td>0.868405</td>\n",
+       "      <td>0.934816</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>0.203200</td>\n",
+       "      <td>0.206292</td>\n",
+       "      <td>0.937354</td>\n",
+       "      <td>0.888661</td>\n",
+       "      <td>0.939555</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>0.183500</td>\n",
+       "      <td>0.195811</td>\n",
+       "      <td>0.944942</td>\n",
+       "      <td>0.891149</td>\n",
+       "      <td>0.944008</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>0.151000</td>\n",
+       "      <td>0.219581</td>\n",
+       "      <td>0.947665</td>\n",
+       "      <td>0.906578</td>\n",
+       "      <td>0.947093</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>0.090000</td>\n",
+       "      <td>0.247120</td>\n",
+       "      <td>0.946693</td>\n",
+       "      <td>0.898812</td>\n",
+       "      <td>0.945808</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>0.060400</td>\n",
+       "      <td>0.249662</td>\n",
+       "      <td>0.948444</td>\n",
+       "      <td>0.905014</td>\n",
+       "      <td>0.947975</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>0.071300</td>\n",
+       "      <td>0.272767</td>\n",
+       "      <td>0.949416</td>\n",
+       "      <td>0.911514</td>\n",
+       "      <td>0.949748</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>0.052600</td>\n",
+       "      <td>0.305051</td>\n",
+       "      <td>0.945331</td>\n",
+       "      <td>0.902348</td>\n",
+       "      <td>0.944987</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>0.026900</td>\n",
+       "      <td>0.294135</td>\n",
+       "      <td>0.948638</td>\n",
+       "      <td>0.904058</td>\n",
+       "      <td>0.948296</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.034500</td>\n",
+       "      <td>0.292029</td>\n",
+       "      <td>0.950195</td>\n",
+       "      <td>0.908547</td>\n",
+       "      <td>0.949753</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='429' max='429' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [429/429 00:13]\n",
+       "    </div>\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
+      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "large_intestine\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='33070' max='33070' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [33070/33070 43:02, Epoch 10/10]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>Accuracy</th>\n",
+       "      <th>Macro F1</th>\n",
+       "      <th>Weighted F1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>0.306200</td>\n",
+       "      <td>0.312431</td>\n",
+       "      <td>0.908266</td>\n",
+       "      <td>0.786242</td>\n",
+       "      <td>0.900768</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>0.223900</td>\n",
+       "      <td>0.248096</td>\n",
+       "      <td>0.925101</td>\n",
+       "      <td>0.841251</td>\n",
+       "      <td>0.920987</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>0.173600</td>\n",
+       "      <td>0.259997</td>\n",
+       "      <td>0.925907</td>\n",
+       "      <td>0.850348</td>\n",
+       "      <td>0.926290</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>0.162900</td>\n",
+       "      <td>0.282306</td>\n",
+       "      <td>0.925000</td>\n",
+       "      <td>0.873669</td>\n",
+       "      <td>0.925531</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>0.143400</td>\n",
+       "      <td>0.254494</td>\n",
+       "      <td>0.937903</td>\n",
+       "      <td>0.876749</td>\n",
+       "      <td>0.937836</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>0.104500</td>\n",
+       "      <td>0.289942</td>\n",
+       "      <td>0.934677</td>\n",
+       "      <td>0.875333</td>\n",
+       "      <td>0.934339</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>0.080300</td>\n",
+       "      <td>0.313914</td>\n",
+       "      <td>0.935484</td>\n",
+       "      <td>0.877271</td>\n",
+       "      <td>0.934986</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>0.063500</td>\n",
+       "      <td>0.339868</td>\n",
+       "      <td>0.936290</td>\n",
+       "      <td>0.882267</td>\n",
+       "      <td>0.936187</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>0.042500</td>\n",
+       "      <td>0.345784</td>\n",
+       "      <td>0.938911</td>\n",
+       "      <td>0.882963</td>\n",
+       "      <td>0.938682</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.038900</td>\n",
+       "      <td>0.352199</td>\n",
+       "      <td>0.939516</td>\n",
+       "      <td>0.885509</td>\n",
+       "      <td>0.939497</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='827' max='827' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [827/827 00:26]\n",
+       "    </div>\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
+      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "pancreas\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='18280' max='18280' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [18280/18280 23:32, Epoch 10/10]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>Accuracy</th>\n",
+       "      <th>Macro F1</th>\n",
+       "      <th>Weighted F1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>0.340100</td>\n",
+       "      <td>0.343200</td>\n",
+       "      <td>0.896244</td>\n",
+       "      <td>0.655661</td>\n",
+       "      <td>0.879469</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>0.178300</td>\n",
+       "      <td>0.224033</td>\n",
+       "      <td>0.930890</td>\n",
+       "      <td>0.859772</td>\n",
+       "      <td>0.925342</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>0.154200</td>\n",
+       "      <td>0.208034</td>\n",
+       "      <td>0.941284</td>\n",
+       "      <td>0.887012</td>\n",
+       "      <td>0.939485</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>0.121200</td>\n",
+       "      <td>0.216660</td>\n",
+       "      <td>0.940372</td>\n",
+       "      <td>0.880716</td>\n",
+       "      <td>0.939431</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>0.099900</td>\n",
+       "      <td>0.254255</td>\n",
+       "      <td>0.940554</td>\n",
+       "      <td>0.889088</td>\n",
+       "      <td>0.938300</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>0.065800</td>\n",
+       "      <td>0.267429</td>\n",
+       "      <td>0.942743</td>\n",
+       "      <td>0.897682</td>\n",
+       "      <td>0.942815</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>0.061200</td>\n",
+       "      <td>0.282509</td>\n",
+       "      <td>0.945478</td>\n",
+       "      <td>0.898797</td>\n",
+       "      <td>0.943881</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>0.036800</td>\n",
+       "      <td>0.301781</td>\n",
+       "      <td>0.943837</td>\n",
+       "      <td>0.903816</td>\n",
+       "      <td>0.944163</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>0.035400</td>\n",
+       "      <td>0.317026</td>\n",
+       "      <td>0.942560</td>\n",
+       "      <td>0.902241</td>\n",
+       "      <td>0.942071</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.014200</td>\n",
+       "      <td>0.313259</td>\n",
+       "      <td>0.946754</td>\n",
+       "      <td>0.904955</td>\n",
+       "      <td>0.946129</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='457' max='457' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [457/457 00:11]\n",
+       "    </div>\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
+      "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /n/home01/ctheodoris/models/210602_111318_geneformer_27M_L6_emb256_SL2048_E3_B12_LR0.001_LSlinear_WU10000_Oadamw_DS12/models/ and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "liver\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='18690' max='18690' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [18690/18690 26:56, Epoch 10/10]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>Accuracy</th>\n",
+       "      <th>Macro F1</th>\n",
+       "      <th>Weighted F1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>0.388500</td>\n",
+       "      <td>0.385503</td>\n",
+       "      <td>0.878188</td>\n",
+       "      <td>0.673887</td>\n",
+       "      <td>0.871348</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>0.315900</td>\n",
+       "      <td>0.302775</td>\n",
+       "      <td>0.907437</td>\n",
+       "      <td>0.754182</td>\n",
+       "      <td>0.903474</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>0.242600</td>\n",
+       "      <td>0.321844</td>\n",
+       "      <td>0.907972</td>\n",
+       "      <td>0.779504</td>\n",
+       "      <td>0.905881</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>0.238600</td>\n",
+       "      <td>0.323119</td>\n",
+       "      <td>0.911539</td>\n",
+       "      <td>0.790922</td>\n",
+       "      <td>0.910299</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>0.160100</td>\n",
+       "      <td>0.328203</td>\n",
+       "      <td>0.915641</td>\n",
+       "      <td>0.793490</td>\n",
+       "      <td>0.913836</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>0.163100</td>\n",
+       "      <td>0.348942</td>\n",
+       "      <td>0.917425</td>\n",
+       "      <td>0.813604</td>\n",
+       "      <td>0.916911</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>0.124100</td>\n",
+       "      <td>0.373799</td>\n",
+       "      <td>0.916890</td>\n",
+       "      <td>0.820355</td>\n",
+       "      <td>0.916688</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>0.118700</td>\n",
+       "      <td>0.399474</td>\n",
+       "      <td>0.916890</td>\n",
+       "      <td>0.818839</td>\n",
+       "      <td>0.916640</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>0.066800</td>\n",
+       "      <td>0.414363</td>\n",
+       "      <td>0.917603</td>\n",
+       "      <td>0.830703</td>\n",
+       "      <td>0.917226</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.075800</td>\n",
+       "      <td>0.413828</td>\n",
+       "      <td>0.919030</td>\n",
+       "      <td>0.828149</td>\n",
+       "      <td>0.918506</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n",
+      "<ipython-input-16-7f7bd5a45820>:54: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+      "  batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='936' max='468' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [468/468 00:39]\n",
+       "    </div>\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Fine-tune one cell-type classifier per organ: reload the pretrained model,\n",
+    "# train and evaluate it, then save predictions, metrics and the final model.\n",
+    "for organ in organ_list:\n",
+    "    print(organ)\n",
+    "    organ_trainset = trainset_dict[organ]\n",
+    "    organ_evalset = evalset_dict[organ]\n",
+    "    organ_label_dict = traintargetdict_dict[organ]\n",
+    "\n",
+    "    # set logging steps (a tenth of len(trainset)/batch_size);\n",
+    "    # clamp to 1 so small training sets do not round down to an interval of 0\n",
+    "    logging_steps = max(1, round(len(organ_trainset)/geneformer_batch_size/10))\n",
+    "\n",
+    "    # reload pretrained model so every organ starts from the same weights\n",
+    "    model = BertForSequenceClassification.from_pretrained(\n",
+    "        \"/path/to/pretrained_model/\",\n",
+    "        num_labels=len(organ_label_dict),\n",
+    "        output_attentions=False,\n",
+    "        output_hidden_states=False,\n",
+    "    ).to(\"cuda\")\n",
+    "\n",
+    "    # build the output directory name from the date (YYMMDD) and hyperparameters\n",
+    "    datestamp = datetime.datetime.now().strftime(\"%y%m%d\")\n",
+    "    output_dir = f\"/path/to/models/{datestamp}_geneformer_CellClassifier_{organ}_L{max_input_size}_B{geneformer_batch_size}_LR{max_lr}_LS{lr_schedule_fn}_WU{warmup_steps}_E{epochs}_O{optimizer}_F{freeze_layers}/\"\n",
+    "\n",
+    "    # ensure not overwriting previously saved model\n",
+    "    saved_model_test = os.path.join(output_dir, \"pytorch_model.bin\")\n",
+    "    if os.path.isfile(saved_model_test):\n",
+    "        raise FileExistsError(\"Model already saved to this directory.\")\n",
+    "\n",
+    "    # make output directories (including missing parents) without invoking a shell\n",
+    "    os.makedirs(output_dir, exist_ok=True)\n",
+    "\n",
+    "    # set training arguments\n",
+    "    training_args = {\n",
+    "        \"learning_rate\": max_lr,\n",
+    "        \"do_train\": True,\n",
+    "        \"do_eval\": True,\n",
+    "        \"evaluation_strategy\": \"epoch\",\n",
+    "        # load_best_model_at_end needs checkpoints saved on the same schedule\n",
+    "        # as evaluation, so save once per epoch as well\n",
+    "        \"save_strategy\": \"epoch\",\n",
+    "        \"logging_steps\": logging_steps,\n",
+    "        \"group_by_length\": True,\n",
+    "        \"length_column_name\": \"length\",\n",
+    "        \"disable_tqdm\": False,\n",
+    "        \"lr_scheduler_type\": lr_schedule_fn,\n",
+    "        \"warmup_steps\": warmup_steps,\n",
+    "        \"weight_decay\": 0.001,\n",
+    "        \"per_device_train_batch_size\": geneformer_batch_size,\n",
+    "        \"per_device_eval_batch_size\": geneformer_batch_size,\n",
+    "        \"num_train_epochs\": epochs,\n",
+    "        \"load_best_model_at_end\": True,\n",
+    "        \"output_dir\": output_dir,\n",
+    "    }\n",
+    "\n",
+    "    training_args_init = TrainingArguments(**training_args)\n",
+    "\n",
+    "    # create the trainer\n",
+    "    trainer = Trainer(\n",
+    "        model=model,\n",
+    "        args=training_args_init,\n",
+    "        data_collator=DataCollatorForCellClassification(),\n",
+    "        train_dataset=organ_trainset,\n",
+    "        eval_dataset=organ_evalset,\n",
+    "        compute_metrics=compute_metrics,\n",
+    "    )\n",
+    "\n",
+    "    # train the cell type classifier\n",
+    "    trainer.train()\n",
+    "\n",
+    "    # predict on the evaluation set and persist predictions, metrics and model\n",
+    "    predictions = trainer.predict(organ_evalset)\n",
+    "    with open(os.path.join(output_dir, \"predictions.pickle\"), \"wb\") as fp:\n",
+    "        pickle.dump(predictions, fp)\n",
+    "    trainer.save_metrics(\"eval\", predictions.metrics)\n",
+    "    trainer.save_model(output_dir)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.8.6 64-bit ('3.8.6')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.6"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "eba1599a1f7e611c14c87ccff6793920aa63510b01fc0e229d6dd014149b8829"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

examples/pretrain_geneformer_w_deepspeed.py CHANGED Viewed

@@ -23,7 +23,7 @@ import torch
 from datasets import load_from_disk
 from transformers import BertConfig, BertForMaskedLM, TrainingArguments
-from .trainer import GeneformerTrainer
 seed_num = 0
 random.seed(seed_num)
@@ -149,7 +149,7 @@ training_args = TrainingArguments(**training_args)
 print("Starting training.")
 # define the trainer
-trainer = GeneformerTrainer(
     model=model,
     args=training_args,
     # pretraining corpus (e.g. https://huggingface.co/datasets/ctheodoris/Genecorpus-30M/tree/main/genecorpus_30M_2048.dataset)

 from datasets import load_from_disk
 from transformers import BertConfig, BertForMaskedLM, TrainingArguments
+from geneformer import GeneformerPretrainer
 seed_num = 0
 random.seed(seed_num)
 print("Starting training.")
 # define the trainer
+trainer = GeneformerPretrainer(
     model=model,
     args=training_args,
     # pretraining corpus (e.g. https://huggingface.co/datasets/ctheodoris/Genecorpus-30M/tree/main/genecorpus_30M_2048.dataset)

geneformer/__init__.py CHANGED Viewed

	@@ -0,0 +1,8 @@

+from . import tokenizer
+from . import pretrainer
+from . import collator_for_cell_classification
+from . import collator_for_gene_classification
+from .tokenizer import TranscriptomeTokenizer
+from .pretrainer import GeneformerPretrainer
+from .collator_for_gene_classification import DataCollatorForGeneClassification
+from .collator_for_cell_classification import DataCollatorForCellClassification

geneformer/collator_for_cell_classification.py ADDED Viewed

	@@ -0,0 +1,581 @@

+"""
+Geneformer collator for cell classification.
+Huggingface data collator modified to accommodate single-cell transcriptomics data for cell classification.
+"""
+import numpy as np
+import torch
+import warnings
+from enum import Enum
+from typing import Dict, List, Optional, Union
+from transformers import (
+    DataCollatorForTokenClassification,
+    SpecialTokensMixin,
+    BatchEncoding,
+)
+from transformers.utils import is_tf_available, is_torch_available, logging, to_py_obj
+from transformers.utils.generic import _is_tensorflow, _is_torch
+from .pretrainer import token_dictionary
+# Type alias for one tokenized sequence: a flat list of integer token ids.
+EncodedInput = List[int]
+# Module-level logger obtained from the transformers logging utilities.
+logger = logging.get_logger(__name__)
+# Sentinel standing for "no maximum input length".
+VERY_LARGE_INTEGER = int(
+    1e30
+)  # This is used to set the max input length for a model with infinite size input
+# Threshold compared against `self.model_max_length` in `_get_padding_truncation_strategies`:
+# a larger value is treated as "the model has no predefined maximum length".
+LARGE_INTEGER = int(
+    1e20
+)  # This is used when we need something big but slightly smaller than VERY_LARGE_INTEGER
+# precollator functions
+def run_once(f):
+    def wrapper(*args, **kwargs):
+        if not wrapper.has_run:
+            wrapper.has_run = True
+            return f(*args, **kwargs)
+    wrapper.has_run = False
+    return wrapper
+@run_once
+def check_output_once(output):
+    return print(output)
+class ExplicitEnum(Enum):
+    """
+    Enum with more explicit error message for missing values.
+    """
+    @classmethod
+    def _missing_(cls, value):
+        raise ValueError(
+            "%r is not a valid %s, please select one of %s"
+            % (value, cls.__name__, str(list(cls._value2member_map_.keys())))
+        )
+class TruncationStrategy(ExplicitEnum):
+    """
+    Possible values for the ``truncation`` argument in :meth:`PreTrainedTokenizerBase.__call__`. Useful for
+    tab-completion in an IDE.
+    In this module the members are resolved by ``_get_padding_truncation_strategies``, which also accepts
+    the raw string values listed below.
+    """
+    # Truncate only the first sequence of a pair.
+    ONLY_FIRST = "only_first"
+    # Truncate only the second sequence of a pair.
+    ONLY_SECOND = "only_second"
+    # Iteratively remove tokens from the longest sequence of a pair; chosen when ``truncation=True``.
+    LONGEST_FIRST = "longest_first"
+    # Leave sequences untouched.
+    DO_NOT_TRUNCATE = "do_not_truncate"
+class PaddingStrategy(ExplicitEnum):
+    """
+    Possible values for the ``padding`` argument in :meth:`PreTrainedTokenizerBase.__call__`. Useful for tab-completion
+    in an IDE.
+    In this module the members are consumed by ``pad`` and ``_pad``.
+    """
+    # Pad to the longest sequence in the batch; chosen when ``padding=True``.
+    LONGEST = "longest"
+    # Pad to an explicit ``max_length`` (or to the model maximum when none is given).
+    MAX_LENGTH = "max_length"
+    # Leave sequences at their own lengths.
+    DO_NOT_PAD = "do_not_pad"
+class TensorType(ExplicitEnum):
+    """
+    Possible values for the ``return_tensors`` argument in :meth:`PreTrainedTokenizerBase.__call__`. Useful for
+    tab-completion in an IDE.
+    """
+    # PyTorch ``torch.Tensor`` objects.
+    PYTORCH = "pt"
+    # TensorFlow ``tf.constant`` objects.
+    TENSORFLOW = "tf"
+    # NumPy ``np.ndarray`` objects.
+    NUMPY = "np"
+    # Presumably JAX arrays; this value is not used elsewhere in this module.
+    JAX = "jax"
+class PrecollatorForCellClassification(SpecialTokensMixin):
+    """
+    Tokenizer-like helper that supplies padding and special-token utilities for cell classification.
+    Token ids are looked up in ``token_dictionary``, which is imported from ``.pretrainer``.
+    """
+    # Mask token and its id in the token dictionary.
+    mask_token = "<mask>"
+    mask_token_id = token_dictionary.get("<mask>")
+    # Padding token and its id; ``_pad`` appends or prepends this id to shorter sequences.
+    pad_token = "<pad>"
+    pad_token_id = token_dictionary.get("<pad>")
+    # Side on which ``_pad`` adds padding ("right" or "left").
+    padding_side = "right"
+    # Ids flagged as special by ``get_special_tokens_mask``.
+    all_special_ids = [
+        token_dictionary.get("<mask>"),
+        token_dictionary.get("<pad>")
+    ]
+    # The first entry is the key that ``pad`` requires in its input and pads.
+    model_input_names = ["input_ids"]
+    def _get_padding_truncation_strategies(
+        self, padding=True, truncation=False, max_length=None, pad_to_multiple_of=None, verbose=True, **kwargs
+    ):
+        """
+        Find the correct padding/truncation strategy with backward compatibility for old arguments (truncation_strategy
+        and pad_to_max_length) and behaviors.
+        """
+        old_truncation_strategy = kwargs.pop("truncation_strategy", "do_not_truncate")
+        old_pad_to_max_length = kwargs.pop("pad_to_max_length", False)
+        # Backward compatibility for previous behavior, maybe we should deprecate it:
+        # If you only set max_length, it activates truncation for max_length
+        if max_length is not None and padding is False and truncation is False:
+            if verbose:
+                if not self.deprecation_warnings.get("Truncation-not-explicitly-activated", False):
+                    logger.warning(
+                        "Truncation was not explicitly activated but `max_length` is provided a specific value, "
+                        "please use `truncation=True` to explicitly truncate examples to max length. "
+                        "Defaulting to 'longest_first' truncation strategy. "
+                        "If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy "
+                        "more precisely by providing a specific strategy to `truncation`."
+                    )
+                self.deprecation_warnings["Truncation-not-explicitly-activated"] = True
+            truncation = "longest_first"
+        # Get padding strategy
+        if padding is False and old_pad_to_max_length:
+            if verbose:
+                warnings.warn(
+                    "The `pad_to_max_length` argument is deprecated and will be removed in a future version, "
+                    "use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or "
+                    "use `padding='max_length'` to pad to a max length. In this case, you can give a specific "
+                    "length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the "
+                    "maximal input size of the model (e.g. 512 for Bert).",
+                    FutureWarning,
+                )
+            if max_length is None:
+                padding_strategy = PaddingStrategy.LONGEST
+            else:
+                padding_strategy = PaddingStrategy.MAX_LENGTH
+        elif padding is not False:
+            if padding is True:
+                padding_strategy = PaddingStrategy.LONGEST  # Default to pad to the longest sequence in the batch
+            elif not isinstance(padding, PaddingStrategy):
+                padding_strategy = PaddingStrategy(padding)
+            elif isinstance(padding, PaddingStrategy):
+                padding_strategy = padding
+        else:
+            padding_strategy = PaddingStrategy.DO_NOT_PAD
+        # Get truncation strategy
+        if truncation is False and old_truncation_strategy != "do_not_truncate":
+            if verbose:
+                warnings.warn(
+                    "The `truncation_strategy` argument is deprecated and will be removed in a future version, "
+                    "use `truncation=True` to truncate examples to a max length. You can give a specific "
+                    "length with `max_length` (e.g. `max_length=45`) or leave max_length to None to truncate to the "
+                    "maximal input size of the model (e.g. 512 for Bert). "
+                    " If you have pairs of inputs, you can give a specific truncation strategy selected among "
+                    "`truncation='only_first'` (will only truncate the first sentence in the pairs) "
+                    "`truncation='only_second'` (will only truncate the second sentence in the pairs) "
+                    "or `truncation='longest_first'` (will iteratively remove tokens from the longest sentence in the pairs).",
+                    FutureWarning,
+                )
+            truncation_strategy = TruncationStrategy(old_truncation_strategy)
+        elif truncation is not False:
+            if truncation is True:
+                truncation_strategy = (
+                    TruncationStrategy.LONGEST_FIRST
+                )  # Default to truncate the longest sequences in pairs of inputs
+            elif not isinstance(truncation, TruncationStrategy):
+                truncation_strategy = TruncationStrategy(truncation)
+            elif isinstance(truncation, TruncationStrategy):
+                truncation_strategy = truncation
+        else:
+            truncation_strategy = TruncationStrategy.DO_NOT_TRUNCATE
+        # Set max length if needed
+        if max_length is None:
+            if padding_strategy == PaddingStrategy.MAX_LENGTH:
+                if self.model_max_length > LARGE_INTEGER:
+                    if verbose:
+                        if not self.deprecation_warnings.get("Asking-to-pad-to-max_length", False):
+                            logger.warning(
+                                "Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. "
+                                "Default to no padding."
+                            )
+                        self.deprecation_warnings["Asking-to-pad-to-max_length"] = True
+                    padding_strategy = PaddingStrategy.DO_NOT_PAD
+                else:
+                    max_length = self.model_max_length
+            if truncation_strategy != TruncationStrategy.DO_NOT_TRUNCATE:
+                if self.model_max_length > LARGE_INTEGER:
+                    if verbose:
+                        if not self.deprecation_warnings.get("Asking-to-truncate-to-max_length", False):
+                            logger.warning(
+                                "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. "
+                                "Default to no truncation."
+                            )
+                        self.deprecation_warnings["Asking-to-truncate-to-max_length"] = True
+                    truncation_strategy = TruncationStrategy.DO_NOT_TRUNCATE
+                else:
+                    max_length = self.model_max_length
+        # Test if we have a padding token
+        if padding_strategy != PaddingStrategy.DO_NOT_PAD and (not self.pad_token or self.pad_token_id < 0):
+            raise ValueError(
+                "Asking to pad but the tokenizer does not have a padding token. "
+                "Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` "
+                "or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`."
+            )
+        # Check that we will truncate to a multiple of pad_to_multiple_of if both are provided
+        if (
+            truncation_strategy != TruncationStrategy.DO_NOT_TRUNCATE
+            and padding_strategy != PaddingStrategy.DO_NOT_PAD
+            and pad_to_multiple_of is not None
+            and max_length is not None
+            and (max_length % pad_to_multiple_of != 0)
+        ):
+            raise ValueError(
+                f"Truncation and padding are both activated but "
+                f"truncation length ({max_length}) is not a multiple of pad_to_multiple_of ({pad_to_multiple_of})."
+            )
+        return padding_strategy, truncation_strategy, max_length, kwargs
+    def pad(
+        self,
+        encoded_inputs: Union[
+            BatchEncoding,
+            List[BatchEncoding],
+            Dict[str, EncodedInput],
+            Dict[str, List[EncodedInput]],
+            List[Dict[str, EncodedInput]],
+        ],
+        padding: Union[bool, str, PaddingStrategy] = True,
+        max_length: Optional[int] = None,
+        pad_to_multiple_of: Optional[int] = None,
+        return_attention_mask: Optional[bool] = True,
+        return_tensors: Optional[Union[str, TensorType]] = None,
+        verbose: bool = True,
+    ) -> BatchEncoding:
+        """
+        Pad a single encoded input or a batch of encoded inputs up to predefined length or to the max sequence length
+        in the batch.
+        Padding side (left/right) padding token ids are defined at the tokenizer level (with ``self.padding_side``,
+        ``self.pad_token_id`` and ``self.pad_token_type_id``)
+        .. note::
+            If the ``encoded_inputs`` passed are dictionary of numpy arrays, PyTorch tensors or TensorFlow tensors, the
+            result will use the same type unless you provide a different tensor type with ``return_tensors``. In the
+            case of PyTorch tensors, you will lose the specific device of your tensors however.
+        Args:
+            encoded_inputs (:class:`~transformers.BatchEncoding`, list of :class:`~transformers.BatchEncoding`, :obj:`Dict[str, List[int]]`, :obj:`Dict[str, List[List[int]]` or :obj:`List[Dict[str, List[int]]]`):
+                Tokenized inputs. Can represent one input (:class:`~transformers.BatchEncoding` or :obj:`Dict[str,
+                List[int]]`) or a batch of tokenized inputs (list of :class:`~transformers.BatchEncoding`, `Dict[str,
+                List[List[int]]]` or `List[Dict[str, List[int]]]`) so you can use this method during preprocessing as
+                well as in a PyTorch Dataloader collate function.
+                Instead of :obj:`List[int]` you can have tensors (numpy arrays, PyTorch tensors or TensorFlow tensors),
+                see the note above for the return type.
+            padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
+                 Select a strategy to pad the returned sequences (according to the model's padding side and padding
+                 index) among:
+                * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a
+                  single sequence if provided).
+                * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
+                  maximum acceptable input length for the model if that argument is not provided.
+                * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
+                  different lengths).
+            max_length (:obj:`int`, `optional`):
+                Maximum length of the returned list and optionally padding length (see above).
+            pad_to_multiple_of (:obj:`int`, `optional`):
+                If set will pad the sequence to a multiple of the provided value.
+                This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability
+                >= 7.5 (Volta).
+            return_attention_mask (:obj:`bool`, `optional`):
+                Whether to return the attention mask. If left to the default, will return the attention mask according
+                to the specific tokenizer's default, defined by the :obj:`return_outputs` attribute.
+                `What are attention masks? <../glossary.html#attention-mask>`__
+            return_tensors (:obj:`str` or :class:`~transformers.tokenization_utils_base.TensorType`, `optional`):
+                If set, will return tensors instead of list of python integers. Acceptable values are:
+                * :obj:`'tf'`: Return TensorFlow :obj:`tf.constant` objects.
+                * :obj:`'pt'`: Return PyTorch :obj:`torch.Tensor` objects.
+                * :obj:`'np'`: Return Numpy :obj:`np.ndarray` objects.
+            verbose (:obj:`bool`, `optional`, defaults to :obj:`True`):
+                Whether or not to print more information and warnings.
+        """
+        # If we have a list of dicts, let's convert it in a dict of lists
+        # We do this to allow using this method as a collate_fn function in PyTorch Dataloader
+        if isinstance(encoded_inputs, (list, tuple)) and isinstance(encoded_inputs[0], (dict, BatchEncoding)):
+            encoded_inputs = {key: [example[key] for example in encoded_inputs] for key in encoded_inputs[0].keys()}
+        # The model's main input name, usually `input_ids`, has be passed for padding
+        if self.model_input_names[0] not in encoded_inputs:
+            raise ValueError(
+                "You should supply an encoding or a list of encodings to this method"
+                f"that includes {self.model_input_names[0]}, but you provided {list(encoded_inputs.keys())}"
+            )
+        required_input = encoded_inputs[self.model_input_names[0]]
+        if not required_input:
+            if return_attention_mask:
+                encoded_inputs["attention_mask"] = []
+            return encoded_inputs
+        # If we have PyTorch/TF/NumPy tensors/arrays as inputs, we cast them as python objects
+        # and rebuild them afterwards if no return_tensors is specified
+        # Note that we lose the specific device the tensor may be on for PyTorch
+        first_element = required_input[0]
+        if isinstance(first_element, (list, tuple)):
+            # first_element might be an empty list/tuple in some edge cases so we grab the first non empty element.
+            index = 0
+            while len(required_input[index]) == 0:
+                index += 1
+            if index < len(required_input):
+                first_element = required_input[index][0]
+        # At this state, if `first_element` is still a list/tuple, it's an empty one so there is nothing to do.
+        if not isinstance(first_element, (int, list, tuple)):
+            if is_tf_available() and _is_tensorflow(first_element):
+                return_tensors = "tf" if return_tensors is None else return_tensors
+            elif is_torch_available() and _is_torch(first_element):
+                return_tensors = "pt" if return_tensors is None else return_tensors
+            elif isinstance(first_element, np.ndarray):
+                return_tensors = "np" if return_tensors is None else return_tensors
+            else:
+                raise ValueError(
+                    f"type of {first_element} unknown: {type(first_element)}. "
+                    f"Should be one of a python, numpy, pytorch or tensorflow object."
+                )
+            for key, value in encoded_inputs.items():
+                encoded_inputs[key] = to_py_obj(value)
+        # Convert padding_strategy in PaddingStrategy
+        padding_strategy, _, max_length, _ = self._get_padding_truncation_strategies(
+            padding=padding, max_length=max_length, verbose=verbose
+        )
+        required_input = encoded_inputs[self.model_input_names[0]]
+        if required_input and not isinstance(required_input[0], (list, tuple)):
+            encoded_inputs = self._pad(
+                encoded_inputs,
+                max_length=max_length,
+                padding_strategy=padding_strategy,
+                pad_to_multiple_of=pad_to_multiple_of,
+                return_attention_mask=return_attention_mask,
+            )
+            return BatchEncoding(encoded_inputs, tensor_type=return_tensors)
+        batch_size = len(required_input)
+        assert all(
+            len(v) == batch_size for v in encoded_inputs.values()
+        ), "Some items in the output dictionary have a different batch size than others."
+        if padding_strategy == PaddingStrategy.LONGEST:
+            max_length = max(len(inputs) for inputs in required_input)
+            padding_strategy = PaddingStrategy.MAX_LENGTH
+        batch_outputs = {}
+        for i in range(batch_size):
+            inputs = dict((k, v[i]) for k, v in encoded_inputs.items())
+            outputs = self._pad(
+                inputs,
+                max_length=max_length,
+                padding_strategy=padding_strategy,
+                pad_to_multiple_of=pad_to_multiple_of,
+                return_attention_mask=return_attention_mask,
+            )
+            for key, value in outputs.items():
+                if key not in batch_outputs:
+                    batch_outputs[key] = []
+                batch_outputs[key].append(value)
+        del batch_outputs["label"]
+        return BatchEncoding(batch_outputs, tensor_type=return_tensors)
+    def _pad(
+        self,
+        encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
+        max_length: Optional[int] = None,
+        padding_strategy: PaddingStrategy = PaddingStrategy.LONGEST,
+        pad_to_multiple_of: Optional[int] = None,
+        return_attention_mask: Optional[bool] = True,
+    ) -> dict:
+        """
+        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
+        Args:
+            encoded_inputs: Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
+            max_length: maximum length of the returned list and optionally padding length (see below).
+                Will truncate by taking into account the special tokens.
+            padding_strategy: PaddingStrategy to use for padding.
+                - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
+                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
+                - PaddingStrategy.DO_NOT_PAD: Do not pad
+                The tokenizer padding sides are defined in self.padding_side:
+                    - 'left': pads on the left of the sequences
+                    - 'right': pads on the right of the sequences
+            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
+                This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
+                >= 7.5 (Volta).
+            return_attention_mask: (optional) Set to False to avoid returning attention mask (default: set to model specifics)
+        """
+        # Load from model defaults
+        if return_attention_mask is None:
+            return_attention_mask = "attention_mask" in self.model_input_names
+        required_input = encoded_inputs[self.model_input_names[0]]
+        if padding_strategy == PaddingStrategy.LONGEST:
+            max_length = len(required_input)
+        if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
+            max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
+        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length
+        if needs_to_be_padded:
+            difference = max_length - len(required_input)
+            if self.padding_side == "right":
+                if return_attention_mask:
+                    encoded_inputs["attention_mask"] = [1] * len(required_input) + [0] * difference
+                if "token_type_ids" in encoded_inputs:
+                    encoded_inputs["token_type_ids"] = (
+                        encoded_inputs["token_type_ids"] + [self.pad_token_type_id] * difference
+                    )
+                if "special_tokens_mask" in encoded_inputs:
+                    encoded_inputs["special_tokens_mask"] = encoded_inputs["special_tokens_mask"] + [1] * difference
+                encoded_inputs[self.model_input_names[0]] = required_input + [self.pad_token_id] * difference
+            elif self.padding_side == "left":
+                if return_attention_mask:
+                    encoded_inputs["attention_mask"] = [0] * difference + [1] * len(required_input)
+                if "token_type_ids" in encoded_inputs:
+                    encoded_inputs["token_type_ids"] = [self.pad_token_type_id] * difference + encoded_inputs[
+                        "token_type_ids"
+                    ]
+                if "special_tokens_mask" in encoded_inputs:
+                    encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
+                encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
+            else:
+                raise ValueError("Invalid padding strategy:" + str(self.padding_side))
+        elif return_attention_mask and "attention_mask" not in encoded_inputs:
+            encoded_inputs["attention_mask"] = [1] * len(required_input)
+#         check_output_once(encoded_inputs)
+        return encoded_inputs
+    def get_special_tokens_mask(
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
+    ) -> List[int]:
+        """
+        Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
+        special tokens using the tokenizer ``prepare_for_model`` or ``encode_plus`` methods.
+        Args:
+            token_ids_0 (:obj:`List[int]`):
+                List of ids of the first sequence.
+            token_ids_1 (:obj:`List[int]`, `optional`):
+                List of ids of the second sequence.
+            already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not the token list is already formatted with special tokens for the model.
+        Returns:
+            A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
+        """
+        assert already_has_special_tokens and token_ids_1 is None, (
+            "You cannot use ``already_has_special_tokens=False`` with this tokenizer. "
+            "Please use a slow (full python) tokenizer to activate this argument."
+            "Or set `return_special_tokens_mask=True` when calling the encoding method "
+            "to get the special tokens mask in any tokenizer. "
+        )
+        all_special_ids = self.all_special_ids  # cache the property
+        special_tokens_mask = [1 if token in all_special_ids else 0 for token in token_ids_0]
+        return special_tokens_mask
+    def convert_tokens_to_ids(self, tokens: Union[str, List[str]]) -> Union[int, List[int]]:
+        """
+        Converts a token string (or a sequence of tokens) in a single integer id (or a sequence of ids), using the
+        vocabulary.
+        Args:
+            tokens (:obj:`str` or :obj:`List[str]`): One or several token(s) to convert to token id(s).
+        Returns:
+            :obj:`int` or :obj:`List[int]`: The token id or list of token ids.
+        """
+        if tokens is None:
+            return None
+        if isinstance(tokens, str):
+            return self._convert_token_to_id_with_added_voc(tokens)
+        ids = []
+        for token in tokens:
+            ids.append(self._convert_token_to_id_with_added_voc(token))
+        return ids
+    def _convert_token_to_id_with_added_voc(self, token):
+        if token is None:
+            return None
+        return token_dictionary.get(token)
+    def __len__(self):
+        """Return the number of entries in ``token_dictionary``."""
+        return len(token_dictionary)
+# collator functions
+class DataCollatorForCellClassification(DataCollatorForTokenClassification):
+    """
+    Data collator that will dynamically pad the inputs received, as well as the labels.
+    Args:
+        tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`):
+            The tokenizer used for encoding the data.
+        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
+            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
+            among:
+            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
+              sequence if provided).
+            * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
+              maximum acceptable input length for the model if that argument is not provided.
+            * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
+              different lengths).
+        max_length (:obj:`int`, `optional`):
+            Maximum length of the returned list and optionally padding length (see above).
+        pad_to_multiple_of (:obj:`int`, `optional`):
+            If set will pad the sequence to a multiple of the provided value.
+            This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
+            7.5 (Volta).
+        label_pad_token_id (:obj:`int`, `optional`, defaults to -100):
+            The id to use when padding the labels (-100 will be automatically ignore by PyTorch loss functions).
+    """
+    tokenizer: PrecollatorForCellClassification()
+    padding: Union[bool, str, PaddingStrategy] = True
+    max_length: Optional[int] = None
+    pad_to_multiple_of: Optional[int] = None
+    label_pad_token_id: int = -100
+    def __call__(self, features):
+        label_name = "label" if "label" in features[0].keys() else "labels"
+        labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None
+        batch = self.tokenizer.pad(
+            features,
+            padding=self.padding,
+            max_length=self.max_length,
+            pad_to_multiple_of=self.pad_to_multiple_of,
+            return_tensors="pt",
+        )
+        # Special handling for labels.
+        # Ensure that tensor is created with the correct type
+        # (it should be automatically the case, but let's make sure of it.)
+        first = features[0]
+        if "label" in first and first["label"] is not None:
+            label = first["label"].item() if isinstance(first["label"], torch.Tensor) else first["label"]
+            dtype = torch.long if isinstance(label, int) else torch.float
+            batch["labels"] = torch.tensor([f["label"] for f in features], dtype=dtype)
+        batch = {k: torch.tensor(v, dtype=torch.int64) for k, v in batch.items()}
+        return batch

geneformer/{trainer.py → pretrainer.py} RENAMED Viewed

@@ -1,7 +1,7 @@
 """
-Geneformer trainer and collator.
-Huggingface trainer and data collator modified to accommodate single-cell transcriptomics data.
 """
 import collections
 import math
@@ -589,7 +589,7 @@ class GeneformerPreCollator(SpecialTokensMixin):
         return len(self.token_dictionary)
-class GeneformerTrainer(Trainer):
     def __init__(self, *args, **kwargs):
         data_collator = kwargs.get("data_collator")
         token_dictionary = kwargs.get("token_dictionary")

 """
+Geneformer precollator and pretrainer.
+Huggingface data collator and trainer modified to accommodate single-cell transcriptomics data.
 """
 import collections
 import math
         return len(self.token_dictionary)
+class GeneformerPretrainer(Trainer):
     def __init__(self, *args, **kwargs):
         data_collator = kwargs.get("data_collator")
         token_dictionary = kwargs.get("token_dictionary")

geneformer/tokenizer.py CHANGED Viewed

@@ -2,8 +2,8 @@
 Geneformer tokenizer.
 Usage:
-  from geneformer.tokenizer import Tokenizer
-  tk = Tokenizer({"cell_type": "cell_type", "organ_major": "organ_major"}, nproc=4)
   tk.tokenize_data("loom_data_directory", "output_directory", "output_prefix")
 """
@@ -32,7 +32,7 @@ def tokenize_cell(gene_vector, gene_tokens):
     return sentence_tokens
-class Tokenizer:
     def __init__(
         self,
         custom_attr_name_dict,

 Geneformer tokenizer.
 Usage:
+  from geneformer import TranscriptomeTokenizer
+  tk = TranscriptomeTokenizer({"cell_type": "cell_type", "organ_major": "organ_major"}, nproc=4)
   tk.tokenize_data("loom_data_directory", "output_directory", "output_prefix")
 """
     return sentence_tokens
+class TranscriptomeTokenizer:
     def __init__(
         self,
         custom_attr_name_dict,