feat: finetuning via ST (#42)

- feat: support setting a default task (4ff8c1524c0a1f13e75bfb6f75c64d28035ce49a)
- Update modules.json (4a29e2c9633d6f8cb767ec461be609e856ec949d)
- Update custom_st.py (42a68bc2dcb4e4ad03121bbfdaefc3e2f3101bf9)

Files changed (2) hide show

custom_st.py +24 -6
modules.json +3 -3

custom_st.py CHANGED Viewed

@@ -51,8 +51,8 @@ class Transformer(nn.Module):
         if config_args is None:
             config_args = {}
         self.config = AutoConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir)
-        self.auto_model = AutoModel.from_pretrained(model_name_or_path, config=self.config, cache_dir=cache_dir, **model_args)
         self._lora_adaptations = self.config.lora_adaptations
         if (
@@ -66,6 +66,10 @@ class Transformer(nn.Module):
             name: idx for idx, name in enumerate(self._lora_adaptations)
         }
         if max_seq_length is not None and "model_max_length" not in tokenizer_args:
             tokenizer_args["model_max_length"] = max_seq_length
         self.tokenizer = AutoTokenizer.from_pretrained(
@@ -88,17 +92,31 @@ class Transformer(nn.Module):
         if tokenizer_name_or_path is not None:
             self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__
-    def forward(
-        self, features: Dict[str, torch.Tensor], task: Optional[str] = None
-    ) -> Dict[str, torch.Tensor]:
-        """Returns token_embeddings, cls_token"""
         if task and task not in self._lora_adaptations:
             raise ValueError(
                 f"Unsupported task '{task}'. "
-                f"Supported tasks are: {', '.join(self.config.lora_adaptations)}."
                 f"Alternatively, don't pass the `task` argument to disable LoRA."
             )
         adapter_mask = None
         if task:
             task_id = self._adaptation_map[task]

         if config_args is None:
             config_args = {}
         self.config = AutoConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir)
         self._lora_adaptations = self.config.lora_adaptations
         if (
             name: idx for idx, name in enumerate(self._lora_adaptations)
         }
+        self.default_task = model_args.pop('default_task', None)
+        self.auto_model = AutoModel.from_pretrained(model_name_or_path, config=self.config, cache_dir=cache_dir, **model_args)
         if max_seq_length is not None and "model_max_length" not in tokenizer_args:
             tokenizer_args["model_max_length"] = max_seq_length
         self.tokenizer = AutoTokenizer.from_pretrained(
         if tokenizer_name_or_path is not None:
             self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__
+    # Default LoRA task name; forward() falls back to it when called with a falsy `task`.
+    @property
+    def default_task(self):
+        return self._default_task
+    # Validate before storing, so an unsupported task name raises ValueError on
+    # assignment. Falsy values such as None pass validation unchanged.
+    @default_task.setter
+    def default_task(self, task: Union[None, str]):
+        self._validate_task(task)
+        self._default_task = task
+    def _validate_task(self, task: str):
+        # Raise ValueError when `task` is truthy but is not one of the names in
+        # self._lora_adaptations; falsy values (None, "") are accepted as-is.
+        # NOTE(review): annotated as `str`, but the default_task setter and
+        # forward() in this diff may also pass None -- confirm and widen the hint.
         if task and task not in self._lora_adaptations:
             raise ValueError(
                 f"Unsupported task '{task}'. "
+                f"Supported tasks are: {', '.join(self.config.lora_adaptations)}. "
                 f"Alternatively, don't pass the `task` argument to disable LoRA."
             )
+    def forward(
+        self, features: Dict[str, torch.Tensor], task: Optional[str] = None
+    ) -> Dict[str, torch.Tensor]:
+        """Returns token_embeddings, cls_token"""
+        self._validate_task(task)
+        task = task or self.default_task
         adapter_mask = None
         if task:
             task_id = self._adaptation_map[task]

modules.json CHANGED Viewed

@@ -1,20 +1,20 @@
 [
   {
     "idx": 0,
-    "name": "0",
     "path": "",
     "type": "custom_st.Transformer",
     "kwargs": ["task"]
   },
   {
     "idx": 1,
-    "name": "1",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
   },
   {
     "idx": 2,
-    "name": "2",
     "path": "2_Normalize",
     "type": "sentence_transformers.models.Normalize"
   }

 [
   {
     "idx": 0,
+    "name": "transformer",
     "path": "",
     "type": "custom_st.Transformer",
     "kwargs": ["task"]
   },
   {
     "idx": 1,
+    "name": "pooler",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
   },
   {
     "idx": 2,
+    "name": "normalizer",
     "path": "2_Normalize",
     "type": "sentence_transformers.models.Normalize"
   }