feat: finetuning via ST (#42)
Browse files
- feat: support setting a default task (4ff8c1524c0a1f13e75bfb6f75c64d28035ce49a)
- Update modules.json (4a29e2c9633d6f8cb767ec461be609e856ec949d)
- Update custom_st.py (42a68bc2dcb4e4ad03121bbfdaefc3e2f3101bf9)
- custom_st.py +24 -6
- modules.json +3 -3
custom_st.py
CHANGED
|
@@ -51,8 +51,8 @@ class Transformer(nn.Module):
|
|
| 51 |
if config_args is None:
|
| 52 |
config_args = {}
|
| 53 |
|
|
|
|
| 54 |
self.config = AutoConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir)
|
| 55 |
-
self.auto_model = AutoModel.from_pretrained(model_name_or_path, config=self.config, cache_dir=cache_dir, **model_args)
|
| 56 |
|
| 57 |
self._lora_adaptations = self.config.lora_adaptations
|
| 58 |
if (
|
|
@@ -66,6 +66,10 @@ class Transformer(nn.Module):
|
|
| 66 |
name: idx for idx, name in enumerate(self._lora_adaptations)
|
| 67 |
}
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
if max_seq_length is not None and "model_max_length" not in tokenizer_args:
|
| 70 |
tokenizer_args["model_max_length"] = max_seq_length
|
| 71 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
|
@@ -88,17 +92,31 @@ class Transformer(nn.Module):
|
|
| 88 |
if tokenizer_name_or_path is not None:
|
| 89 |
self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
)
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
if task and task not in self._lora_adaptations:
|
| 96 |
raise ValueError(
|
| 97 |
f"Unsupported task '{task}'. "
|
| 98 |
-
f"Supported tasks are: {', '.join(self.config.lora_adaptations)}."
|
| 99 |
f"Alternatively, don't pass the `task` argument to disable LoRA."
|
| 100 |
)
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
adapter_mask = None
|
| 103 |
if task:
|
| 104 |
task_id = self._adaptation_map[task]
|
|
|
|
| 51 |
if config_args is None:
|
| 52 |
config_args = {}
|
| 53 |
|
| 54 |
+
|
| 55 |
self.config = AutoConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir)
|
|
|
|
| 56 |
|
| 57 |
self._lora_adaptations = self.config.lora_adaptations
|
| 58 |
if (
|
|
|
|
| 66 |
name: idx for idx, name in enumerate(self._lora_adaptations)
|
| 67 |
}
|
| 68 |
|
| 69 |
+
self.default_task = model_args.pop('default_task', None)
|
| 70 |
+
|
| 71 |
+
self.auto_model = AutoModel.from_pretrained(model_name_or_path, config=self.config, cache_dir=cache_dir, **model_args)
|
| 72 |
+
|
| 73 |
if max_seq_length is not None and "model_max_length" not in tokenizer_args:
|
| 74 |
tokenizer_args["model_max_length"] = max_seq_length
|
| 75 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
|
|
|
| 92 |
if tokenizer_name_or_path is not None:
|
| 93 |
self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__
|
| 94 |
|
| 95 |
+
|
| 96 |
+
@property
|
| 97 |
+
def default_task(self):
|
| 98 |
+
return self._default_task
|
| 99 |
+
|
| 100 |
+
@default_task.setter
|
| 101 |
+
def default_task(self, task: Union[None, str]):
|
| 102 |
+
self._validate_task(task)
|
| 103 |
+
self._default_task = task
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _validate_task(self, task: str):
|
| 107 |
if task and task not in self._lora_adaptations:
|
| 108 |
raise ValueError(
|
| 109 |
f"Unsupported task '{task}'. "
|
| 110 |
+
f"Supported tasks are: {', '.join(self.config.lora_adaptations)}. "
|
| 111 |
f"Alternatively, don't pass the `task` argument to disable LoRA."
|
| 112 |
)
|
| 113 |
|
| 114 |
+
def forward(
|
| 115 |
+
self, features: Dict[str, torch.Tensor], task: Optional[str] = None
|
| 116 |
+
) -> Dict[str, torch.Tensor]:
|
| 117 |
+
"""Returns token_embeddings, cls_token"""
|
| 118 |
+
self._validate_task(task)
|
| 119 |
+
task = task or self.default_task
|
| 120 |
adapter_mask = None
|
| 121 |
if task:
|
| 122 |
task_id = self._adaptation_map[task]
|
modules.json
CHANGED
|
@@ -1,20 +1,20 @@
|
|
| 1 |
[
|
| 2 |
{
|
| 3 |
"idx": 0,
|
| 4 |
-
"name": "
|
| 5 |
"path": "",
|
| 6 |
"type": "custom_st.Transformer",
|
| 7 |
"kwargs": ["task"]
|
| 8 |
},
|
| 9 |
{
|
| 10 |
"idx": 1,
|
| 11 |
-
"name": "
|
| 12 |
"path": "1_Pooling",
|
| 13 |
"type": "sentence_transformers.models.Pooling"
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"idx": 2,
|
| 17 |
-
"name": "
|
| 18 |
"path": "2_Normalize",
|
| 19 |
"type": "sentence_transformers.models.Normalize"
|
| 20 |
}
|
|
|
|
| 1 |
[
|
| 2 |
{
|
| 3 |
"idx": 0,
|
| 4 |
+
"name": "transformer",
|
| 5 |
"path": "",
|
| 6 |
"type": "custom_st.Transformer",
|
| 7 |
"kwargs": ["task"]
|
| 8 |
},
|
| 9 |
{
|
| 10 |
"idx": 1,
|
| 11 |
+
"name": "pooler",
|
| 12 |
"path": "1_Pooling",
|
| 13 |
"type": "sentence_transformers.models.Pooling"
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"idx": 2,
|
| 17 |
+
"name": "normalizer",
|
| 18 |
"path": "2_Normalize",
|
| 19 |
"type": "sentence_transformers.models.Normalize"
|
| 20 |
}
|