aehrc
/

cxrmate-rrg24

@@ -74,6 +74,9 @@
       2
     ],
     "sep_token_id": null,
     "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,

       2
     ],
     "sep_token_id": null,
+    "separator_token_ids": [
+      3
+    ],
     "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,

modelling_cxrrg.py CHANGED Viewed

@@ -200,7 +200,6 @@ class CXRRGModel(VisionEncoderDecoderModel):
     def prepare_inputs_for_generation(
         self,
         input_ids,
-        special_token_ids,
         past_key_values=None,
         use_cache=None,
         encoder_outputs=None,
@@ -226,7 +225,7 @@ class CXRRGModel(VisionEncoderDecoderModel):
             # `inputs_embeds` are only to be used in the 1st generation step:
             inputs_embeds = torch.cat([encoder_outputs[0], self.decoder.get_input_embeddings()(input_ids)], dim=1)
-            decoder_token_type_ids = self.token_ids_to_token_type_ids(input_ids, special_token_ids)
             decoder_token_type_ids = torch.cat(
                 [
                     torch.full(
@@ -255,7 +254,7 @@ class CXRRGModel(VisionEncoderDecoderModel):
             decoder_position_ids.masked_fill_(report_attention_mask == 0, 1)
             # Always place token_ids_to_token_type_ids_past before input_ids = input_ids[:, remove_prefix_length:]:
-            decoder_token_type_ids = self.token_ids_to_token_type_ids_past(input_ids, special_token_ids)
             decoder_position_ids = decoder_position_ids[:, -1:]
             past_length = past_key_values[0][0].shape[2]
@@ -282,13 +281,12 @@ class CXRRGModel(VisionEncoderDecoderModel):
         )
         return input_dict
-    def token_ids_to_token_type_ids(self, token_ids, special_token_ids):
         """
         Extract token type identifiers from the token identifiers.
         Argument/s:
             token_ids - token identifiers.
-            special_token_ids - special token identifiers that indicate the separation between sections.
             token_type_id_section - token type identifier for each section.
         Returns:
@@ -298,7 +296,7 @@ class CXRRGModel(VisionEncoderDecoderModel):
         mbatch_size, seq_len = token_ids.shape
         token_type_ids = torch.full_like(token_ids, self.config.section_ids[0], dtype=torch.long, device=token_ids.device)
-        for i, j in enumerate(special_token_ids):
             # Find first occurrence of special tokens that indicate the boundary between sections:
             cols = (token_ids == j).int().argmax(dim=1)
             rows = torch.arange(mbatch_size, device=token_ids.device)
@@ -323,14 +321,13 @@ class CXRRGModel(VisionEncoderDecoderModel):
         return token_type_ids
-    def token_ids_to_token_type_ids_past(self, token_ids, special_token_ids):
         """
         Extract token type identifiers from the token identifiers if past != None. Make sure to input all the
         token_ids (e.g., do not input input_ids = input_ids[:, remove_prefix_length:] from prepare_inputs_for_generation).
         Argument/s:
             token_ids - token identifiers.
-            special_token_ids - special token identifiers that indicate the separation between sections.
         Returns:
             token_type_ids - token type identifiers.
@@ -341,7 +338,7 @@ class CXRRGModel(VisionEncoderDecoderModel):
         # https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertTokenizer.create_token_type_ids_from_sequences.example
         token_ids = token_ids[:, :-1]
-        for i, j in enumerate(special_token_ids):
             # Find first occurrence of special token, which indicates the boundary between sections:
             exists = torch.any(token_ids == j, dim=1, keepdim=True)
@@ -445,13 +442,12 @@ class CXRRGModel(VisionEncoderDecoderModel):
         return batch_dict
-    def split_and_decode_sections(self, token_ids, special_token_ids, tokenizer: PreTrainedTokenizerFast):
         """
         Split the token identifiers into sections, then convert the token identifiers into strings.
         Argument/s:
             token_ids - token identifiers.
-            special_token_ids - special token identifiers that indicate the end of each section.
             tokenizer - Hugging Face tokenizer.
         Returns:
@@ -460,14 +456,14 @@ class CXRRGModel(VisionEncoderDecoderModel):
         _, seq_len = token_ids.shape
-        # The number of sections is the same as the number of special_token_ids:
-        num_sections = len(special_token_ids)
         sections = {k: [] for k in range(num_sections)}
         for i in token_ids:
             prev_col = 0
-            for j, k in enumerate(special_token_ids):
                 # The maximum sequence length was exceeded, thus no more tokens:
                 if prev_col >= seq_len:

     def prepare_inputs_for_generation(
         self,
         input_ids,
         past_key_values=None,
         use_cache=None,
         encoder_outputs=None,
             # `inputs_embeds` are only to be used in the 1st generation step:
             inputs_embeds = torch.cat([encoder_outputs[0], self.decoder.get_input_embeddings()(input_ids)], dim=1)
+            decoder_token_type_ids = self.token_ids_to_token_type_ids(input_ids)
             decoder_token_type_ids = torch.cat(
                 [
                     torch.full(
             decoder_position_ids.masked_fill_(report_attention_mask == 0, 1)
             # Always place token_ids_to_token_type_ids_past before input_ids = input_ids[:, remove_prefix_length:]:
+            decoder_token_type_ids = self.token_ids_to_token_type_ids_past(input_ids)
             decoder_position_ids = decoder_position_ids[:, -1:]
             past_length = past_key_values[0][0].shape[2]
         )
         return input_dict
+    def token_ids_to_token_type_ids(self, token_ids):
         """
         Extract token type identifiers from the token identifiers.
         Argument/s:
             token_ids - token identifiers.
             token_type_id_section - token type identifier for each section.
         Returns:
         mbatch_size, seq_len = token_ids.shape
         token_type_ids = torch.full_like(token_ids, self.config.section_ids[0], dtype=torch.long, device=token_ids.device)
+        for i, j in enumerate(self.config.decoder.separator_token_ids):
             # Find the first occurrence of each separator token, which marks the boundary between sections:
             cols = (token_ids == j).int().argmax(dim=1)
             rows = torch.arange(mbatch_size, device=token_ids.device)
         return token_type_ids
+    def token_ids_to_token_type_ids_past(self, token_ids):
         """
         Extract token type identifiers from the token identifiers if past != None. Make sure to input all the
         token_ids (e.g., do not input input_ids = input_ids[:, remove_prefix_length:] from prepare_inputs_for_generation).
         Argument/s:
             token_ids - token identifiers.
         Returns:
             token_type_ids - token type identifiers.
         # https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertTokenizer.create_token_type_ids_from_sequences.example
         token_ids = token_ids[:, :-1]
+        for i, j in enumerate(self.config.decoder.separator_token_ids):
             # Find the first occurrence of the separator token, which indicates the boundary between sections:
             exists = torch.any(token_ids == j, dim=1, keepdim=True)
         return batch_dict
+    def split_and_decode_sections(self, token_ids, tokenizer: PreTrainedTokenizerFast):
         """
         Split the token identifiers into sections, then convert the token identifiers into strings.
         Argument/s:
             token_ids - token identifiers.
             tokenizer - Hugging Face tokenizer.
         Returns:
         _, seq_len = token_ids.shape
+        # The number of sections is the same as the number of separator_token_ids:
+        num_sections = len(self.config.decoder.separator_token_ids)
         sections = {k: [] for k in range(num_sections)}
         for i in token_ids:
             prev_col = 0
+            for j, k in enumerate(self.config.decoder.separator_token_ids):
                 # The maximum sequence length was exceeded, thus no more tokens:
                 if prev_col >= seq_len: