Lolalb committed
Commit 99d97ed · verified · 1 parent: 2650f1f

Upload AMPLIFY

Files changed (1): amplify.py (+10 −2)
amplify.py CHANGED
@@ -295,6 +295,14 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
         # Initialize
         hidden_states, attentions = [], []
 
+        # We will output all the hidden_states that have an index higher than output_hidden_index
+        if type(output_hidden_states) == bool and not output_hidden_states:
+            output_hidden_index = self.config.num_hidden_layers + 1
+        elif type(output_hidden_states) == int:
+            output_hidden_index = output_hidden_states
+        else:
+            output_hidden_index = 0
+
         # Expand and repeat: (Batch, Length) -> (Batch, Heads, Length, Length)
         if pad_mask is not None:
             pad_mask = pad_mask.unsqueeze(1).unsqueeze(1).repeat(1, self.config.num_attention_heads, pad_mask.size(-1), 1)
@@ -325,9 +333,9 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
         x = self.layer_norm_1(x)
 
         # Transformer encoder
-        for layer in self.transformer_encoder:
+        for idx, layer in enumerate(self.transformer_encoder):
             x, attn = layer(x, pad_mask, freqs_cis, output_attentions, max_seqlen, cu_seqlens)
-            if output_hidden_states:
+            if idx >= output_hidden_index:
                 hidden_states.append(x)
             if output_attentions:
                 attentions.append(attn)
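
In effect, output_hidden_states now accepts either a bool (the previous all-or-nothing behavior) or an int: only layers whose index is greater than or equal to that value have their hidden states collected. A minimal standalone sketch of the resolution logic follows; the helper name and the demo value of 24 layers are illustrative, not part of the commit (AMPLIFY reads the layer count from self.config.num_hidden_layers):

def resolve_output_hidden_index(output_hidden_states, num_hidden_layers):
    # False: the threshold lands past the last layer, so no hidden states are kept.
    if type(output_hidden_states) == bool and not output_hidden_states:
        return num_hidden_layers + 1
    # int: keep hidden states for layers with idx >= output_hidden_states.
    # (type(True) == int is False, so True does not fall into this branch.)
    elif type(output_hidden_states) == int:
        return output_hidden_states
    # True (or anything else): keep every layer's hidden state.
    else:
        return 0

if __name__ == "__main__":
    num_hidden_layers = 24  # illustrative layer count
    for request in (False, True, 20):
        threshold = resolve_output_hidden_index(request, num_hidden_layers)
        kept = [idx for idx in range(num_hidden_layers) if idx >= threshold]
        print(f"output_hidden_states={request!r}: keeps {len(kept)} layer output(s)")

Passing an int such as 20 keeps only the last few layers' outputs, which is presumably the motivation: avoiding materializing every layer's hidden states when only the late layers are needed.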