refactor: print flash attn usage log only once (#4)
Commit c013e88166f88ab0d45328cb2654914802b127e6
Co-authored-by: Jeesoo Lee <[email protected]>
- modeling_motif.py +2 -2
modeling_motif.py CHANGED
@@ -472,8 +472,6 @@ class MotifFlashAttention2(MotifAttention):
 
         self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
 
-        logger.info(f'flash attention is used {not self._flash_attn_uses_top_left_mask}')
-
     def _reshape_heads(self, tensor, batch_size, seq_len):
         """2-way head split tensor reshape"""
         return tensor.reshape(batch_size, seq_len, self.num_heads, 2, self.head_dim)
@@ -981,6 +979,8 @@ class MotifModel(MotifPreTrainedModel):
         self.gradient_checkpointing = False
         self.post_init()
 
+        logger.info(f'Using flash_attn: {is_flash_attn_greater_or_equal_2_10()}')
+
     def get_input_embeddings(self):
         return self.embed_tokens
 
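The motivation behind the move: `MotifFlashAttention2` is constructed once per decoder layer, so logging in its `__init__` repeats the message once per layer, while `MotifModel.__init__` runs once per model. Below is a minimal, self-contained sketch of that effect; the layer structure, the `num_hidden_layers` default, and the local stand-in for `is_flash_attn_greater_or_equal_2_10()` are assumptions for illustration, not the actual modeling_motif.py code.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def is_flash_attn_greater_or_equal_2_10() -> bool:
    # Stand-in for the transformers version check used in the diff (assumed True here).
    return True


class MotifFlashAttention2:
    def __init__(self, layer_idx: int):
        self.layer_idx = layer_idx
        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
        # Before this commit the logger.info call lived here, so a model with N layers
        # printed the flash-attention message N times.


class MotifModel:
    def __init__(self, num_hidden_layers: int = 4):
        # One attention module per decoder layer (simplified: layers reduced to attention only).
        self.layers = [MotifFlashAttention2(i) for i in range(num_hidden_layers)]
        # After this commit the log fires here, exactly once per model instance.
        logger.info(f'Using flash_attn: {is_flash_attn_greater_or_equal_2_10()}')


if __name__ == "__main__":
    MotifModel()  # prints a single "Using flash_attn: True" line, regardless of layer count

Note that the new message reports the version check directly rather than the derived `_flash_attn_uses_top_left_mask` flag, since that attribute belongs to the attention class and is no longer in scope at the model level.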