Crystalcareai
/

Quiet-Star-Custom

Text Generation

Model card | Files and versions | Community

Crystalcareai committed on Apr 3, 2024

Commit

25accc9

·

verified ·

1 Parent(s): 5e5e800

Update modeling_quiet.py

Files changed (1) hide show

modeling_quiet.py +6 -6

modeling_quiet.py CHANGED Viewed

@@ -1666,12 +1666,12 @@ class QuietForCausalLM(QuietPreTrainedModel, GenerationMixin):
                         head_input_hidden_states = talk_hidden_states
                     residual_logits = self.talk_head[0](head_input_hidden_states)
-					if self.use_shallow_talk:
-						residual_logits = apply_head(self.lm_head, residual_logits, detach=self.optimize_lm_head_only_at_start)
-						residual_logits = residual_logits.to(logits.device)
-						mixing_weights = self.mixing_head(torch.cat([cur_base_hidden, talk_hidden_states], dim=-1))
-						mixing_weights = torch.sigmoid(mixing_weights)
-						logits = base_logits * (1 - mixing_weights) + residual_logits * mixing_weights
                 assert sum([self.cumulative_residual, self.clever_residual, self.skip_residual, self.no_residual]) == 1
                 if self.clever_residual:
                     if ahead_idx >= self.n_ahead - 1:

                         head_input_hidden_states = talk_hidden_states
                     residual_logits = self.talk_head[0](head_input_hidden_states)
+                    if self.use_shallow_talk:
+                        residual_logits = apply_head(self.lm_head, residual_logits, detach=self.optimize_lm_head_only_at_start)
+                        residual_logits = residual_logits.to(logits.device)
+                        mixing_weights = self.mixing_head(torch.cat([cur_base_hidden, talk_hidden_states], dim=-1))
+                        mixing_weights = torch.sigmoid(mixing_weights)
+                        logits = base_logits * (1 - mixing_weights) + residual_logits * mixing_weights
                 assert sum([self.cumulative_residual, self.clever_residual, self.skip_residual, self.no_residual]) == 1
                 if self.clever_residual:
                     if ahead_idx >= self.n_ahead - 1: