Update modeling_quiet.py
modeling_quiet.py (CHANGED): +26 -33

Old version of the `QuietModel._generate_thoughts` hunk (removed lines are prefixed with `-`):

@@ -929,40 +929,29 @@ class QuietModel(QuietPreTrainedModel):
         self.embed_tokens = value

     def _generate_thoughts(self, hidden_states, max_length):
-
         thought_embeddings = []
-
-        for _ in range(self.config.max_thoughts):
-            thought_id = torch.LongTensor([[self.config.start_token_id]]).to(hidden_states.device)
-            thought_embedding = self.embed_tokens(thought_id)
-
-            for _ in range(max_length):
-                outputs = self.forward(
-                    inputs_embeds=thought_embedding,
-                    attention_mask=None,
-                    use_cache=True,
-                    return_dict=True, # Set return_dict=True
-                )
-                logits = self.lm_head(outputs.last_hidden_state) # Use outputs.last_hidden_state instead of outputs.logits
-                next_token_id = torch.argmax(logits[:, -1, :], dim=-1)
-
-                if next_token_id == self.config.end_token_id:
-                    break
-
-                thought_id = torch.cat([thought_id, next_token_id.unsqueeze(0)], dim=-1)
-                thought_embedding = torch.cat([thought_embedding, self.embed_tokens(next_token_id.unsqueeze(0))], dim=1)
-
-            thought_ids.append(thought_id.squeeze(0))
-            thought_embeddings.append(thought_embedding.squeeze(0))
-        seq_length = hidden_states.size(1)
-        thought_embeddings = [
-            torch.nn.functional.pad(emb, (0, 0, 0, seq_length - emb.size(0)), mode='constant', value=0)[:seq_length]
-            for emb in thought_embeddings
-        ]

         return thought_ids, thought_embeddings


     @add_start_docstrings_to_model_forward(QUIET_INPUTS_DOCSTRING)
     def forward(
         self,
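A note on the removed loop: `thought_id` starts as a 1x1 tensor, and the early-exit check `if next_token_id == self.config.end_token_id:` relies on the comparison producing a single-element tensor, so it only behaves as intended for batch size 1. The self-contained snippet below (the `eos_token_id` value is a placeholder for illustration, not taken from this model's config) shows why the same check breaks on a batch:

```python
import torch

eos_token_id = 2  # placeholder id, for illustration only

# One sequence: argmax yields a one-element tensor, which `if` coerces to a bool.
next_token_id = torch.tensor([2])
print(bool(next_token_id == eos_token_id))  # True

# Several sequences: the comparison yields a multi-element tensor and `if` raises.
batched_next_ids = torch.tensor([2, 5])
try:
    if batched_next_ids == eos_token_id:
        pass
except RuntimeError as err:
    print(err)  # Boolean value of Tensor with more than one element is ambiguous
```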
Old version of the `QuietForCausalLM.forward` hunk, with the unfinished logit-mixing stub (removed lines are prefixed with `-`):

@@ -1229,13 +1218,17 @@ class QuietForCausalLM(QuietPreTrainedModel):
         hidden_states = outputs.last_hidden_state
         logits = self.lm_head(hidden_states)

-
         thought_hidden_states = self.model(inputs_embeds=thought_embeddings).last_hidden_state
         thought_logits = self.lm_head(thought_hidden_states)

-
-
-        mixed_logits =
         loss = None
         if labels is not None:
             # Shift so that tokens < n predict n
New version of the `QuietModel._generate_thoughts` hunk (added lines are prefixed with `+`):

         self.embed_tokens = value

     def _generate_thoughts(self, hidden_states, max_length):
+        batch_size = hidden_states.size(0)
+        thought_ids = torch.zeros((batch_size, self.config.num_thoughts, max_length), dtype=torch.long, device=hidden_states.device)
         thought_embeddings = []

+        for i in range(self.config.num_thoughts):
+            thought_input_ids = torch.zeros((batch_size, 1), dtype=torch.long, device=hidden_states.device)
+            thought_outputs = self.model.generate(
+                input_ids=thought_input_ids,
+                max_length=max_length,
+                do_sample=True,
+                top_k=50,
+                top_p=0.95,
+                pad_token_id=self.config.pad_token_id,
+                eos_token_id=self.config.eos_token_id,
+            )
+            thought_ids[:, i, :] = thought_outputs
+            thought_embeddings.append(self.model.get_input_embeddings()(thought_outputs))
+
+        thought_embeddings = torch.stack(thought_embeddings, dim=1)
         return thought_ids, thought_embeddings


+
     @add_start_docstrings_to_model_forward(QUIET_INPUTS_DOCSTRING)
     def forward(
         self,
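One assumption in the added loop is worth flagging: the copy `thought_ids[:, i, :] = thought_outputs` expects every `generate` call to return sequences that are exactly `max_length` tokens wide, but generation can stop early at `eos_token_id`, in which case the returned tensor is narrower and the copy fails with a shape mismatch. Below is a minimal sketch of one way to guard the assignment; the helper name `pad_generated_ids` is hypothetical and not part of modeling_quiet.py, and it assumes `pad_token_id` is available on the config (as the `generate` call above already does):

```python
import torch
import torch.nn.functional as F

def pad_generated_ids(ids: torch.Tensor, target_length: int, pad_token_id: int) -> torch.Tensor:
    """Right-pad (or truncate) a (batch, seq_len) tensor of generated ids to target_length.

    Hypothetical helper: it only exists to make the fixed-width copy into
    thought_ids[:, i, :] safe when generation ends before max_length.
    """
    seq_len = ids.size(1)
    if seq_len >= target_length:
        return ids[:, :target_length]
    return F.pad(ids, (0, target_length - seq_len), value=pad_token_id)

# Sketch of how the loop body could use it:
#   thought_outputs = pad_generated_ids(thought_outputs, max_length, self.config.pad_token_id)
#   thought_ids[:, i, :] = thought_outputs
```

Padding to a fixed width also keeps the per-thought embedding tensors the same shape, which the later `torch.stack(thought_embeddings, dim=1)` requires.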
New version of the `QuietForCausalLM.forward` hunk (added lines are prefixed with `+`):

         hidden_states = outputs.last_hidden_state
         logits = self.lm_head(hidden_states)

+
+        thought_ids, thought_embeddings = self._generate_thoughts(hidden_states, max_length=self.config.max_thought_length)
         thought_hidden_states = self.model(inputs_embeds=thought_embeddings).last_hidden_state
+
+        # Compute thought logits
         thought_logits = self.lm_head(thought_hidden_states)

+        # Mix base and thought logits
+        mixed_logits = logits.unsqueeze(1) + self.mixing_head(thought_logits)
+        mixed_logits = mixed_logits.view(-1, mixed_logits.size(-1))
+
         loss = None
         if labels is not None:
             # Shift so that tokens < n predict n
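The mixing step relies on `self.mixing_head`, which is not shown in this hunk. Purely to illustrate the shapes in `logits.unsqueeze(1) + self.mixing_head(thought_logits)`, here is a minimal, hypothetical stand-in (a vocabulary-to-vocabulary linear projection with toy sizes); the real module may be defined quite differently:

```python
import torch
import torch.nn as nn

class LogitMixingHead(nn.Module):
    """Hypothetical stand-in for `self.mixing_head`, for shape illustration only."""

    def __init__(self, vocab_size: int):
        super().__init__()
        self.proj = nn.Linear(vocab_size, vocab_size)

    def forward(self, thought_logits: torch.Tensor) -> torch.Tensor:
        # nn.Linear acts on the last (vocabulary) dimension, so any leading
        # (batch, num_thoughts, seq_len) dimensions pass through unchanged.
        return self.proj(thought_logits)


# Toy shape check: batch=2, num_thoughts=3, seq_len=5, vocab=11.
base_logits = torch.randn(2, 5, 11)        # (batch, seq_len, vocab)
thought_logits = torch.randn(2, 3, 5, 11)  # (batch, num_thoughts, seq_len, vocab)
mixed = base_logits.unsqueeze(1) + LogitMixingHead(11)(thought_logits)
print(mixed.shape)                          # torch.Size([2, 3, 5, 11])
print(mixed.view(-1, mixed.size(-1)).shape) # torch.Size([30, 11])
```

The final `view(-1, mixed_logits.size(-1))` flattens every leading dimension into one, giving the (tokens, vocab) layout that a cross-entropy loss over the shifted labels expects.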