Maxlegrec
/

ChessBot

@@ -594,8 +594,7 @@ class ChessBotModel(ChessBotPreTrainedModel):
         Get a move from FEN string without thinking
         """
         board = chess.Board(fen)
-        legal_moves = [move.uci() for move in board.legal_moves]
         if not legal_moves:
             return None
@@ -608,6 +607,19 @@ class ChessBotModel(ChessBotPreTrainedModel):
         with torch.no_grad():
             _, policy, _, _ = self.forward(fen_tensor)
             policy = policy.squeeze(0).squeeze(0)  # Remove batch and sequence dimensions
         # Apply temperature
         if T > 0:
@@ -619,13 +631,8 @@ class ChessBotModel(ChessBotPreTrainedModel):
         # Map to legal moves
         legal_move_probs = {}
         for move in legal_moves:
-            if move in policy_index:
-                idx = policy_index.index(move)
-                legal_move_probs[move] = probs[idx].item()
-        if not legal_move_probs:
-            # If no legal moves found in policy, return random legal move
-            return np.random.choice(legal_moves)
         # Select move based on probabilities
         if return_probs:
@@ -638,45 +645,14 @@ class ChessBotModel(ChessBotPreTrainedModel):
             # Normalize probabilities
             total_prob = sum(move_probs)
-            if total_prob > 0:
-                move_probs = [p / total_prob for p in move_probs]
-                selected_move = np.random.choice(moves, p=move_probs)
-            else:
-                selected_move = np.random.choice(moves)
         else:
             # Consider all moves in policy
             selected_move = policy_index[torch.multinomial(probs, 1).item()]
         return selected_move
-    def get_position_value(self, fen, device="cuda"):
-        """
-        Get the value evaluation for a given FEN position.
-        Returns the value vector [black_win_prob, draw_prob, white_win_prob]
-        """
-        x = torch.from_numpy(fen_to_tensor(fen)).to(device).to(torch.float32)
-        x = x.view(1, 1, 8, 8, 19)
-        # Forward pass through the model to get value
-        with torch.no_grad():
-            # We need to run through the model layers to get to value_head
-            b, seq_len, _, _, emb = x.size()
-            x_processed = x.view(b * seq_len, 64, emb)
-            x_processed = self.linear1(x_processed)
-            x_processed = F.gelu(x_processed)
-            x_processed = self.layernorm1(x_processed)
-            x_processed = self.ma_gating(x_processed)
-            pos_enc = self.positional(x_processed)
-            for i in range(self.num_layers):
-                x_processed = self.layers[i](x_processed, pos_enc)
-            value_logits = self.value_head_q(x_processed)
-            value_logits = value_logits.view(b, seq_len, 3)
-            value_logits = torch.softmax(value_logits, dim=-1)
-        return value_logits.squeeze()  # Remove batch and sequence dimensions
     def get_batch_position_values(self, fens, device="cuda"):
         """
         Get the value evaluation for a batch of FEN positions efficiently.

         Get a move from FEN string without thinking
         """
         board = chess.Board(fen)
+        legal_moves = [move.uci() if move.uci() in policy_index else move.uci()[:-1] for move in board.legal_moves]
         if not legal_moves:
             return None
         with torch.no_grad():
             _, policy, _, _ = self.forward(fen_tensor)
             policy = policy.squeeze(0).squeeze(0)  # Remove batch and sequence dimensions
+        if T == 0:
+            if force_legal:
+                # Find the move with the highest policy value that is legal
+                legal_moves_mask = - torch.ones_like(policy) * 999
+                for move in legal_moves:
+                        legal_moves_mask[policy_index[move]] = 0
+                policy = legal_moves_mask + policy
+                return policy_index[torch.argmax(policy).item()]
+            else:
+                max_policy_index = torch.argmax(policy).item()
+                max_policy_move = policy_index[max_policy_index]
+                return max_policy_move
         # Apply temperature
         if T > 0:
         # Map to legal moves
         legal_move_probs = {}
         for move in legal_moves:
+            idx = policy_index.index(move_trunc)
+            legal_move_probs[move] = probs[idx].item()
         # Select move based on probabilities
         if return_probs:
             # Normalize probabilities
             total_prob = sum(move_probs)
+            move_probs = [p / total_prob for p in move_probs]
+            selected_move = np.random.choice(moves, p=move_probs)
         else:
             # Consider all moves in policy
             selected_move = policy_index[torch.multinomial(probs, 1).item()]
         return selected_move
     def get_batch_position_values(self, fens, device="cuda"):
         """
         Get the value evaluation for a batch of FEN positions efficiently.