Spaces:

rootstrap-org
/

wordle-solver

Sleeping

App Files Community

santit96 committed on May 3, 2023

Commit

c10a05f

1 Parent(s): d560781

Fix code styles

Browse files

Files changed (15) hide show

a3c/eval.py +10 -3
a3c/net.py +2 -2
a3c/play.py +1 -1
a3c/shared_adam.py +6 -2
a3c/train.py +24 -6
a3c/worker.py +10 -4
api_rest/api.py +2 -1
main.py +76 -18
rs_wordle_player/firebase_connector.py +4 -1
rs_wordle_player/selenium_player.py +4 -2
wordle_env/state.py +27 -9
wordle_env/test_wordle.py +16 -7
wordle_env/wordle.py +16 -12
wordle_env/words.py +14 -6
wordle_game.py +7 -3

a3c/eval.py CHANGED Viewed

@@ -13,7 +13,11 @@ def evaluate_checkpoints(dir, env):
         if os.path.isfile(pretrained_model_path):
             wins, guesses = evaluate(env, pretrained_model_path)
             results[checkpoint] = wins, guesses
-    return dict(sorted(results.items(), key=lambda x: (x[1][0], -x[1][1]), reverse=True))
 def evaluate(env, pretrained_model_path):
@@ -30,6 +34,9 @@ def evaluate(env, pretrained_model_path):
         # else:
         #     print("Lost!", goal_word, outcomes)
         n_guesses += len(outcomes)
-    print(f"Evaluation complete, won {n_wins/N*100}% and took {n_win_guesses/n_wins} guesses per win, "
-          f"{n_guesses / N} including losses.")
     return n_wins/N*100, n_win_guesses/n_wins

         if os.path.isfile(pretrained_model_path):
             wins, guesses = evaluate(env, pretrained_model_path)
             results[checkpoint] = wins, guesses
+    return dict(
+        sorted(results.items(), key=lambda x: (
+            x[1][0], -x[1][1]), reverse=True
+        )
+    )
 def evaluate(env, pretrained_model_path):
         # else:
         #     print("Lost!", goal_word, outcomes)
         n_guesses += len(outcomes)
+    print(
+        f"Evaluation complete, won {n_wins/N*100}% and \
+        took {n_win_guesses/n_wins} guesses per win, "
+        f"{n_guesses / N} including losses."
+    )
     return n_wins/N*100, n_win_guesses/n_wins

a3c/net.py CHANGED Viewed

@@ -23,7 +23,7 @@ class Net(nn.Module):
         word_array = np.zeros((word_width, len(word_list)))
         for i, word in enumerate(word_list):
             for j, c in enumerate(word):
-                word_array[ j*26 + (ord(c) - ord('A')), i ] = 1
         self.words = torch.Tensor(word_array)
     def forward(self, x):
@@ -47,7 +47,7 @@ class Net(nn.Module):
         logits, values = self.forward(s)
         td = v_t - values
         c_loss = td.pow(2)
         probs = F.softmax(logits, dim=1)
         m = self.distribution(probs)
         exp_v = m.log_prob(a) * td.detach().squeeze()

         word_array = np.zeros((word_width, len(word_list)))
         for i, word in enumerate(word_list):
             for j, c in enumerate(word):
+                word_array[j*26 + (ord(c) - ord('A')), i] = 1
         self.words = torch.Tensor(word_array)
     def forward(self, x):
         logits, values = self.forward(s)
         td = v_t - values
         c_loss = td.pow(2)
         probs = F.softmax(logits, dim=1)
         m = self.distribution(probs)
         exp_v = m.log_prob(a) * td.detach().squeeze()

a3c/play.py CHANGED Viewed

@@ -52,7 +52,7 @@ def suggest(
     return env.words[net.choose_action(v_wrap(state[None, :]))]
-def play(env, pretrained_model_path, goal_word = None):
     env = env.unwrapped
     net = get_net(env, pretrained_model_path)
     state = get_initial_state(env)

     return env.words[net.choose_action(v_wrap(state[None, :]))]
+def play(env, pretrained_model_path, goal_word=None):
     env = env.unwrapped
     net = get_net(env, pretrained_model_path)
     state = get_initial_state(env)

a3c/shared_adam.py CHANGED Viewed

@@ -1,5 +1,6 @@
 """
-Shared optimizer, the parameters in the optimizer will shared in the multiprocessors.
 """
 import torch
@@ -7,7 +8,10 @@ import torch
 class SharedAdam(torch.optim.Adam):
     def __init__(self, params, lr=1e-3, betas=(0.9, 0.99), eps=1e-8,
                  weight_decay=0):
-        super(SharedAdam, self).__init__(params, lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
         # State initialization
         for group in self.param_groups:
             for p in group['params']:

 """
+Shared optimizer, the parameters in the optimizer
+will shared in the multiprocessors.
 """
 import torch
 class SharedAdam(torch.optim.Adam):
     def __init__(self, params, lr=1e-3, betas=(0.9, 0.99), eps=1e-8,
                  weight_decay=0):
+        super(SharedAdam, self).__init__(
+            params, lr=lr,
+            betas=betas, eps=eps, weight_decay=weight_decay
+        )
         # State initialization
         for group in self.param_groups:
             for p in group['params']:

a3c/train.py CHANGED Viewed

@@ -21,7 +21,17 @@ def _set_seed(seed: int = 100) -> None:
     os.environ["PYTHONHASHSEED"] = str(seed)
-def train(env, max_ep, model_checkpoint_dir, gamma=0., seed=100, pretrained_model_path=None, save=False, min_reward=9.9, every_n_save=100):
     os.environ["OMP_NUM_THREADS"] = "1"
     if not os.path.exists(model_checkpoint_dir):
         os.makedirs(model_checkpoint_dir)
@@ -35,12 +45,19 @@ def train(env, max_ep, model_checkpoint_dir, gamma=0., seed=100, pretrained_mode
     if pretrained_model_path:
         gnet.load_state_dict(torch.load(pretrained_model_path))
     gnet.share_memory()  # share the global parameters in multiprocessing
-    opt = SharedAdam(gnet.parameters(), lr=1e-4, betas=(0.92, 0.999))  # global optimizer
-    global_ep, global_ep_r, res_queue, win_ep = mp.Value('i', 0), mp.Value('d', 0.), mp.Queue(), mp.Value('i', 0)
     # parallel training
-    workers = [Worker(max_ep, gnet, opt, global_ep, global_ep_r, res_queue, i, env, n_s, n_a,
-                      words_list, word_width, win_ep, model_checkpoint_dir, gamma, pretrained_model_path, save, min_reward, every_n_save) for i in range(mp.cpu_count())]
     [w.start() for w in workers]
     res = []  # record episode reward to plot
     while True:
@@ -51,5 +68,6 @@ def train(env, max_ep, model_checkpoint_dir, gamma=0., seed=100, pretrained_mode
             break
     [w.join() for w in workers]
     if save:
-        torch.save(gnet.state_dict(), os.path.join(model_checkpoint_dir, f'model_{env.unwrapped.spec.id}.pth'))
     return global_ep, win_ep, gnet, res

     os.environ["PYTHONHASHSEED"] = str(seed)
+def train(
+    env,
+    max_ep,
+    model_checkpoint_dir,
+    gamma=0.,
+    seed=100,
+    pretrained_model_path=None,
+    save=False,
+    min_reward=9.9,
+    every_n_save=100
+):
     os.environ["OMP_NUM_THREADS"] = "1"
     if not os.path.exists(model_checkpoint_dir):
         os.makedirs(model_checkpoint_dir)
     if pretrained_model_path:
         gnet.load_state_dict(torch.load(pretrained_model_path))
     gnet.share_memory()  # share the global parameters in multiprocessing
+    opt = SharedAdam(gnet.parameters(), lr=1e-4,
+                     betas=(0.92, 0.999))  # global optimizer
+    global_ep, global_ep_r, res_queue, win_ep = mp.Value(
+        'i', 0), mp.Value('d', 0.), mp.Queue(), mp.Value('i', 0)
     # parallel training
+    workers = [
+        Worker(
+            max_ep, gnet, opt, global_ep, global_ep_r, res_queue, i, env,
+            n_s, n_a, words_list, word_width, win_ep, model_checkpoint_dir,
+            gamma, pretrained_model_path, save, min_reward, every_n_save
+        ) for i in range(mp.cpu_count())
+    ]
     [w.start() for w in workers]
     res = []  # record episode reward to plot
     while True:
             break
     [w.join() for w in workers]
     if save:
+        torch.save(gnet.state_dict(), os.path.join(
+            model_checkpoint_dir, f'model_{env.unwrapped.spec.id}.pth'))
     return global_ep, win_ep, gnet, res

a3c/worker.py CHANGED Viewed

@@ -36,7 +36,10 @@ class Worker(mp.Process):
         super(Worker, self).__init__()
         self.max_ep = max_ep
         self.name = 'w%02i' % name
-        self.g_ep, self.g_ep_r, self.res_queue, self.winning_ep = global_ep, global_ep_r, res_queue, winning_ep
         self.gnet, self.opt = gnet, opt
         self.word_list = words_list
         # local network
@@ -91,8 +94,10 @@ class Worker(mp.Process):
         loss = self.lnet.loss_func(
             v_wrap(np.vstack(bs)),
-            v_wrap(np.array(ba), dtype=np.int64) if ba[0].dtype == np.int64 else v_wrap(np.vstack(ba)),
-            v_wrap(np.array(buffer_v_target)[:, None]))
         # calculate local gradients and push local parameters to global
         self.opt.zero_grad()
@@ -105,7 +110,8 @@ class Worker(mp.Process):
         self.lnet.load_state_dict(self.gnet.state_dict())
     def save_model(self):
-        if self.save and self.g_ep_r.value >= self.min_reward and self.g_ep.value % self.every_n_save == 0:
             torch.save(self.gnet.state_dict(), os.path.join(
                 self.model_checkpoint_dir, f'model_{self.g_ep.value}.pth'))

         super(Worker, self).__init__()
         self.max_ep = max_ep
         self.name = 'w%02i' % name
+        self.g_ep = global_ep
+        self.g_ep_r = global_ep_r
+        self.res_queue = res_queue
+        self.winning_ep = winning_ep
         self.gnet, self.opt = gnet, opt
         self.word_list = words_list
         # local network
         loss = self.lnet.loss_func(
             v_wrap(np.vstack(bs)),
+            v_wrap(np.array(ba), dtype=np.int64) if
+            ba[0].dtype == np.int64 else v_wrap(np.vstack(ba)),
+            v_wrap(np.array(buffer_v_target)[:, None])
+        )
         # calculate local gradients and push local parameters to global
         self.opt.zero_grad()
         self.lnet.load_state_dict(self.gnet.state_dict())
     def save_model(self):
+        if (self.save and self.g_ep_r.value >= self.min_reward and
+                self.g_ep.value % self.every_n_save == 0):
             torch.save(self.gnet.state_dict(), os.path.join(
                 self.model_checkpoint_dir, f'model_{self.g_ep.value}.pth'))

api_rest/api.py CHANGED Viewed

@@ -29,7 +29,8 @@ def get_play():
     word = word.upper()
     env = get_env()
     model_path = get_play_model_path()
-    # Call the play function with the goal word and return the attempts and the result
     won, attempts = play(env, model_path, word)
     return jsonify({'attempts': attempts, 'won': won})

     word = word.upper()
     env = get_env()
     model_path = get_play_model_path()
+    # Call the play function with the goal word
+    # and return the attempts and the result
     won, attempts = play(env, model_path, word)
     return jsonify({'attempts': attempts, 'won': won})

main.py CHANGED Viewed

@@ -13,8 +13,14 @@ from wordle_env.wordle import get_env
 def training_mode(args, env, model_checkpoint_dir):
     max_ep = args.games
     start_time = time.time()
-    pretrained_model_path = os.path.join(model_checkpoint_dir, args.model_name) if args.model_name else args.model_name
-    global_ep, win_ep, gnet, res = train(env, max_ep, model_checkpoint_dir, args.gamma, args.seed, pretrained_model_path, args.save, args.min_reward, args.every_n_save)
     print("--- %.0f seconds ---" % (time.time() - start_time))
     print_results(global_ep, win_ep, res)
     evaluate(gnet, env)
@@ -28,8 +34,8 @@ def evaluation_mode(args, env, model_checkpoint_dir):
 def play_mode(args, env, model_checkpoint_dir):
     print("Play mode")
-    words = [ word.strip() for word in args.words.split(',') ]
-    states = [ state.strip() for state in args.states.split(',') ]
     pretrained_model_path = os.path.join(model_checkpoint_dir, args.model_name)
     word = suggest(env, words, states, pretrained_model_path)
     print(word)
@@ -47,27 +53,64 @@ def print_results(global_ep, win_ep, res):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "enviroment", help="Enviroment (type of wordle game) used for training, example: WordleEnvFull-v0")
     parser.add_argument(
-        "--models_dir", help="Directory where models are saved (default=checkpoints)", default='checkpoints')
     subparsers = parser.add_subparsers(help='sub-command help')
     parser_train = subparsers.add_parser(
-        'train', help='Train a model from scratch or train from pretrained model')
     parser_train.add_argument(
-        "--games", "-g", help="Number of games to train", type=int, required=True)
     parser_train.add_argument(
-        "--model_name", "-m", help="If want to train from a pretrained model, the name of the pretrained model file")
     parser_train.add_argument(
-        "--gamma", help="Gamma hyperparameter (discount factor) value", type=float, default=0.)
     parser_train.add_argument(
-        "--seed", help="Seed used for random numbers generation", type=int, default=100)
     parser_train.add_argument(
-        "--save", '-s', help="Save instances of the model while training", action='store_true')
     parser_train.add_argument(
-        "--min_reward", help="The minimun global reward value achieved for saving the model", type=float, default=9.9)
     parser_train.add_argument(
-        "--every_n_save", help="Check every n training steps to save the model", type=int, default=100)
     parser_train.set_defaults(func=training_mode)
     parser_eval = subparsers.add_parser(
@@ -75,13 +118,28 @@ if __name__ == "__main__":
     parser_eval.set_defaults(func=evaluation_mode)
     parser_play = subparsers.add_parser(
-        'play', help='Give the model a word and the state result and the model will try to predict the goal word')
     parser_play.add_argument(
-        "--words", "-w", help="List of words played in the wordle game", required=True)
     parser_play.add_argument(
-        "--states", "-st", help="List of states returned by playing each of the words", required=True)
     parser_play.add_argument(
-        "--model_name", "-m", help="Name of the pretrained model file thich will play the game", required=True)
     parser_play.set_defaults(func=play_mode)
     args = parser.parse_args()

 def training_mode(args, env, model_checkpoint_dir):
     max_ep = args.games
     start_time = time.time()
+    pretrained_model_path = os.path.join(
+        model_checkpoint_dir, args.model_name
+    ) if args.model_name else args.model_name
+    global_ep, win_ep, gnet, res = train(
+        env, max_ep, model_checkpoint_dir, args.gamma,
+        args.seed, pretrained_model_path, args.save,
+        args.min_reward, args.every_n_save
+    )
     print("--- %.0f seconds ---" % (time.time() - start_time))
     print_results(global_ep, win_ep, res)
     evaluate(gnet, env)
 def play_mode(args, env, model_checkpoint_dir):
     print("Play mode")
+    words = [word.strip() for word in args.words.split(',')]
+    states = [state.strip() for state in args.states.split(',')]
     pretrained_model_path = os.path.join(model_checkpoint_dir, args.model_name)
     word = suggest(env, words, states, pretrained_model_path)
     print(word)
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
+        "enviroment",
+        help="Enviroment (type of wordle game) used for training, \
+            example: WordleEnvFull-v0"
+    )
     parser.add_argument(
+        "--models_dir",
+        help="Directory where models are saved (default=checkpoints)",
+        default='checkpoints'
+    )
     subparsers = parser.add_subparsers(help='sub-command help')
     parser_train = subparsers.add_parser(
+        'train',
+        help='Train a model from scratch or train from pretrained model'
+    )
     parser_train.add_argument(
+        "--games",
+        "-g",
+        help="Number of games to train",
+        type=int,
+        required=True
+    )
     parser_train.add_argument(
+        "--model_name",
+        "-m",
+        help="If want to train from a pretrained model, \
+            the name of the pretrained model file"
+    )
     parser_train.add_argument(
+        "--gamma",
+        help="Gamma hyperparameter (discount factor) value",
+        type=float,
+        default=0.
+    )
     parser_train.add_argument(
+        "--seed",
+        help="Seed used for random numbers generation",
+        type=int,
+        default=100
+    )
     parser_train.add_argument(
+        "--save",
+        '-s',
+        help="Save instances of the model while training",
+        action='store_true'
+    )
     parser_train.add_argument(
+        "--min_reward",
+        help="The minimun global reward value achieved for saving the model",
+        type=float,
+        default=9.9
+    )
     parser_train.add_argument(
+        "--every_n_save",
+        help="Check every n training steps to save the model",
+        type=int,
+        default=100
+    )
     parser_train.set_defaults(func=training_mode)
     parser_eval = subparsers.add_parser(
     parser_eval.set_defaults(func=evaluation_mode)
     parser_play = subparsers.add_parser(
+        'play',
+        help='Give the model a word and the state result \
+            and the model will try to predict the goal word'
+    )
     parser_play.add_argument(
+        "--words",
+        "-w",
+        help="List of words played in the wordle game",
+        required=True
+    )
     parser_play.add_argument(
+        "--states",
+        "-st",
+        help="List of states returned by playing each of the words",
+        required=True
+    )
     parser_play.add_argument(
+        "--model_name",
+        "-m",
+        help="Name of the pretrained model file thich will play the game",
+        required=True
+    )
     parser_play.set_defaults(func=play_mode)
     args = parser.parse_args()

rs_wordle_player/firebase_connector.py CHANGED Viewed

@@ -31,7 +31,10 @@ class FirebaseConnector():
         result_number_map = {'incorrect': '0',
                              'misplaced': '1',
                              'correct': '2'}
-        return ''.join(map(lambda char_res: result_number_map[char_res], firebase_result))
     def today(self):
         return datetime.today().strftime('%Y%m%d')

         result_number_map = {'incorrect': '0',
                              'misplaced': '1',
                              'correct': '2'}
+        char_result_map = map(
+            lambda char_res: result_number_map[char_res], firebase_result
+        )
+        return ''.join(char_result_map)
     def today(self):
         return datetime.today().strftime('%Y%m%d')

rs_wordle_player/selenium_player.py CHANGED Viewed

@@ -56,11 +56,13 @@ class SeleniumPlayer():
             element.send_keys(Keys.ENTER)
             self.driver.switch_to.window(wordle_window)
             time.sleep(5)
-            onboard_div = self.driver.find_element(By.CLASS_NAME, 'onboarding-modal-container')
             onboard_btn = onboard_div.find_elements(By.TAG_NAME, 'button')
             onboard_btn[-1].click()
     def play_word(self, word):
         try:
             element = self.driver.find_element(By.TAG_NAME, 'html')

             element.send_keys(Keys.ENTER)
             self.driver.switch_to.window(wordle_window)
             time.sleep(5)
+            onboard_div = self.driver.find_element(
+                By.CLASS_NAME,
+                'onboarding-modal-container'
+            )
             onboard_btn = onboard_div.find_elements(By.TAG_NAME, 'button')
             onboard_btn[-1].click()
     def play_word(self, word):
         try:
             element = self.driver.find_element(By.TAG_NAME, 'html')

wordle_env/state.py CHANGED Viewed

@@ -40,7 +40,11 @@ SOMEWHERE = 1
 YES = 2
-def update_from_mask(state: WordleState, word: str, mask: List[int]) -> WordleState:
     """
     return a copy of state that has been updated to new state
@@ -71,7 +75,9 @@ def update_from_mask(state: WordleState, word: str, mask: List[int]) -> WordleSt
         offset = 1 + cint * WORDLE_N * 3
         if mask[i] == SOMEWHERE:
             prior_maybe.append(c)
-            # Char at position i = no, and in other positions maybe except it had a value before, other chars stay as they are
             _set_no(state, offset, i)
             _set_if_cero(state, offset, [0, 1, 0])
         elif mask[i] == NO:
@@ -80,7 +86,8 @@ def update_from_mask(state: WordleState, word: str, mask: List[int]) -> WordleSt
                 # Then the maybe could be anywhere except here
                 state[offset+3*i:offset+3*i+3] = [1, 0, 0]
             elif c in prior_yes:
-                # No maybe, definitely a yes, so it's zero everywhere except the yesses
                 for j in range(WORDLE_N):
                     # Only flip no if previously was maybe
                     if state[offset + 3 * j:offset + 3 * j + 3][1] == 1:
@@ -129,7 +136,11 @@ def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
     return update_from_mask(state, word, mask)
-def update(state: WordleState, word: str, goal_word: str) -> Tuple[WordleState, float]:
     state = state.copy()
     reward = 0
     state[0] -= 1
@@ -147,15 +158,20 @@ def update(state: WordleState, word: str, goal_word: str) -> Tuple[WordleState,
         cint = ord(c) - ord(WORDLE_CHARS[0])
         offset = 1 + cint * WORDLE_N * 3
         if goal_word[i] != c:
-            if c in goal_word and goal_word.count(c) > processed_letters.count(c):
-                # Char at position i = no, and in other positions maybe except it had a value before, other chars stay as they are
                 _set_no(state, offset, i)
                 _set_if_cero(state, offset, [0, 1, 0])
                 reward += CHAR_REWARD * 0.1
             elif c not in goal_word:
                 # Char at all positions = no
                 _set_all_no(state, offset)
-            else: # goal_word.count(c) <= processed_letters.count(c) and goal in word
                 # At i and in every position which is not set = no
                 _set_no(state, offset, i)
                 _set_if_cero(state, offset, [1, 0, 0])
@@ -173,13 +189,15 @@ def _set_if_cero(state, offset, value):
 def _set_yes(state, offset, char_int, char_pos):
-    # char at position char_pos = yes, all other chars at position char_pos == no
     pos_offset = 3 * char_pos
     state[offset + pos_offset:offset + pos_offset + 3] = [0, 0, 1]
     for ocint in range(len(WORDLE_CHARS)):
         if ocint != char_int:
             oc_offset = 1 + ocint * WORDLE_N * 3
-            state[oc_offset + pos_offset:oc_offset + pos_offset + 3] = [1, 0, 0]
 def _set_no(state, offset, char_pos):

 YES = 2
+def update_from_mask(
+    state: WordleState,
+    word: str,
+    mask: List[int]
+) -> WordleState:
     """
     return a copy of state that has been updated to new state
         offset = 1 + cint * WORDLE_N * 3
         if mask[i] == SOMEWHERE:
             prior_maybe.append(c)
+            # Char at position i = no,
+            # and in other positions maybe except it had a value before,
+            # other chars stay as they are
             _set_no(state, offset, i)
             _set_if_cero(state, offset, [0, 1, 0])
         elif mask[i] == NO:
                 # Then the maybe could be anywhere except here
                 state[offset+3*i:offset+3*i+3] = [1, 0, 0]
             elif c in prior_yes:
+                # No maybe, definitely a yes,
+                # so it's zero everywhere except the yesses
                 for j in range(WORDLE_N):
                     # Only flip no if previously was maybe
                     if state[offset + 3 * j:offset + 3 * j + 3][1] == 1:
     return update_from_mask(state, word, mask)
+def update(
+    state: WordleState,
+    word: str,
+    goal_word: str
+) -> Tuple[WordleState, float]:
     state = state.copy()
     reward = 0
     state[0] -= 1
         cint = ord(c) - ord(WORDLE_CHARS[0])
         offset = 1 + cint * WORDLE_N * 3
         if goal_word[i] != c:
+            if (c in goal_word and
+                    goal_word.count(c) > processed_letters.count(c)):
+                # Char at position i = no,
+                # and in other positions maybe except it had a value before,
+                # other chars stay as they are
                 _set_no(state, offset, i)
                 _set_if_cero(state, offset, [0, 1, 0])
                 reward += CHAR_REWARD * 0.1
             elif c not in goal_word:
                 # Char at all positions = no
                 _set_all_no(state, offset)
+            else:
+                # goal_word.count(c) <= processed_letters.count(c)
+                # and goal in word
                 # At i and in every position which is not set = no
                 _set_no(state, offset, i)
                 _set_if_cero(state, offset, [1, 0, 0])
 def _set_yes(state, offset, char_int, char_pos):
+    # char at position char_pos = yes,
+    # all other chars at position char_pos == no
     pos_offset = 3 * char_pos
     state[offset + pos_offset:offset + pos_offset + 3] = [0, 0, 1]
     for ocint in range(len(WORDLE_CHARS)):
         if ocint != char_int:
             oc_offset = 1 + ocint * WORDLE_N * 3
+            yes_index = oc_offset + pos_offset
+            state[yes_index:yes_index + 3] = [1, 0, 0]
 def _set_no(state, offset, char_pos):

wordle_env/test_wordle.py CHANGED Viewed

@@ -109,10 +109,12 @@ def test_lose_reward(wordleEnv):
     except ValueError:
         pass
 def letter_test(char, state, letter_state):
     offset = 1+3*5*(ord(char)-ord('A'))
     assert tuple(state[offset:offset+15]) == letter_state
 def test_step(wordleEnv):
     wordleEnv.reset()
     wordleEnv.set_goal_encoded(0)
@@ -218,6 +220,7 @@ def test_step(wordleEnv):
     assert wordleEnv.done
     assert reward == wordle.REWARD
 def test_special_step_cases(wordleEnv):
     wordleEnv.reset()
     wordleEnv.set_goal_encoded(4)
@@ -291,14 +294,16 @@ def test_special_step_cases(wordleEnv):
                     1, 0, 0)
     letter_test('P', new_state, letter_state)
 def test_mask_update(wordleEnv):
     wordleEnv.reset()
     wordleEnv.set_goal_encoded(0)
     cur_state = wordleEnv.state
-    #"APPAA"
-    #"APPAB"
-    new_state = state.update_from_mask(cur_state, wordleEnv.words[1], [2, 2, 2, 2, 0])
     # Expect B to be all 1,0,0
     letter_test('B', new_state, tuple([1, 0, 0]*5))
@@ -328,7 +333,8 @@ def test_mask_update(wordleEnv):
     # "APPAA",
     # "APPAB",
     # "APAPD",
-    new_state = state.update_from_mask(new_state, wordleEnv.words[3], [2, 2, 1, 1, 0])
     # Expect D to be all 1,0,0
     letter_state = tuple([1, 0, 0]*5)
     letter_test('D', new_state, letter_state)
@@ -354,7 +360,8 @@ def test_mask_update(wordleEnv):
     wordleEnv.set_goal_encoded(4)
     # BPPAB - goal
     # PPAPB - 1st guess
-    new_state = state.update_from_mask(cur_state, wordleEnv.words[5], [1, 2, 1, 0, 2])
     # Expect A to be all maybe except 2, 1 and 4 that are no
     letter_state = (0, 1, 0,
                     1, 0, 0,
@@ -379,7 +386,8 @@ def test_mask_update(wordleEnv):
     # BPPAB - goal
     # PPAPB - 1st guess
     # PPBBA - 2nd guess
-    new_state = state.update_from_mask(new_state, wordleEnv.words[6], [1, 2, 1, 1, 1])
     # Expect A to be all maybe except 2, 1 and 4 that are no
     letter_state = (0, 1, 0,
                     1, 0, 0,
@@ -405,7 +413,8 @@ def test_mask_update(wordleEnv):
     wordleEnv.set_goal_encoded(7)
     # BPABB - goal
     # PPPAC - 1st guess
-    new_state = state.update_from_mask(new_state, wordleEnv.words[8], [0, 2, 0, 1, 0])
     new_state, _, _, _ = wordleEnv.step(8)
     # Expect A to be all maybe except 1 and 3 that is no
     letter_state = (0, 1, 0,

     except ValueError:
         pass
 def letter_test(char, state, letter_state):
     offset = 1+3*5*(ord(char)-ord('A'))
     assert tuple(state[offset:offset+15]) == letter_state
 def test_step(wordleEnv):
     wordleEnv.reset()
     wordleEnv.set_goal_encoded(0)
     assert wordleEnv.done
     assert reward == wordle.REWARD
 def test_special_step_cases(wordleEnv):
     wordleEnv.reset()
     wordleEnv.set_goal_encoded(4)
                     1, 0, 0)
     letter_test('P', new_state, letter_state)
 def test_mask_update(wordleEnv):
     wordleEnv.reset()
     wordleEnv.set_goal_encoded(0)
     cur_state = wordleEnv.state
+    # "APPAA"
+    # "APPAB"
+    new_state = state.update_from_mask(
+        cur_state, wordleEnv.words[1], [2, 2, 2, 2, 0])
     # Expect B to be all 1,0,0
     letter_test('B', new_state, tuple([1, 0, 0]*5))
     # "APPAA",
     # "APPAB",
     # "APAPD",
+    new_state = state.update_from_mask(
+        new_state, wordleEnv.words[3], [2, 2, 1, 1, 0])
     # Expect D to be all 1,0,0
     letter_state = tuple([1, 0, 0]*5)
     letter_test('D', new_state, letter_state)
     wordleEnv.set_goal_encoded(4)
     # BPPAB - goal
     # PPAPB - 1st guess
+    new_state = state.update_from_mask(
+        cur_state, wordleEnv.words[5], [1, 2, 1, 0, 2])
     # Expect A to be all maybe except 2, 1 and 4 that are no
     letter_state = (0, 1, 0,
                     1, 0, 0,
     # BPPAB - goal
     # PPAPB - 1st guess
     # PPBBA - 2nd guess
+    new_state = state.update_from_mask(
+        new_state, wordleEnv.words[6], [1, 2, 1, 1, 1])
     # Expect A to be all maybe except 2, 1 and 4 that are no
     letter_state = (0, 1, 0,
                     1, 0, 0,
     wordleEnv.set_goal_encoded(7)
     # BPABB - goal
     # PPPAC - 1st guess
+    new_state = state.update_from_mask(
+        new_state, wordleEnv.words[8], [0, 2, 0, 1, 0])
     new_state, _, _, _ = wordleEnv.step(8)
     # Expect A to be all maybe except 1 and 3 that is no
     letter_state = (0, 1, 0,

wordle_env/wordle.py CHANGED Viewed

@@ -1,9 +1,6 @@
-import os
-from typing import Optional, List, Tuple
 import gym
 from gym import spaces
-import numpy as np
 from . import state
 from .const import WORDLE_N, REWARD, WORDLE_CHARS
@@ -13,7 +10,10 @@ from .words import complete_vocabulary, target_vocabulary
 import random
-def _load_words(limit: Optional[int] = None, complete: Optional[bool] = False) -> List[str]:
     words = complete_vocabulary if complete else target_vocabulary
     return words if not limit else words[:limit]
@@ -29,11 +29,13 @@ class WordleEnvBase(gym.Env):
         * 13k for full vocab
     State space is defined as:
         * 6 possibilities for turns (WORDLE_TURNS)
-        * For each in VALID_CHARS [A-Z] can be in one of 3^WORDLE_N states: (No, Maybe, Yes)
         for full game, this is (3^5)^26
         Each state has 1 + 5*26 possibilities
     Reward:
-        Reward is 10 for guessing the right word, -10 for not guessing the right word after 6 guesses.
         1 from every letter correctly guessed on each try
     Starting State:
         Random goal word
@@ -44,7 +46,9 @@ class WordleEnvBase(gym.Env):
                  max_turns: int = 6,
                  allowable_words: Optional[int] = None,
                  mask_based_state_updates: bool = False):
-        assert all(len(w) == WORDLE_N for w in words), f'Not all words of length {WORDLE_N}, {words}'
         self.words = words
         self.max_turns = max_turns
         self.allowable_words = allowable_words
@@ -53,7 +57,8 @@ class WordleEnvBase(gym.Env):
             self.allowable_words = len(self.words)
         self.action_space = spaces.Discrete(self.words_as_action_space())
-        self.observation_space = spaces.MultiDiscrete(state.get_nvec(self.max_turns))
         self.done = True
         self.goal_word: int = -1
@@ -85,13 +90,12 @@ class WordleEnvBase(gym.Env):
             if state.remaining_steps(self.state) == self.max_turns-1:
                 reward = 0  # -10*REWARD  # No reward for guessing off the bat
             else:
-                # reward = REWARD*(self.state.remaining_steps() + 1) / self.max_turns
                 reward = REWARD
         elif state.remaining_steps(self.state) == 0:
             self.done = True
             reward = -REWARD
-        return self.state.copy(), reward, self.done, {"goal_id": self.goal_word}
     def reset(self):
         self.state = state.new(self.max_turns)

 import gym
 from gym import spaces
+from typing import Optional, List
 from . import state
 from .const import WORDLE_N, REWARD, WORDLE_CHARS
 import random
+def _load_words(
+    limit: Optional[int] = None,
+    complete: Optional[bool] = False
+) -> List[str]:
     words = complete_vocabulary if complete else target_vocabulary
     return words if not limit else words[:limit]
         * 13k for full vocab
     State space is defined as:
         * 6 possibilities for turns (WORDLE_TURNS)
+        * For each in VALID_CHARS [A-Z]
+        can be in one of 3^WORDLE_N states: (No, Maybe, Yes)
         for full game, this is (3^5)^26
         Each state has 1 + 5*26 possibilities
     Reward:
+        Reward is 10 for guessing the right word,
+        -10 for not guessing the right word after 6 guesses.
         1 from every letter correctly guessed on each try
     Starting State:
         Random goal word
                  max_turns: int = 6,
                  allowable_words: Optional[int] = None,
                  mask_based_state_updates: bool = False):
+        assert all(
+            len(w) == WORDLE_N for w in words
+        ), f'Not all words of length {WORDLE_N}, {words}'
         self.words = words
         self.max_turns = max_turns
         self.allowable_words = allowable_words
             self.allowable_words = len(self.words)
         self.action_space = spaces.Discrete(self.words_as_action_space())
+        self.observation_space = spaces.MultiDiscrete(
+            state.get_nvec(self.max_turns))
         self.done = True
         self.goal_word: int = -1
             if state.remaining_steps(self.state) == self.max_turns-1:
                 reward = 0  # -10*REWARD  # No reward for guessing off the bat
             else:
                 reward = REWARD
         elif state.remaining_steps(self.state) == 0:
             self.done = True
             reward = -REWARD
+        goal_dict = {"goal_id": self.goal_word}
+        return self.state.copy(), reward, self.done, goal_dict
     def reset(self):
         self.state = state.new(self.max_turns)

wordle_env/words.py CHANGED Viewed

@@ -1,22 +1,30 @@
 import os
 import urllib.request
-_COMPLETE_VOCABULARY_URL = "https://gist.githubusercontent.com/scholtes/94f3c0303ba6a7768b47583aff36654d/raw/d9cddf5e16140df9e14f19c2de76a0ef36fd2748/wordle-Ta.txt"
-_TARGET_VOCABULARY_URL = "https://gist.githubusercontent.com/scholtes/94f3c0303ba6a7768b47583aff36654d/raw/d9cddf5e16140df9e14f19c2de76a0ef36fd2748/wordle-La.txt"
 _DOWNLOADS_DIR = '.'
 _COMPLETE_VOCABULARY_FILENAME = "complete_vocabulary.txt"
 _TARGET_VOCABULARY_FILENAME = "target_vocabulary.txt"
 def _retrieve_vocabulary(url, filename, dir):
     vocabulary_file = os.path.join(dir, filename)
     # Download the file if it does not exist
     if not os.path.isfile(vocabulary_file):
         urllib.request.urlretrieve(url, vocabulary_file)
     with open(vocabulary_file) as file:
-        return  [line.rstrip().upper() for line in file]
-target_vocabulary = _retrieve_vocabulary(_TARGET_VOCABULARY_URL, _TARGET_VOCABULARY_FILENAME, _DOWNLOADS_DIR )
-complete_vocabulary = _retrieve_vocabulary(_COMPLETE_VOCABULARY_URL, _COMPLETE_VOCABULARY_FILENAME, _DOWNLOADS_DIR ) + target_vocabulary

 import os
 import urllib.request
+_COMPLETE_VOCABULARY_URL = (
+    "https://gist.githubusercontent.com/scholtes/"
+    "94f3c0303ba6a7768b47583aff36654d/raw/"
+    "d9cddf5e16140df9e14f19c2de76a0ef36fd2748/wordle-Ta.txt"
+)
+_TARGET_VOCABULARY_URL = (
+    "https://gist.githubusercontent.com/scholtes/"
+    "94f3c0303ba6a7768b47583aff36654d/raw/"
+    "d9cddf5e16140df9e14f19c2de76a0ef36fd2748/wordle-La.txt"
+)
 _DOWNLOADS_DIR = '.'
 _COMPLETE_VOCABULARY_FILENAME = "complete_vocabulary.txt"
 _TARGET_VOCABULARY_FILENAME = "target_vocabulary.txt"
 def _retrieve_vocabulary(url, filename, dir):
     vocabulary_file = os.path.join(dir, filename)
     # Download the file if it does not exist
     if not os.path.isfile(vocabulary_file):
         urllib.request.urlretrieve(url, vocabulary_file)
     with open(vocabulary_file) as file:
+        return [line.rstrip().upper() for line in file]
+target_vocabulary = _retrieve_vocabulary(
+    _TARGET_VOCABULARY_URL, _TARGET_VOCABULARY_FILENAME, _DOWNLOADS_DIR)
+complete_vocabulary = _retrieve_vocabulary(
+    _COMPLETE_VOCABULARY_URL, _COMPLETE_VOCABULARY_FILENAME, _DOWNLOADS_DIR
+) + target_vocabulary

wordle_game.py CHANGED Viewed

@@ -14,6 +14,7 @@ PLAYER_INSTRUCTIONS = "You may start guessing\n"
 GUESS_STATEMENT = "\nEnter your guess"
 ALLOWED_GUESSES = 6
 def correct_place(letter):
     return f'[black on green]{letter}[/]'
@@ -37,7 +38,8 @@ def check_guess(guess, answer):
             processed_letters.append(letter)
     for i, letter in enumerate(guess):
         if answer[i] != guess[i]:
-            if letter in answer and answer.count(letter) > processed_letters.count(letter):
                 guessed[i] = correct_letter(letter)
                 wordle_pattern.append(SQUARES['correct_letter'])
             else:
@@ -55,7 +57,8 @@ def game(console, chosen_word):
     while not end_of_game:
         guess = Prompt.ask(GUESS_STATEMENT).upper()
-        while len(guess) != 5 or guess in already_guessed or guess not in complete_vocabulary:
             if guess in already_guessed:
                 console.print("[red]You've already guessed this word!!\n[/]")
             else:
@@ -73,7 +76,8 @@ def game(console, chosen_word):
         console.print(f"\n[red]WORDLE X/{ALLOWED_GUESSES}[/]")
         console.print(f'\n[green]Correct Word: {chosen_word}[/]')
     else:
-        console.print(f"\n[green]WORDLE {len(already_guessed)}/{ALLOWED_GUESSES}[/]\n")
     console.print(*full_wordle_pattern, sep="\n")

 GUESS_STATEMENT = "\nEnter your guess"
 ALLOWED_GUESSES = 6
 def correct_place(letter):
     return f'[black on green]{letter}[/]'
             processed_letters.append(letter)
     for i, letter in enumerate(guess):
         if answer[i] != guess[i]:
+            if (letter in answer and
+                    answer.count(letter) > processed_letters.count(letter)):
                 guessed[i] = correct_letter(letter)
                 wordle_pattern.append(SQUARES['correct_letter'])
             else:
     while not end_of_game:
         guess = Prompt.ask(GUESS_STATEMENT).upper()
+        while (len(guess) != 5 or guess in already_guessed or
+               guess not in complete_vocabulary):
             if guess in already_guessed:
                 console.print("[red]You've already guessed this word!!\n[/]")
             else:
         console.print(f"\n[red]WORDLE X/{ALLOWED_GUESSES}[/]")
         console.print(f'\n[green]Correct Word: {chosen_word}[/]')
     else:
+        console.print(
+            f"\n[green]WORDLE {len(already_guessed)}/{ALLOWED_GUESSES}[/]\n")
     console.print(*full_wordle_pattern, sep="\n")