"""Tests for the Wordle environment: rewards, termination, and state updates."""

import pytest

import wordle
import state

TESTWORDS = [
    "APPAA",
    "APPAB",
    "APPAC",
    "APPAD",
    "BPPAB",
    "BPPAC",
    "BPPAD",
    "CPPAB",
    "CPPAC",
    "CPPAD",
]

# Layout of the state vector as exercised by the assertions in test_step:
# letter blocks start at index 1 and each letter owns WORD_LEN * FLAGS_PER_POS
# consecutive entries.
_WORD_LEN = 5
_FLAGS_PER_POS = 3
_LETTER_BLOCK = _WORD_LEN * _FLAGS_PER_POS

# A letter whose flag triple is (1, 0, 0) at every position.
# NOTE(review): presumably the triple means (no, maybe, yes) -- confirm
# against the `state` module.
_ALL_NO = (1, 0, 0) * _WORD_LEN


def _letter_flags(state_vec, letter):
    """Return the 15 state entries belonging to *letter* as a tuple."""
    start = 1 + _LETTER_BLOCK * (ord(letter) - ord("A"))
    return tuple(state_vec[start:start + _LETTER_BLOCK])


def _word_after_goal(env, shift=1):
    """Return the encoded word *shift* entries after the goal, wrapping around."""
    goal_pos = env.words.index(env.decode_word(env.goal_word))
    return env.encode_word(env.words[(goal_pos + shift) % len(env.words)])


def _assert_step_after_done_raises(env, action):
    """Check that stepping a finished environment raises ValueError.

    The previous try/raise/except pattern caught its own sentinel ValueError,
    so it passed even when step() did not raise at all.
    """
    with pytest.raises(ValueError):
        env.step(action)


@pytest.fixture
def wordleEnv():
    """Provide a WordleEnvBase over TESTWORDS with six turns."""
    return wordle.WordleEnvBase(
        words=TESTWORDS,
        max_turns=6,
    )


def test_reset(wordleEnv):
    """Resetting with a seed must not raise."""
    wordleEnv.reset(seed=13)


def test_guess_win(wordleEnv):
    """Guessing the goal on the first turn ends the episode with reward 0."""
    wordleEnv.reset(seed=13)
    goal = wordleEnv.goal_word

    _, reward, done, _ = wordleEnv.step(goal)

    assert done
    assert wordleEnv.done
    assert reward == 0
    _assert_step_after_done_raises(wordleEnv, goal)


def test_win_reward(wordleEnv):
    """A wrong guess followed by the goal yields wordle.REWARD."""
    wordleEnv.reset(seed=13)
    goal = wordleEnv.goal_word

    new_state, reward, done, _ = wordleEnv.step(_word_after_goal(wordleEnv))
    assert state.remaining_steps(new_state) == wordleEnv.max_turns - 1
    assert not done
    assert not wordleEnv.done
    assert reward == 0

    new_state, reward, done, _ = wordleEnv.step(goal)
    assert state.remaining_steps(new_state) == wordleEnv.max_turns - 2
    assert done
    assert wordleEnv.done
    assert reward == wordle.REWARD
    _assert_step_after_done_raises(wordleEnv, goal)


def test_win_reward_6(wordleEnv):
    """Winning on the sixth guess still yields wordle.REWARD."""
    wordleEnv.reset(seed=13)
    goal = wordleEnv.goal_word
    wrong_word = _word_after_goal(wordleEnv)

    for _ in range(5):
        wordleEnv.step(wrong_word)

    new_state, reward, done, _ = wordleEnv.step(goal)
    assert wordleEnv.max_turns - state.remaining_steps(new_state) == 6
    assert done
    assert wordleEnv.done
    assert reward == wordle.REWARD


def test_lose_reward(wordleEnv):
    """Using every turn on wrong words ends the episode with -wordle.REWARD."""
    wordleEnv.reset(seed=13)
    goal = wordleEnv.goal_word
    wrong_word = _word_after_goal(wordleEnv)

    # All turns but the last: episode keeps running with zero reward.
    for turn in range(1, wordleEnv.max_turns):
        new_state, reward, done, _ = wordleEnv.step(wrong_word)
        assert state.remaining_steps(new_state) == wordleEnv.max_turns - turn
        assert not done
        assert not wordleEnv.done
        assert reward == 0

    # Final turn uses a different wrong word (goal index + max_turns).
    last_word = _word_after_goal(wordleEnv, shift=wordleEnv.max_turns)
    new_state, reward, done, _ = wordleEnv.step(last_word)
    assert state.remaining_steps(new_state) == 0
    assert done
    assert wordleEnv.done
    assert reward == -wordle.REWARD
    _assert_step_after_done_raises(wordleEnv, goal)


def test_step(wordleEnv):
    """Check the per-letter state entries after a sequence of guesses.

    The goal is forced to words[0] ("APPAA"); guesses are words[1] twice,
    words[2], then words[0].
    """
    wordleEnv.reset(seed=13)
    wordleEnv.set_goal_encoded(wordleEnv.encode_word(wordleEnv.words[0]))

    # Expected flags while only positions 0-3 have been matched: the last
    # position's triple is still (0, 0, 0) for A, P and C.
    a_partial = (0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0)
    p_partial = (1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0)
    c_partial = (1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0)

    # First guess: words[1].
    cur_state = wordleEnv.state
    new_state, reward, done, _ = wordleEnv.step(
        wordleEnv.encode_word(wordleEnv.words[1]))
    # The state captured before the step must be unaffected by it.
    assert state.remaining_steps(cur_state) == wordleEnv.max_turns
    assert state.remaining_steps(new_state) == wordleEnv.max_turns - 1
    assert _letter_flags(new_state, "B") == _ALL_NO
    assert _letter_flags(new_state, "A") == a_partial
    assert _letter_flags(new_state, "P") == p_partial
    assert _letter_flags(new_state, "C") == c_partial

    # Second guess: words[1] again; letter flags are unchanged.
    cur_state = wordleEnv.state
    new_state, reward, done, _ = wordleEnv.step(
        wordleEnv.encode_word(wordleEnv.words[1]))
    assert state.remaining_steps(cur_state) == wordleEnv.max_turns - 1
    assert state.remaining_steps(new_state) == wordleEnv.max_turns - 2
    assert _letter_flags(new_state, "B") == _ALL_NO
    assert _letter_flags(new_state, "A") == a_partial
    assert _letter_flags(new_state, "P") == p_partial

    # Third guess: words[2]; C now matches B's all-(1, 0, 0) pattern.
    new_state, reward, done, _ = wordleEnv.step(
        wordleEnv.encode_word(wordleEnv.words[2]))
    assert state.remaining_steps(new_state) == wordleEnv.max_turns - 3
    assert _letter_flags(new_state, "B") == _ALL_NO
    assert _letter_flags(new_state, "C") == _ALL_NO
    assert _letter_flags(new_state, "A") == a_partial
    assert _letter_flags(new_state, "P") == p_partial

    # Fourth guess: the goal itself; the last position is now resolved.
    new_state, reward, done, _ = wordleEnv.step(
        wordleEnv.encode_word(wordleEnv.words[0]))
    assert _letter_flags(new_state, "A") == (
        0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1)
    assert _letter_flags(new_state, "P") == (
        1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0)
    assert state.remaining_steps(new_state) == wordleEnv.max_turns - 4
    assert done
    assert wordleEnv.done
    assert reward == wordle.REWARD