wordle-solver / wordle_env /test_wordle.py
santit96's picture
Committing wordle solver project, state completed, a3c not completed yet
44db2f9
raw
history blame
8.2 kB
import pytest
import wordle
import state
# Tiny ten-word vocabulary used by every test in this module. All words share
# the middle letters "PPA" and differ only in their first and last letter, so
# any two of them overlap on most positions. The order matters: the tests
# index into this list (words[0], words[1], ...).
TESTWORDS = [
    "APPAA", "APPAB", "APPAC", "APPAD",
    "BPPAB", "BPPAC", "BPPAD",
    "CPPAB", "CPPAC", "CPPAD",
]
@pytest.fixture
def wordleEnv():
    """Provide a fresh ``WordleEnvBase`` over ``TESTWORDS`` with six turns."""
    return wordle.WordleEnvBase(words=TESTWORDS, max_turns=6)
def test_reset(wordleEnv):
    """Smoke test: resetting the environment with a fixed seed must not raise."""
    seed = 13
    wordleEnv.reset(seed=seed)
def test_guess_win(wordleEnv):
    """Guessing the goal word on the very first turn ends the episode.

    Checks that the step reports ``done`` (both in the return value and on the
    environment), that a first-turn win carries a reward of 0, and that any
    further ``step`` call on the finished episode raises ``ValueError``.
    """
    wordleEnv.reset(seed=13)
    goal = wordleEnv.goal_word

    _, reward, done, _ = wordleEnv.step(goal)
    assert done
    assert wordleEnv.done
    assert reward == 0

    # pytest.raises fails the test when step() returns normally. A hand-rolled
    # "try: step(); raise ValueError(...) / except ValueError: pass" would
    # swallow its own sentinel and pass even if step() never raised.
    with pytest.raises(ValueError):
        wordleEnv.step(goal)
def test_win_reward(wordleEnv):
    """Winning on the second turn pays ``wordle.REWARD``.

    A wrong first guess must leave the episode open with reward 0 and one turn
    consumed; guessing the goal next must finish the episode with the full
    reward. Stepping a finished episode must raise ``ValueError``.
    """
    wordleEnv.reset(seed=13)
    goal = wordleEnv.goal_word

    # Pick the vocabulary neighbour of the goal so the first guess is wrong.
    words = wordleEnv.words
    goal_index = words.index(wordleEnv.decode_word(goal))
    wrong_guess = wordleEnv.encode_word(words[(goal_index + 1) % len(words)])

    new_state, reward, done, _ = wordleEnv.step(wrong_guess)
    assert state.remaining_steps(new_state) == wordleEnv.max_turns - 1
    assert not done
    assert not wordleEnv.done
    assert reward == 0

    new_state, reward, done, _ = wordleEnv.step(goal)
    assert state.remaining_steps(new_state) == wordleEnv.max_turns - 2
    assert done
    assert wordleEnv.done
    assert reward == wordle.REWARD

    # pytest.raises fails the test when step() returns normally. A hand-rolled
    # "try: step(); raise ValueError(...) / except ValueError: pass" would
    # swallow its own sentinel and pass even if step() never raised.
    with pytest.raises(ValueError):
        wordleEnv.step(goal)
def test_win_reward_6(wordleEnv):
    """Winning on the sixth guess (after five misses) pays ``wordle.REWARD``."""
    wordleEnv.reset(seed=13)
    goal = wordleEnv.goal_word

    # The vocabulary neighbour of the goal serves as the repeated wrong guess.
    vocabulary = wordleEnv.words
    goal_position = vocabulary.index(wordleEnv.decode_word(goal))
    neighbour = vocabulary[(goal_position + 1) % len(vocabulary)]
    wrong_guess = wordleEnv.encode_word(neighbour)

    for _ in range(5):
        wordleEnv.step(wrong_guess)

    final_state, final_reward, finished, _ = wordleEnv.step(goal)
    turns_used = wordleEnv.max_turns - state.remaining_steps(final_state)
    assert turns_used == 6
    assert finished
    assert wordleEnv.done
    assert final_reward == wordle.REWARD
def test_lose_reward(wordleEnv):
    """Exhausting every turn without hitting the goal pays ``-wordle.REWARD``.

    All but the last turn are spent on one wrong word and must each return
    reward 0 with the episode still open. The final turn uses another wrong
    word and must end the episode with zero remaining steps and the negative
    reward. Stepping a finished episode must raise ``ValueError``.
    """
    wordleEnv.reset(seed=13)
    goal = wordleEnv.goal_word
    words = wordleEnv.words
    goal_index = words.index(wordleEnv.decode_word(goal))

    wrong_guess = wordleEnv.encode_word(words[(goal_index + 1) % len(words)])
    for i in range(1, wordleEnv.max_turns):
        new_state, reward, done, _ = wordleEnv.step(wrong_guess)
        assert state.remaining_steps(new_state) == wordleEnv.max_turns - i
        assert not done
        assert not wordleEnv.done
        assert reward == 0

    # Last turn: a word max_turns places after the goal in the vocabulary.
    last_index = (goal_index + wordleEnv.max_turns) % len(words)
    last_guess = wordleEnv.encode_word(words[last_index])
    new_state, reward, done, _ = wordleEnv.step(last_guess)
    assert state.remaining_steps(new_state) == 0
    assert done
    assert wordleEnv.done
    assert reward == -wordle.REWARD

    # pytest.raises fails the test when step() returns normally. A hand-rolled
    # "try: step(); raise ValueError(...) / except ValueError: pass" would
    # swallow its own sentinel and pass even if step() never raised.
    with pytest.raises(ValueError):
        wordleEnv.step(goal)
def test_step(wordleEnv):
    """Play a four-guess game against goal ``words[0]`` and inspect the state.

    The state vector is read as: element 0 holds the remaining steps, followed
    by 15 values per letter (five positions, three flags each) starting at
    index 1 for 'A'. Guesses are ``words[1]``, ``words[1]`` again,
    ``words[2]`` and finally the goal ``words[0]``.

    NOTE(review): the triple names below follow the wording of the original
    comments ("right in position" for ``(0, 0, 1)``); confirm the exact flag
    semantics against the ``state`` module.
    """
    absent = (1, 0, 0)
    confirmed = (0, 0, 1)
    untouched = (0, 0, 0)

    def letter_cells(vec, letter):
        """Return the 15 per-position flags stored for ``letter`` in ``vec``."""
        start = 1 + 3 * 5 * (ord(letter) - ord('A'))
        return tuple(vec[start:start + 15])

    def guess(word_index):
        """Step the environment with the vocabulary word at ``word_index``."""
        return wordleEnv.step(wordleEnv.encode_word(wordleEnv.words[word_index]))

    # Expected flags while only the first four positions have been resolved
    # (positions are 0-based): A confirmed at 0 and 3, P confirmed at 1 and 2,
    # last position still untouched for both letters.
    a_partial = confirmed + absent + absent + confirmed + untouched
    p_partial = absent + confirmed + confirmed + absent + untouched

    wordleEnv.reset(seed=13)
    wordleEnv.set_goal_encoded(wordleEnv.encode_word(wordleEnv.words[0]))

    # --- Guess 1: words[1] ---
    before = wordleEnv.state
    after, reward, done, _ = guess(1)
    assert state.remaining_steps(before) == wordleEnv.max_turns
    assert state.remaining_steps(after) == wordleEnv.max_turns - 1
    # B was guessed and is (1, 0, 0) at every position.
    assert letter_cells(after, 'B') == absent * 5
    assert letter_cells(after, 'A') == a_partial
    assert letter_cells(after, 'P') == p_partial
    # C is (1, 0, 0) at the four resolved positions, untouched at the last.
    assert letter_cells(after, 'C') == absent * 4 + untouched

    # --- Guess 2: words[1] again; the letter flags must not change ---
    before = wordleEnv.state
    after, reward, done, _ = guess(1)
    assert state.remaining_steps(before) == wordleEnv.max_turns - 1
    assert state.remaining_steps(after) == wordleEnv.max_turns - 2
    assert letter_cells(after, 'B') == absent * 5
    assert letter_cells(after, 'A') == a_partial
    assert letter_cells(after, 'P') == p_partial

    # --- Guess 3: words[2]; C now joins B as (1, 0, 0) everywhere ---
    after, reward, done, _ = guess(2)
    assert state.remaining_steps(after) == wordleEnv.max_turns - 3
    assert letter_cells(after, 'B') == absent * 5
    assert letter_cells(after, 'C') == absent * 5
    assert letter_cells(after, 'A') == a_partial
    assert letter_cells(after, 'P') == p_partial

    # --- Guess 4: the goal word itself ---
    after, reward, done, _ = guess(0)
    # A is additionally confirmed at the last position; P is ruled out there.
    assert letter_cells(after, 'A') == (
        confirmed + absent + absent + confirmed + confirmed
    )
    assert letter_cells(after, 'P') == (
        absent + confirmed + confirmed + absent + absent
    )
    assert state.remaining_steps(after) == wordleEnv.max_turns - 4
    assert done
    assert wordleEnv.done
    assert reward == wordle.REWARD