santit96 committed on
Commit
d9e6245
·
1 Parent(s): 21456ba

Add test to evaluate complex cases

Browse files

Update the state-update logic to take those complex cases into account

wordle_env/state.py CHANGED
@@ -170,5 +170,8 @@ def update(state: WordleState, word: str, goal_word: str) -> Tuple[WordleState,
170
  elif c not in goal_word:
171
  # Char at all positions = no
172
  state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
 
 
 
173
  processed_letters.append(c)
174
  return state, reward
 
170
  elif c not in goal_word:
171
  # Char at all positions = no
172
  state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
173
+ else: # goal_word.count(c) <= processed_letters.count(c) and c in goal_word
174
+ # Only information at this point is that char at position i = no
175
+ state[offset + 3 * i:offset + 3 * i + 3] = [1, 0, 0]
176
  processed_letters.append(c)
177
  return state, reward
wordle_env/test_wordle.py CHANGED
@@ -10,12 +10,10 @@ TESTWORDS = [
10
  "APAPD",
11
 
12
  "BPPAB",
13
- "BPPAC",
14
- "BPPAD",
15
-
16
- "CPPAB",
17
- "CPPAC",
18
- "CPPAD",
19
  ]
20
 
21
 
@@ -29,11 +27,11 @@ def wordleEnv():
29
 
30
 
31
  def test_reset(wordleEnv):
32
- wordleEnv.reset(seed=13)
33
 
34
 
35
  def test_guess_win(wordleEnv):
36
- wordleEnv.reset(seed=13)
37
  goal = wordleEnv.goal_word
38
  new_state, reward, done, _ = wordleEnv.step(goal)
39
  assert done
@@ -48,7 +46,7 @@ def test_guess_win(wordleEnv):
48
 
49
 
50
  def test_win_reward(wordleEnv):
51
- wordleEnv.reset(seed=13)
52
  goal = wordleEnv.goal_word
53
  word_index = (goal + 1) % len(wordleEnv.words)
54
  new_state, reward, done, _ = wordleEnv.step(word_index)
@@ -71,7 +69,7 @@ def test_win_reward(wordleEnv):
71
 
72
 
73
  def test_win_reward_6(wordleEnv):
74
- wordleEnv.reset(seed=13)
75
  goal = wordleEnv.goal_word
76
  random_word = (goal + 1) % len(wordleEnv.words)
77
 
@@ -87,7 +85,7 @@ def test_win_reward_6(wordleEnv):
87
 
88
 
89
  def test_lose_reward(wordleEnv):
90
- wordleEnv.reset(seed=13)
91
  goal = wordleEnv.goal_word
92
  random_word = (goal + 1) % len(wordleEnv.words)
93
  for i in range(1, wordleEnv.max_turns):
@@ -116,7 +114,7 @@ def letter_test(char, state, letter_state):
116
  assert tuple(state[offset:offset+15]) == letter_state
117
 
118
  def test_step(wordleEnv):
119
- wordleEnv.reset(seed=13)
120
  wordleEnv.set_goal_encoded(0)
121
 
122
  cur_state = wordleEnv.state
@@ -220,3 +218,76 @@ def test_step(wordleEnv):
220
  assert done
221
  assert wordleEnv.done
222
  assert reward == wordle.REWARD
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "APAPD",
11
 
12
  "BPPAB",
13
+ "PPAPB",
14
+ "PPBBA",
15
+ "BPABB",
16
+ "PPPAC"
 
 
17
  ]
18
 
19
 
 
27
 
28
 
29
  def test_reset(wordleEnv):
30
+ wordleEnv.reset()
31
 
32
 
33
  def test_guess_win(wordleEnv):
34
+ wordleEnv.reset()
35
  goal = wordleEnv.goal_word
36
  new_state, reward, done, _ = wordleEnv.step(goal)
37
  assert done
 
46
 
47
 
48
  def test_win_reward(wordleEnv):
49
+ wordleEnv.reset()
50
  goal = wordleEnv.goal_word
51
  word_index = (goal + 1) % len(wordleEnv.words)
52
  new_state, reward, done, _ = wordleEnv.step(word_index)
 
69
 
70
 
71
  def test_win_reward_6(wordleEnv):
72
+ wordleEnv.reset()
73
  goal = wordleEnv.goal_word
74
  random_word = (goal + 1) % len(wordleEnv.words)
75
 
 
85
 
86
 
87
  def test_lose_reward(wordleEnv):
88
+ wordleEnv.reset()
89
  goal = wordleEnv.goal_word
90
  random_word = (goal + 1) % len(wordleEnv.words)
91
  for i in range(1, wordleEnv.max_turns):
 
114
  assert tuple(state[offset:offset+15]) == letter_state
115
 
116
  def test_step(wordleEnv):
117
+ wordleEnv.reset()
118
  wordleEnv.set_goal_encoded(0)
119
 
120
  cur_state = wordleEnv.state
 
218
  assert done
219
  assert wordleEnv.done
220
  assert reward == wordle.REWARD
221
+
222
+ def test_special_step_cases(wordleEnv):
223
+ wordleEnv.reset()
224
+ wordleEnv.set_goal_encoded(4)
225
+ # BPPAB - goal
226
+ # PPAPB - 1st guess
227
+ new_state, _, _, _ = wordleEnv.step(5)
228
+ # Expect A to be all maybe except 2, 1 and 4 that are no
229
+ letter_state = (0, 1, 0,
230
+ 1, 0, 0,
231
+ 1, 0, 0,
232
+ 0, 1, 0,
233
+ 1, 0, 0)
234
+ letter_test('A', new_state, letter_state)
235
+ # Expect B to be all 0 except 4 that is yes and 1 that is no
236
+ letter_state = (0, 0, 0,
237
+ 1, 0, 0,
238
+ 0, 0, 0,
239
+ 0, 0, 0,
240
+ 0, 0, 1)
241
+ letter_test('B', new_state, letter_state)
242
+ # Expect P to be yes at 1, maybe at 2, and no in 0, 3 and 4
243
+ letter_state = (1, 0, 0,
244
+ 0, 0, 1,
245
+ 0, 1, 0,
246
+ 1, 0, 0,
247
+ 1, 0, 0)
248
+ letter_test('P', new_state, letter_state)
249
+ # BPPAB - goal
250
+ # PPAPB - 1st guess
251
+ # PPBBA - 2nd guess
252
+ new_state, _, _, _ = wordleEnv.step(6)
253
+ # Expect A to be all maybe except 2, 1 and 4 that are no
254
+ letter_state = (0, 1, 0,
255
+ 1, 0, 0,
256
+ 1, 0, 0,
257
+ 0, 1, 0,
258
+ 1, 0, 0)
259
+ letter_test('A', new_state, letter_state)
260
+ # Expect B to be maybe at 0, yes at 4, and 1 2 and 3 are no
261
+ letter_state = (0, 1, 0,
262
+ 1, 0, 0,
263
+ 1, 0, 0,
264
+ 1, 0, 0,
265
+ 0, 0, 1)
266
+ letter_test('B', new_state, letter_state)
267
+ # Expect P to be yes at 1, maybe at 2, and no in 0, 3 and 4
268
+ letter_state = (1, 0, 0,
269
+ 0, 0, 1,
270
+ 0, 1, 0,
271
+ 1, 0, 0,
272
+ 1, 0, 0)
273
+ letter_test('P', new_state, letter_state)
274
+
275
+ wordleEnv.reset()
276
+ wordleEnv.set_goal_encoded(7)
277
+ # BPABB - goal
278
+ # PPPAC - 1st guess
279
+ new_state, _, _, _ = wordleEnv.step(8)
280
+ # Expect A to be all maybe except 1 and 3 that are no
281
+ letter_state = (0, 1, 0,
282
+ 1, 0, 0,
283
+ 0, 1, 0,
284
+ 1, 0, 0,
285
+ 0, 1, 0)
286
+ letter_test('A', new_state, letter_state)
287
+ # Expect P to be no at 0 and 2, yes at 1, and all 0 at 3 and 4
288
+ letter_state = (1, 0, 0,
289
+ 0, 0, 1,
290
+ 1, 0, 0,
291
+ 0, 0, 0,
292
+ 0, 0, 0)
293
+ letter_test('P', new_state, letter_state)
wordle_env/wordle.py CHANGED
@@ -89,7 +89,7 @@ class WordleEnvBase(gym.Env):
89
 
90
  return self.state.copy(), reward, self.done, {"goal_id": self.goal_word}
91
 
92
- def reset(self, seed: Optional[int] = None):
93
  self.state = state.new(self.max_turns)
94
  self.done = False
95
  random_word = random.choice(self.words[:self.allowable_words])
 
89
 
90
  return self.state.copy(), reward, self.done, {"goal_id": self.goal_word}
91
 
92
+ def reset(self):
93
  self.state = state.new(self.max_turns)
94
  self.done = False
95
  random_word = random.choice(self.words[:self.allowable_words])