santit96 committed on
Commit
29cd0c4
·
1 Parent(s): 01007c3

Fix bug in state update

Browse files

Fix tests to include that bug test and refactor step_test

Files changed (2) hide show
  1. wordle_env/state.py +5 -2
  2. wordle_env/test_wordle.py +85 -82
wordle_env/state.py CHANGED
@@ -161,8 +161,11 @@ def update(state: WordleState, word: str, goal_word: str) -> Tuple[WordleState,
161
  offset = 1 + cint * WORDLE_N * 3
162
  if goal_word[i] != c:
163
  if c in goal_word and goal_word.count(c) > processed_letters.count(c):
164
- # Char at position i = no, and in other positions maybe, other chars stay as they are
165
- state[offset:offset + 3 * WORDLE_N] = [0, 1, 0] * WORDLE_N
 
 
 
166
  state[offset + 3 * i:offset + 3 * i + 3] = [1, 0, 0]
167
  reward += CHAR_REWARD * 0.1
168
  else:
 
161
  offset = 1 + cint * WORDLE_N * 3
162
  if goal_word[i] != c:
163
  if c in goal_word and goal_word.count(c) > processed_letters.count(c):
164
+ # Char at position i = no; other positions become maybe unless they already hold a value; other chars stay as they are
165
+ for char_idx in range(0, WORDLE_N * 3, 3):
166
+ char_offset = offset + char_idx
167
+ if tuple(state[char_offset: char_offset + 3]) == (0, 0, 0):
168
+ state[char_offset: char_offset + 3] = [0, 1, 0]
169
  state[offset + 3 * i:offset + 3 * i + 3] = [1, 0, 0]
170
  reward += CHAR_REWARD * 0.1
171
  else:
wordle_env/test_wordle.py CHANGED
@@ -7,7 +7,7 @@ TESTWORDS = [
7
  "APPAA",
8
  "APPAB",
9
  "APPAC",
10
- "APPAD",
11
 
12
  "BPPAB",
13
  "BPPAC",
@@ -111,6 +111,9 @@ def test_lose_reward(wordleEnv):
111
  except ValueError:
112
  pass
113
 
 
 
 
114
 
115
  def test_step(wordleEnv):
116
  wordleEnv.reset(seed=13)
@@ -121,98 +124,98 @@ def test_step(wordleEnv):
121
  assert state.remaining_steps(cur_state) == wordleEnv.max_turns
122
  assert state.remaining_steps(new_state) == wordleEnv.max_turns-1
123
  # Expect B to be all 1,0,0
124
- offset = 1+3*5*(ord('B')-ord('A'))
125
- assert tuple(new_state[offset:offset+15]) == tuple([1, 0, 0]*5)
126
-
127
- # Expect A to be right in position 0 4 and maybe otherwise
128
- offset = 1
129
- assert tuple(new_state[offset:offset+15]) == (0, 0, 1,
130
- 1, 0, 0,
131
- 1, 0, 0,
132
- 0, 0, 1,
133
- 0, 0, 0)
134
-
135
- # Expect P to be right in position 2 3 and maybe otherwise
136
- offset = 1 + 3*5*(ord('P') - ord('A'))
137
- assert tuple(new_state[offset:offset+15]) == (1, 0, 0,
138
- 0, 0, 1,
139
- 0, 0, 1,
140
- 1, 0, 0,
141
- 0, 0, 0)
142
-
143
- # Expect C to be maybes
144
- offset = 1 + 3*5*(ord('C') - ord('A'))
145
- assert tuple(new_state[offset:offset+15]) == (1, 0, 0,
146
- 1, 0, 0,
147
- 1, 0, 0,
148
- 1, 0, 0,
149
- 0, 0, 0)
150
  cur_state = wordleEnv.state
151
- new_state, reward, done, _ = wordleEnv.step(1)
152
  assert state.remaining_steps(cur_state) == wordleEnv.max_turns-1
153
  assert state.remaining_steps(new_state) == wordleEnv.max_turns-2
154
- # Expect B to be all 1,0,0
155
- offset = 1+3*5*(ord('B')-ord('A'))
156
- assert tuple(new_state[offset:offset+15]) == tuple([1, 0, 0]*5)
157
-
158
- # Expect A to be right in position 0 4 and maybe otherwise
159
- offset = 1
160
- assert tuple(new_state[offset:offset+15]) == (0, 0, 1,
161
- 1, 0, 0,
162
- 1, 0, 0,
163
- 0, 0, 1,
164
- 0, 0, 0)
165
-
166
- # Expect P to be right in position 2 3 and maybe otherwise
167
- offset = 1 + 3*5*(ord('P') - ord('A'))
168
- assert tuple(new_state[offset:offset+15]) == (1, 0, 0,
169
- 0, 0, 1,
170
- 0, 0, 1,
171
- 1, 0, 0,
172
- 0, 0, 0)
 
173
 
174
  new_state, reward, done, _ = wordleEnv.step(2)
175
  assert state.remaining_steps(new_state) == wordleEnv.max_turns-3
176
  # Expect B to be all 1,0,0
177
- offset = 1 + 3*5*(ord('B')-ord('A'))
178
- assert tuple(new_state[offset:offset+15]) == tuple([1, 0, 0]*5)
179
 
180
  # Expect C to be all 1,0,0
181
- offset = 1+3*5*(ord('C')-ord('A'))
182
- assert tuple(new_state[offset:offset+15]) == tuple([1, 0, 0]*5)
183
-
184
- # Expect A to be right in position 0 4 and maybe otherwise
185
- offset = 1
186
- assert tuple(new_state[offset:offset+15]) == (0, 0, 1,
187
- 1, 0, 0,
188
- 1, 0, 0,
189
- 0, 0, 1,
190
- 0, 0, 0)
191
-
192
- # Expect P to be right in position 2 3 and maybe otherwise
193
- offset = 1 + 3*5*(ord('P') - ord('A'))
194
- assert tuple(new_state[offset:offset+15]) == (1, 0, 0,
195
- 0, 0, 1,
196
- 0, 0, 1,
197
- 1, 0, 0,
198
- 0, 0, 0)
199
 
200
  new_state, reward, done, _ = wordleEnv.step(0)
201
- # Expect A to be right in position 0 4 and 5
202
- offset = 1
203
- assert tuple(new_state[offset:offset+15]) == (0, 0, 1,
204
- 1, 0, 0,
205
- 1, 0, 0,
206
- 0, 0, 1,
207
- 0, 0, 1)
208
-
209
- # Expect P to be right in position 2 3 and not otherwise
210
- offset = 1 + 3*5*(ord('P') - ord('A'))
211
- assert tuple(new_state[offset:offset+15]) == (1, 0, 0,
212
- 0, 0, 1,
213
- 0, 0, 1,
214
- 1, 0, 0,
215
- 1, 0, 0)
216
  assert state.remaining_steps(new_state) == wordleEnv.max_turns-4
217
  assert done
218
  assert wordleEnv.done
 
7
  "APPAA",
8
  "APPAB",
9
  "APPAC",
10
+ "APAPD",
11
 
12
  "BPPAB",
13
  "BPPAC",
 
111
  except ValueError:
112
  pass
113
 
114
+ def letter_test(char, state, letter_state):
115
+ offset = 1+3*5*(ord(char)-ord('A'))
116
+ assert tuple(state[offset:offset+15]) == letter_state
117
 
118
  def test_step(wordleEnv):
119
  wordleEnv.reset(seed=13)
 
124
  assert state.remaining_steps(cur_state) == wordleEnv.max_turns
125
  assert state.remaining_steps(new_state) == wordleEnv.max_turns-1
126
  # Expect B to be all 1,0,0
127
+ letter_test('B', new_state, tuple([1, 0, 0]*5))
128
+
129
+ # Expect A to be right in position 0 3, no in 1 2 and 0 otherwise
130
+ letter_state = (0, 0, 1,
131
+ 1, 0, 0,
132
+ 1, 0, 0,
133
+ 0, 0, 1,
134
+ 0, 0, 0)
135
+ letter_test('A', new_state, letter_state)
136
+
137
+ # Expect P to be right in position 1 2, no in 0 3 and 0 otherwise
138
+ letter_state = (1, 0, 0,
139
+ 0, 0, 1,
140
+ 0, 0, 1,
141
+ 1, 0, 0,
142
+ 0, 0, 0)
143
+ letter_test('P', new_state, letter_state)
144
+
145
+ # Expect C to be no everywhere except in 4
146
+ letter_state = (1, 0, 0,
147
+ 1, 0, 0,
148
+ 1, 0, 0,
149
+ 1, 0, 0,
150
+ 0, 0, 0)
151
+ letter_test('C', new_state, letter_state)
 
152
  cur_state = wordleEnv.state
153
+ new_state, reward, done, _ = wordleEnv.step(3)
154
  assert state.remaining_steps(cur_state) == wordleEnv.max_turns-1
155
  assert state.remaining_steps(new_state) == wordleEnv.max_turns-2
156
+ # Expect D to be all 1,0,0
157
+ letter_state = tuple([1, 0, 0]*5)
158
+ letter_test('D', new_state, letter_state)
159
+
160
+ # Expect A to be right in position 0 3 and Maybe in 4
161
+ letter_state = (0, 0, 1,
162
+ 1, 0, 0,
163
+ 1, 0, 0,
164
+ 0, 0, 1,
165
+ 0, 1, 0)
166
+ print(new_state, letter_state)
167
+ letter_test('A', new_state, letter_state)
168
+
169
+ # Expect P to be right in position 1 2, no in 0 3 and maybe in 4
170
+ letter_state = (1, 0, 0,
171
+ 0, 0, 1,
172
+ 0, 0, 1,
173
+ 1, 0, 0,
174
+ 0, 1, 0)
175
+ letter_test('P', new_state, letter_state)
176
 
177
  new_state, reward, done, _ = wordleEnv.step(2)
178
  assert state.remaining_steps(new_state) == wordleEnv.max_turns-3
179
  # Expect B to be all 1,0,0
180
+ letter_state = tuple([1, 0, 0]*5)
181
+ letter_test('C', new_state, letter_state)
182
 
183
  # Expect C to be all 1,0,0
184
+ letter_state = tuple([1, 0, 0]*5)
185
+ letter_test('C', new_state, letter_state)
186
+
187
+ # Expect A to be right in position 0 3, no in 1 2 and maybe otherwise
188
+ letter_state = (0, 0, 1,
189
+ 1, 0, 0,
190
+ 1, 0, 0,
191
+ 0, 0, 1,
192
+ 0, 1, 0)
193
+ letter_test('A', new_state, letter_state)
194
+
195
+ # Expect P to be right in position 1 2, no in 0 3 and maybe otherwise
196
+ letter_state = (1, 0, 0,
197
+ 0, 0, 1,
198
+ 0, 0, 1,
199
+ 1, 0, 0,
200
+ 0, 1, 0)
201
+ letter_test('P', new_state, letter_state)
202
 
203
  new_state, reward, done, _ = wordleEnv.step(0)
204
+ # Expect A to be right in position 0 3 and 4
205
+ letter_state = (0, 0, 1,
206
+ 1, 0, 0,
207
+ 1, 0, 0,
208
+ 0, 0, 1,
209
+ 0, 0, 1)
210
+ letter_test('A', new_state, letter_state)
211
+
212
+ # Expect P to be right in position 1 2 and not otherwise
213
+ letter_state = (1, 0, 0,
214
+ 0, 0, 1,
215
+ 0, 0, 1,
216
+ 1, 0, 0,
217
+ 1, 0, 0)
218
+ letter_test('P', new_state, letter_state)
219
  assert state.remaining_steps(new_state) == wordleEnv.max_turns-4
220
  assert done
221
  assert wordleEnv.done