santit96 committed on
Commit
335cc71
·
1 Parent(s): 23fd1ff

Add reward for correct letter no matter if it has been rewarded before

Browse files
Files changed (1) hide show
  1. wordle_env/state.py +2 -2
wordle_env/state.py CHANGED
@@ -148,8 +148,7 @@ def update(state: WordleState, word: str, goal_word: str) -> Tuple[WordleState,
148
  offset = 1 + cint * WORDLE_N * 3
149
  if goal_word[i] == c:
150
  # char at position i = yes, all other chars at position i == no
151
- if state[offset + 3 * i:offset + 3 * i + 3][2] == 0:
152
- reward += CHAR_REWARD
153
  state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
154
  for ocint in range(len(WORDLE_CHARS)):
155
  if ocint != cint:
@@ -165,6 +164,7 @@ def update(state: WordleState, word: str, goal_word: str) -> Tuple[WordleState,
165
  # Char at position i = no, and in other positions maybe, other chars stay as they are
166
  state[offset:offset + 3 * WORDLE_N] = [0, 1, 0] * WORDLE_N
167
  state[offset + 3 * i:offset + 3 * i + 3] = [1, 0, 0]
 
168
  else:
169
  # Char at all positions = no
170
  state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
 
148
  offset = 1 + cint * WORDLE_N * 3
149
  if goal_word[i] == c:
150
  # char at position i = yes, all other chars at position i == no
151
+ reward += CHAR_REWARD
 
152
  state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
153
  for ocint in range(len(WORDLE_CHARS)):
154
  if ocint != cint:
 
164
  # Char at position i = no, and in other positions maybe, other chars stay as they are
165
  state[offset:offset + 3 * WORDLE_N] = [0, 1, 0] * WORDLE_N
166
  state[offset + 3 * i:offset + 3 * i + 3] = [1, 0, 0]
167
+ reward += CHAR_REWARD * 0.1
168
  else:
169
  # Char at all positions = no
170
  state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N