santit96 committed on
Commit
86f1c6b
·
1 Parent(s): b8d0d32

Add correct char reward as a constant

Browse files
Files changed (2) hide show
  1. wordle_env/const.py +1 -0
  2. wordle_env/state.py +4 -4
wordle_env/const.py CHANGED
@@ -1,3 +1,4 @@
1
  WORDLE_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
2
  WORDLE_N = 5
3
  REWARD = 10
 
 
1
  WORDLE_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
2
  WORDLE_N = 5
3
  REWARD = 10
4
+ CHAR_REWARD = 0.1
wordle_env/state.py CHANGED
@@ -12,10 +12,10 @@ where status has codes
12
  [0, 0, 1] - char is definitely in this spot
13
  """
14
  import collections
15
- from typing import List
16
  import numpy as np
17
 
18
- from .const import WORDLE_CHARS, WORDLE_N
19
 
20
 
21
  WordleState = np.ndarray
@@ -138,7 +138,7 @@ def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
138
  return update_from_mask(state, word, mask)
139
 
140
 
141
- def update(state: WordleState, word: str, goal_word: str) -> WordleState:
142
  state = state.copy()
143
  reward = 0
144
  state[0] -= 1
@@ -149,7 +149,7 @@ def update(state: WordleState, word: str, goal_word: str) -> WordleState:
149
  if goal_word[i] == c:
150
  # char at position i = yes, all other chars at position i == no
151
  if state[offset + 3 * i:offset + 3 * i + 3][2] == 0:
152
- reward += 0.1
153
  state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
154
  for ocint in range(len(WORDLE_CHARS)):
155
  if ocint != cint:
 
12
  [0, 0, 1] - char is definitely in this spot
13
  """
14
  import collections
15
+ from typing import List, Tuple
16
  import numpy as np
17
 
18
+ from .const import CHAR_REWARD, WORDLE_CHARS, WORDLE_N
19
 
20
 
21
  WordleState = np.ndarray
 
138
  return update_from_mask(state, word, mask)
139
 
140
 
141
+ def update(state: WordleState, word: str, goal_word: str) -> Tuple(WordleState, float):
142
  state = state.copy()
143
  reward = 0
144
  state[0] -= 1
 
149
  if goal_word[i] == c:
150
  # char at position i = yes, all other chars at position i == no
151
  if state[offset + 3 * i:offset + 3 * i + 3][2] == 0:
152
+ reward += CHAR_REWARD
153
  state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
154
  for ocint in range(len(WORDLE_CHARS)):
155
  if ocint != cint: