Spaces:
Sleeping
Sleeping
Add reward for a correct letter regardless of whether it has already been rewarded
Browse files - wordle_env/state.py +2 -2
wordle_env/state.py
CHANGED
@@ -148,8 +148,7 @@ def update(state: WordleState, word: str, goal_word: str) -> Tuple[WordleState,
|
|
148 |
offset = 1 + cint * WORDLE_N * 3
|
149 |
if goal_word[i] == c:
|
150 |
# char at position i = yes, all other chars at position i == no
|
151 |
-
|
152 |
-
reward += CHAR_REWARD
|
153 |
state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
|
154 |
for ocint in range(len(WORDLE_CHARS)):
|
155 |
if ocint != cint:
|
@@ -165,6 +164,7 @@ def update(state: WordleState, word: str, goal_word: str) -> Tuple[WordleState,
|
|
165 |
# Char at position i = no, and in other positions maybe, other chars stay as they are
|
166 |
state[offset:offset + 3 * WORDLE_N] = [0, 1, 0] * WORDLE_N
|
167 |
state[offset + 3 * i:offset + 3 * i + 3] = [1, 0, 0]
|
|
|
168 |
else:
|
169 |
# Char at all positions = no
|
170 |
state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
|
|
|
148 |
offset = 1 + cint * WORDLE_N * 3
|
149 |
if goal_word[i] == c:
|
150 |
# char at position i = yes, all other chars at position i == no
|
151 |
+
reward += CHAR_REWARD
|
|
|
152 |
state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
|
153 |
for ocint in range(len(WORDLE_CHARS)):
|
154 |
if ocint != cint:
|
|
|
164 |
# Char at position i = no, and in other positions maybe, other chars stay as they are
|
165 |
state[offset:offset + 3 * WORDLE_N] = [0, 1, 0] * WORDLE_N
|
166 |
state[offset + 3 * i:offset + 3 * i + 3] = [1, 0, 0]
|
167 |
+
reward += CHAR_REWARD * 0.1
|
168 |
else:
|
169 |
# Char at all positions = no
|
170 |
state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
|