Spaces:
Sleeping
Sleeping
Add correct char reward as a constant
Browse files
- wordle_env/const.py +1 -0
- wordle_env/state.py +4 -4
wordle_env/const.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
WORDLE_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
2 |
WORDLE_N = 5
|
3 |
REWARD = 10
|
|
|
|
1 |
WORDLE_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
2 |
WORDLE_N = 5
|
3 |
REWARD = 10
|
4 |
+
CHAR_REWARD = 0.1
|
wordle_env/state.py
CHANGED
@@ -12,10 +12,10 @@ where status has codes
|
|
12 |
[0, 0, 1] - char is definitely in this spot
|
13 |
"""
|
14 |
import collections
|
15 |
-
from typing import List
|
16 |
import numpy as np
|
17 |
|
18 |
-
from .const import WORDLE_CHARS, WORDLE_N
|
19 |
|
20 |
|
21 |
WordleState = np.ndarray
|
@@ -138,7 +138,7 @@ def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
|
|
138 |
return update_from_mask(state, word, mask)
|
139 |
|
140 |
|
141 |
-
def update(state: WordleState, word: str, goal_word: str) -> WordleState:
|
142 |
state = state.copy()
|
143 |
reward = 0
|
144 |
state[0] -= 1
|
@@ -149,7 +149,7 @@ def update(state: WordleState, word: str, goal_word: str) -> WordleState:
|
|
149 |
if goal_word[i] == c:
|
150 |
# char at position i = yes, all other chars at position i == no
|
151 |
if state[offset + 3 * i:offset + 3 * i + 3][2] == 0:
|
152 |
-
reward +=
|
153 |
state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
|
154 |
for ocint in range(len(WORDLE_CHARS)):
|
155 |
if ocint != cint:
|
|
|
12 |
[0, 0, 1] - char is definitely in this spot
|
13 |
"""
|
14 |
import collections
|
15 |
+
from typing import List, Tuple
|
16 |
import numpy as np
|
17 |
|
18 |
+
from .const import CHAR_REWARD, WORDLE_CHARS, WORDLE_N
|
19 |
|
20 |
|
21 |
WordleState = np.ndarray
|
|
|
138 |
return update_from_mask(state, word, mask)
|
139 |
|
140 |
|
141 |
+
def update(state: WordleState, word: str, goal_word: str) -> Tuple[WordleState, float]:
|
142 |
state = state.copy()
|
143 |
reward = 0
|
144 |
state[0] -= 1
|
|
|
149 |
if goal_word[i] == c:
|
150 |
# char at position i = yes, all other chars at position i == no
|
151 |
if state[offset + 3 * i:offset + 3 * i + 3][2] == 0:
|
152 |
+
reward += CHAR_REWARD
|
153 |
state[offset + 3 * i:offset + 3 * i + 3] = [0, 0, 1]
|
154 |
for ocint in range(len(WORDLE_CHARS)):
|
155 |
if ocint != cint:
|