wordle-solver / main.py
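"""Entry point for training the A3C Wordle solver.

Spawns one Worker process per CPU core, each updating a shared global
network through a SharedAdam optimizer, then reports how many episodes
were played and won and plots the moving-average episode reward.
"""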
import os
import sys

import gym
import matplotlib.pyplot as plt
import torch.multiprocessing as mp

from a3c.discrete_A3C import Net, Worker
from a3c.shared_adam import SharedAdam
from wordle_env.wordle import WordleEnvBase
# limit each process to a single OpenMP thread so the parallel workers do not oversubscribe CPU cores
os.environ["OMP_NUM_THREADS"] = "1"

if __name__ == "__main__":
    # training hyperparameters taken from the command line, with defaults
    max_ep = int(sys.argv[1]) if len(sys.argv) > 1 else 100000
    env_id = sys.argv[2] if len(sys.argv) > 2 else 'WordleEnv100FullAction-v0'
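    # e.g. `python main.py 50000 WordleEnv100FullAction-v0` trains for 50000 episodes
    # on that environment; both arguments are optional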
    env = gym.make(env_id)
    n_s = env.observation_space.shape[0]  # size of the observation vector
    n_a = env.action_space.n              # size of the action space
    words_list = env.words
    word_width = len(env.words[0])

    gnet = Net(n_s, n_a, words_list, word_width)  # global network
    gnet.share_memory()  # share the global parameters across worker processes
    opt = SharedAdam(gnet.parameters(), lr=1e-4, betas=(0.92, 0.999))  # global optimizer

    # shared training state: episode counter, moving-average reward, result queue, win counter
    global_ep = mp.Value('i', 0)
    global_ep_r = mp.Value('d', 0.)
    res_queue = mp.Queue()
    win_ep = mp.Value('i', 0)
    # parallel training: one worker process per CPU core
    workers = [
        Worker(max_ep, gnet, opt, global_ep, global_ep_r, res_queue, i,
               env, n_s, n_a, words_list, word_width, win_ep)
        for i in range(mp.cpu_count())
    ]
    for w in workers:
        w.start()
    res = []  # record episode rewards to plot
    while True:
        r = res_queue.get()
        if r is not None:
            res.append(r)
        else:
            # a None on the queue signals that training has finished
            break
    for w in workers:
        w.join()
print("Jugadas:", global_ep.value)
print("Ganadas:", win_ep.value)
    plt.plot(res)
    plt.ylabel('Moving average ep reward')
    plt.xlabel('Step')
    plt.show()