from __future__ import absolute_import, division, print_function, unicode_literals
from car_dqn import CarRacingDQN
import os
import tensorflow as tf
import gym
import _thread
import re
import sys
import numpy as np

# Ensure TensorFlow is running on the GPU
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

load_checkpoint = True
checkpoint_path = "data/checkpoints/train24"
train_episodes = 15000
save_freq_episodes = train_episodes // 100  # save a checkpoint every 1% of the training episodes
finished = False
opendir = checkpoint_path + '.txt'
text_results = open(opendir, "w")
render = False

frame_skip = 3  # each chosen action is repeated for frame_skip frames; the same number of frames is stacked and training runs every frame_skip steps
model_config = dict(
    min_epsilon=0.05,
    max_negative_rewards=8,
    min_experience_size=int(100),  # minimum stored experience before training starts
    experience_capacity=int(150000),
    num_frame_stack=frame_skip,
    frame_skip=frame_skip,
    train_freq=frame_skip,
    batchsize=64,
    epsilon_decay_steps=int(100000),
    target_network_update_freq=int(1000),  # copy the prediction-network weights to the target network every 1000 global steps
    gamma=0.95,
    render=False,
)
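# Note on the target network (an assumption about the CarRacingDQN internals in
# car_dqn.py, shown only as an illustrative sketch; target_vars / prediction_vars
# are placeholder names): a standard DQN implementation syncs the target network
# every target_network_update_freq global steps, roughly:
#
#   if self.global_counter % self.target_network_update_freq == 0:
#       sync_ops = [tf.compat.v1.assign(t, p)
#                   for t, p in zip(target_vars, prediction_vars)]
#       self.session.run(sync_ops)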

dqn_scores = []
eps_history = []
avg_score_all = [0]

env = gym.make('CarRacing-v0', verbose=False)

tf.compat.v1.reset_default_graph()

dqn_agent = CarRacingDQN(env=env, **model_config)
dqn_agent.build_graph()
sess = tf.compat.v1.InteractiveSession()
dqn_agent.session = sess

# Initialize the checkpoint saver
saver = tf.compat.v1.train.Saver(max_to_keep=1000)  # keep at most 1000 checkpoints on disk
# Optionally restore the latest checkpoint instead of training from scratch
if load_checkpoint:
    train_episodes = 150
    save_freq_episodes = 0
    print("loading the latest checkpoint from %s" % checkpoint_path)
    ckpt = tf.train.get_checkpoint_state(checkpoint_path)
    assert ckpt, "checkpoint path %s not found" % checkpoint_path
    global_counter = int(re.findall(r"-(\d+)$", ckpt.model_checkpoint_path)[0])
    saver.restore(sess, ckpt.model_checkpoint_path)
    dqn_agent.global_counter = global_counter
    render = True
else:
    if checkpoint_path is not None:
        assert not os.path.exists(checkpoint_path), \
            "checkpoint path already exists but load_checkpoint is false"

    tf.compat.v1.global_variables_initializer().run()


def save_checkpoint():
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)
    p = os.path.join(checkpoint_path, "m.ckpt")
    saver.save(sess, p, dqn_agent.global_counter)
    print("saved to %s - %d" % (p, dqn_agent.global_counter))


def one_episode(eps_history,dqn_scores,avg_score_all,render,load_checkpoint):
    score, reward, frames, epsilon = dqn_agent.play_episode(render, load_checkpoint)

    eps_history.append(epsilon)

    dqn_scores.append(score)
    i = dqn_agent.episode_counter
    avg_score = np.mean(dqn_scores[max(0, i - 100):(i + 1)])  # rolling mean over the last 100 episodes
    avg_score_all.append(avg_score)
    max_avg_score = max(avg_score_all)
    if avg_score >= max_avg_score:
        new_max = ' => New HighScore! <= '
        highscore = True
    else:
        new_max = ''
        highscore = False



    strm = ("#> episode: %i | score: %.2f | total steps: %i | epsilon: %.5f | average 100 score: %.2f" %
            (i, score, dqn_agent.global_counter, epsilon, avg_score))

    print(strm + new_max)

    with open(opendir, "a") as results_file:
        results_file.write(strm + new_max + '\n')

    if not load_checkpoint:
        save_cond = (
            dqn_agent.episode_counter % save_freq_episodes == 0
            and checkpoint_path is not None
            and dqn_agent.do_training
        )
        if save_cond or (highscore and dqn_agent.episode_counter > 100):
            save_checkpoint()

    return eps_history,dqn_scores,avg_score_all

def input_thread(stop_signal):
    input("...enter to stop after current episode\n")
    stop_signal.append("OK")


def main_loop(eps_history,dqn_scores,avg_score_all,render,load_checkpoint):
    # run episodes until Enter is pressed or the episode budget is reached
    stop_signal = []
    _thread.start_new_thread(input_thread, (stop_signal,))
    while True:
        if stop_signal:
            break
        if dqn_agent.do_training and dqn_agent.episode_counter >= train_episodes:
            break
        eps_history,dqn_scores,avg_score_all = one_episode(eps_history,dqn_scores,avg_score_all,render,load_checkpoint)

    print("done")
    text_results.close()
    return eps_history, dqn_scores, avg_score_all


if train_episodes > 0 and dqn_agent.episode_counter < train_episodes and not load_checkpoint:
    print("now training... you can early stop with enter...")
    print("##########")
    sys.stdout.flush()
    main_loop(eps_history,dqn_scores,avg_score_all,render,load_checkpoint)
    save_checkpoint()
    print("ok training done")

else:
    print("now just playing...")
    sys.stdout.flush()
    main_loop(eps_history,dqn_scores,avg_score_all,render,load_checkpoint)
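
# Optional (illustrative sketch only): the collected score and epsilon histories
# could be plotted after a run, e.g. with matplotlib:
#
#   import matplotlib.pyplot as plt
#   plt.plot(dqn_scores, label="episode score")
#   plt.plot(avg_score_all, label="rolling 100-episode average")
#   plt.plot(eps_history, label="epsilon")
#   plt.xlabel("episode")
#   plt.legend()
#   plt.savefig(checkpoint_path + "_scores.png")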