# -*- coding: utf-8 -*-
"""
Created on Sun Mar 26 21:07:00 2023

@author: Bernd Ebenhoch
"""


import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import streamlit as st
plt.style.use('mystyle.mplstyle')
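# 'mystyle.mplstyle' is assumed to be a local Matplotlib style sheet sitting
# next to this script; plt.style.use raises an OSError if it is missing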

# Defining the neural network as the agent to choose ad scheme A (0) or B (1)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(1, activation="sigmoid", input_shape=(1,)))
model.summary()
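
# Note on the architecture: with a single sigmoid neuron and a constant
# input of 0, the output is sigmoid(w*0 + b) = sigmoid(b), so effectively
# only the bias is learned. With the default zero bias initialization the
# initial output is sigmoid(0) = 0.5, i.e. the agent starts by showing
# ad A and ad B with equal probability.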


@tf.function()
def action_selection(model):
    # Using GradientTape to automatically build gradients with TensorFlow
    with tf.GradientTape() as tape:

        # As we have no information about the user viewing the ad,
        # the input to the neural network is always the same: 0
        output = model(np.array([[0.0]]))  # [0 ... 1]

        # The output of the neural network is interpreted as the probability
        # of taking action B (1) rather than action A (0).
        # We compare the output with a uniform random variable:
        # for example, if the output is 0.8, there is an 80% chance that the
        # random variable is smaller, so we take action B (1), and a 20%
        # chance that it is larger, so we take action A (0)
        action = (tf.random.uniform((1, 1)) < output)  # [0 or 1]

        # The loss measures the difference between the output and the
        # sampled action; the boolean action must be cast to float for
        # the cross-entropy computation
        loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(
            tf.cast(action, tf.float32), output))

    # We create the gradients [dloss/dw, dloss/db]
    grads = tape.gradient(loss, model.trainable_variables)
    return output, action, loss, grads
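
# Why binary cross-entropy works as a policy-gradient loss: with the
# sampled action as the target, BCE(action, output) reduces to
# -log(output) if action == 1 and -log(1 - output) if action == 0,
# i.e. the negative log-probability -log(pi(action)) of the chosen action.
# Worked example: output = 0.8 and action = 1 gives loss = -ln(0.8) ≈ 0.22,
# while action = 0 gives loss = -ln(0.2) ≈ 1.61. The gradients returned
# above are therefore the REINFORCE score-function term grad(-log pi(a)),
# still unscaled by any reward.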


st.markdown(
    'Simulate A/B optimization with policy gradient reinforcement learning')


learning_rate = st.number_input('Learning rate', value=0.1)

prob_A = st.number_input('Click probability of ad A', value=0.4)

prob_B = st.number_input('Click probability of ad B', value=0.5)

epochs = st.number_input('Number of ad impressions (epochs)', value=2000, step=1)

if st.button('Train the model and plot the fit curve'):

    with st.spinner('Simulating the ad campaign may take a few seconds ...'):

        # Allocated here because the number of epochs is only known
        # after the user input has been read
        information_for_plotting = np.zeros((epochs, 10))

        for epoch in range(epochs):

            output, action, loss, grads = action_selection(model)

            # Next we apply the action by displaying ad A or B
            # As we do not want to wait for a user to click the ad,
            # we simulate the click instead: by default, ad A has a lower
            # click probability (40%) than ad B (50%)
            # We consider a click as the reward for training
            if bool(action):  # Action B
                reward = float(np.random.random() < prob_B)
            else:  # Action A
                reward = float(np.random.random() < prob_A)

            # The gradients obtained above are scaled by (reward - 0.5) * 2,
            # which maps reward 1 -> +1 and reward 0 -> -1:
            # gradients for actions that led to a click keep their sign,
            # whereas gradients for actions that did not are flipped
            grads_adjusted = []
            for var_index in range(len(model.trainable_variables)):
                grads_adjusted.append((reward-0.5)*2 * grads[var_index])
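
            # Interpreted as policy gradients: descending the unchanged
            # gradient of -log(pi(a)) makes a rewarded action more likely,
            # while the flipped sign makes an unrewarded action less likely.
            # This is REINFORCE with a constant baseline of 0.5.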

            # We apply the adjusted gradients as a plain gradient-descent
            # step to update the model parameters
            model.trainable_variables[0].assign(
                model.trainable_variables[0] - learning_rate*grads_adjusted[0])
            model.trainable_variables[1].assign(
                model.trainable_variables[1] - learning_rate*grads_adjusted[1])
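
            # An equivalent, arguably more idiomatic update would delegate
            # the step to a Keras optimizer (sketch only, not used here so
            # the manual update stays visible):
            #   optimizer = tf.keras.optimizers.SGD(learning_rate)
            #   optimizer.apply_gradients(zip(grads_adjusted, model.trainable_variables))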

            # Store everything we want to plot later; .item() extracts the
            # scalar value from each single-element tensor
            information_for_plotting[epoch, 0] = output.numpy().item()
            information_for_plotting[epoch, 1] = int(action.numpy().item())
            information_for_plotting[epoch, 2] = loss.numpy().item()
            information_for_plotting[epoch, 3] = grads[0].numpy().item()
            information_for_plotting[epoch, 4] = grads[1].numpy().item()
            information_for_plotting[epoch, 5] = reward
            information_for_plotting[epoch, 6] = grads_adjusted[0].numpy().item()
            information_for_plotting[epoch, 7] = grads_adjusted[1].numpy().item()
            information_for_plotting[epoch, 8] = model.trainable_variables[0].numpy().item()
            information_for_plotting[epoch, 9] = model.trainable_variables[1].numpy().item()

        titles = ['Model Output', 'Action', 'Loss', 'Gradients', 'Rewards',
                  'Adjusted Gradients', 'Model Parameters']
        plus = [0, 0, 0, 0, 1, 1, 2]
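
        # Column layout of information_for_plotting and the 'plus' offsets:
        # panels 0-2 plot single columns (output, action, loss), panel 3
        # plots columns 3/4 (weight/bias gradients), panel 4 plots column 5
        # (reward), panel 5 columns 6/7 (adjusted gradients), and panel 6
        # columns 8/9 (weight/bias parameters).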

        fig, axes = plt.subplots(7, 1, figsize=(12, 26), sharex=True)
        fig.subplots_adjust(hspace=0)
        for i in range(7):
            ax = axes[i]

            if i in [0, 1, 2, 4]:
                # Panels with a single curve
                ax.plot(information_for_plotting[:, i+plus[i]])
            else:
                # Panels with separate curves for bias and weight
                ax.plot(information_for_plotting[:, i+1+plus[i]], label='Bias')
                ax.plot(information_for_plotting[:, i+plus[i]], label='Weight')
                ax.legend(loc="upper left")

            ax.yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))
            ax.set_ylabel(titles[i])

        axes[-1].set_xlabel('Epoch')

        st.markdown('Your ad campaign received ' +
                    str(int(information_for_plotting[:, 5].sum())) + ' clicks in total.')
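
        # Rough benchmark for the click total (simple expectation math):
        # with the default 2000 impressions and click probabilities 0.4/0.5,
        # always showing ad A yields ~800 expected clicks, always showing
        # ad B ~1000, and a 50/50 random policy ~900, so a well-trained
        # agent should land near the upper end of that range.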

        st.pyplot(fig)