# -*- coding: utf-8 -*-
"""
Created on Mon May  1 07:55:45 2023

@author: Bernd Ebenhoch
"""


import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import streamlit as st
import copy
plt.style.use('mystyle.mplstyle')

# Defining the neural network as the agent to choose ad scheme A (0) or B (1)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(1, activation="sigmoid", input_shape=(1,)))
model.summary()
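# This is the smallest possible policy network: a single neuron with one weight w
# and one bias b. Since the input is always 0 (see below), the output reduces to
# sigmoid(b), which is interpreted as the probability of choosing ad B.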




@tf.function()
def action_selection(model):
    # Using GradientTape to automatically compute gradients with TensorFlow
    with tf.GradientTape() as tape:

        # As we have no information about the user viewing the ad,
        # the input to the neural network is always the same: 0
        output = model(np.array([[0.0]]))  # [0 ... 1]

        # The output of the neural network is considered as probability for
        # taking action A (0) or B (1)
        # We compare the output with a uniform random variable
        # For example, if the output is 0.8,
        # we have 80% chance that random variable is smaller, taking action B (1)
        # and 20% chance that the random variable is larger, taking action A (0)
        action = (tf.random.uniform((1, 1)) < output)  # [0 or 1]

        # The loss value measures the difference between the output and the action
        loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(action, output))

    # We are creating the gradients [dloss/dw, dloss/db]
    grads = tape.gradient(loss, model.trainable_variables)
    return output, action, loss, grads
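
# A short sketch of why this loss yields a policy gradient:
# the binary cross-entropy of the sampled action a against the output p is
#   loss = -[a*log(p) + (1-a)*log(1-p)] = -log(probability of the action taken),
# so grads = d(-log pi(a)) / d(w, b). Scaling these gradients by a
# reward-dependent factor below gives a REINFORCE-style policy-gradient update.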


st.markdown(
    'Simulate A/B optimization with policy gradient reinforcement learning')


lr = float(st.text_input('Learning rate', value=0.5))

prob_A = float(st.text_input('Click probability of ad A', value=0.3))

prob_B = float(st.text_input('Click probability of ad B', value=0.4))

steps = int(st.text_input('Number of ad impressions (steps)', value=1000))

information_for_plotting = np.zeros((steps, 10))
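# Column layout of information_for_plotting:
#   0 model output, 1 action, 2 loss, 3 weight gradient, 4 bias gradient,
#   5 reward, 6 adjusted weight gradient, 7 adjusted bias gradient,
#   8 weight, 9 bias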

if st.button('Run the ad campaign and display the results'):

    with st.spinner('Simulating the ad campaign may take a few seconds ...'):

        for step in range(steps):

            # The neural network is used to choose the action
            # To display the learning progress, we also record the 
            # model output, loss and gradients
            output, action, loss, grads = action_selection(model)

            # Next we apply the action by displaying ad A or B
            # As we do not want to wait to see whether a user clicks the ad,
            # we simulate the click with the configured click probabilities
            # By default, ad A has a lower click probability (30%)
            # than ad B (40%)
            # The click (1) or no click (0) is the reward used for training
            if not bool(action):  # Action A
                reward = float(np.random.random() < prob_A)
            else:  # Action B
                reward = float(np.random.random() < prob_B)

            # The gradients obtained above are scaled by (reward - 0.5) * 2
            # Gradients of actions that led to a click are kept unchanged,
            # whereas gradients of actions that did not lead to a click are reversed
            grads_adjusted = []
            for var_index in range(len(model.trainable_variables)):
                grads_adjusted.append((reward-0.5)*2 * grads[var_index])
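            # In effect, the 0.5 acts as a fixed reward baseline: a click
            # (factor +1) reinforces the chosen action, while no click
            # (factor -1) pushes the policy away from it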

            # The adjusted gradients are applied to update the model parameters
            # with a manual gradient-descent step
            model.trainable_variables[0].assign(
                model.trainable_variables[0]-lr*grads_adjusted[0])
            model.trainable_variables[1].assign(
                model.trainable_variables[1]-lr*grads_adjusted[1])
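            # Equivalently, a Keras optimizer could apply the same update;
            # a sketch (not used here):
            #   optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
            #   optimizer.apply_gradients(zip(grads_adjusted, model.trainable_variables))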

            information_for_plotting[step, 0] = output.numpy()[0]
            information_for_plotting[step, 1] = action.numpy()[0].astype(int)
            information_for_plotting[step, 2] = loss
            information_for_plotting[step, 3] = grads[0]
            information_for_plotting[step, 4] = grads[1]
            information_for_plotting[step, 5] = reward
            information_for_plotting[step, 6] = grads_adjusted[0]
            information_for_plotting[step, 7] = grads_adjusted[1]
            information_for_plotting[step, 8] = copy.deepcopy(model.trainable_variables[0])
            information_for_plotting[step, 9] = copy.deepcopy(model.trainable_variables[1])
            
        # Plot the results
        titles = ['Model Output', 'Action', 'Loss', 'Gradients', 'Rewards',
                  'Adjusted Gradients', 'Model Parameters']
        plus = [0, 0, 0, 0, 1, 1, 2]
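        # The 'plus' offsets map the 7 subplots onto the 10 logged columns:
        # the Gradients, Adjusted Gradients and Model Parameters panels each
        # show two columns (weight and bias), so later panels skip ahead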

        fig = plt.figure(figsize=(12, 26))
        fig.subplots(7, 1, sharex=True)
        for i in range(7):
            plt.subplot(7, 1, i+1)
            plt.subplots_adjust(hspace=.0)

            if i in [0, 1, 2, 4]:
                plt.plot(information_for_plotting[:, i+plus[i]])
                plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))

            else:
                plt.plot(information_for_plotting[:, i+1+plus[i]], label='Bias')
                plt.plot(information_for_plotting[:, i+plus[i]], label='Weight')

                plt.legend(loc="upper left")
                plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))
            plt.ylabel(titles[i])

        plt.xlabel('Step')
        plt.show()

        # Sum of the total clicks obtained
        st.markdown('Your ad campaign received **' +
                    str(int(information_for_plotting[:, 5].sum())) + '** clicks in total.')

        st.pyplot(fig)
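
# To launch the app (assuming this file is saved, e.g., as ab_optimization.py):
#   streamlit run ab_optimization.py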