# -*- coding: utf-8 -*-
"""
Created on Mon May 1 07:55:45 2023
@author: Bernd Ebenhoch
"""
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import streamlit as st
import copy
plt.style.use('mystyle.mplstyle')
# Defining the neural network as the agent to choose ad scheme A (0) or B (1)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(1, activation="sigmoid", input_shape=(1,)))
model.summary()
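
# Added note (not in the original script): the agent has only two trainable
# parameters, one weight and one bias. Because the input is always 0, the output
# reduces to sigmoid(bias); the weight receives a zero gradient, so effectively
# only the bias is learned.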

@tf.function()
def action_selection(model):
    # Using GradientTape to automatically build gradients with TensorFlow
    with tf.GradientTape() as tape:
        # As we have no information about the user viewing the ad,
        # the input to the neural network is always the same: 0
        output = model(np.array([[0.0]]))  # [0 ... 1]

        # The output of the neural network is interpreted as the probability of
        # taking action A (0) or B (1).
        # We compare the output with a uniform random variable.
        # For example, if the output is 0.8, there is an 80% chance that the
        # random variable is smaller, taking action B (1), and a 20% chance
        # that it is larger, taking action A (0).
        action = (tf.random.uniform((1, 1)) < output)  # [0 or 1]
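        # Added illustration: if output is 0.8, the draw u = tf.random.uniform((1, 1))
        # satisfies u < 0.8 in roughly 80% of cases, so action is mostly True (B)
        # but occasionally False (A); this sampling keeps the agent exploring both ads.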

        # The loss value measures the difference between the output and the action
        loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(action, output))
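        # Added note: for this Bernoulli policy the binary cross-entropy equals the
        # negative log-probability of the sampled action, so its gradient, scaled
        # by the reward term further below, gives a REINFORCE-style policy-gradient
        # update.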

    # We compute the gradients [dloss/dw, dloss/db]
    grads = tape.gradient(loss, model.trainable_variables)
    return output, action, loss, grads
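
# Minimal sketch (added, not part of the original app) of how action_selection
# could be inspected on its own; uncomment to print one sampled step to the console:
# out, act, l, g = action_selection(model)
# print('output:', out.numpy(), 'action:', act.numpy(), 'loss:', l.numpy())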

st.markdown(
    'Simulate A/B optimization with policy gradient reinforcement learning')

lr = float(st.text_input('Learning rate', value=0.5))
prob_A = float(st.text_input('Click probability of ad A', value=0.3))
prob_B = float(st.text_input('Click probability of ad B', value=0.4))
steps = int(st.text_input('Number of ad impressions (steps)', value=1000))
information_for_plotting = np.zeros((steps, 10))

if st.button('Run the ad campaign and display the results'):
    with st.spinner('Simulating the ad campaign may take a few seconds ...'):
        for step in range(steps):
            # The neural network is used to choose the action.
            # To display the learning progress, we also record the
            # model output, loss and gradients.
            output, action, loss, grads = action_selection(model)

            # Next we apply the action by displaying ad A or B.
            # As we do not want to wait to see whether a user clicks the ad,
            # we simulate the click behaviour with the probabilities set above:
            # ad A has a lower click probability (default 30%) than ad B (default 40%).
            # The simulated click outcome is used as the reward for training.
            if action == False:  # Action A
                reward = float(np.random.random() < prob_A)
            if action == True:  # Action B
                reward = float(np.random.random() < prob_B)
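            # Added illustration: with prob_B = 0.4, np.random.random() < prob_B is
            # True in about 40% of impressions, so showing ad B yields reward 1.0
            # (click) roughly 40% of the time and reward 0.0 otherwise.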

            # The gradients obtained above are scaled according to the acquired reward:
            # gradients for actions that led to a click are kept unchanged,
            # whereas gradients for actions that did not lead to a click are reversed.
            grads_adjusted = []
            for var_index in range(len(model.trainable_variables)):
                grads_adjusted.append((reward - 0.5) * 2 * grads[var_index])
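            # Added note: (reward - 0.5) * 2 maps reward 1.0 to +1 and reward 0.0 to -1,
            # so the descent step below either reinforces or discourages the action
            # that was just taken.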

            # We apply the adjusted gradients with a plain gradient-descent step
            # to update the model parameters (weight and bias).
            model.trainable_variables[0].assign(
                model.trainable_variables[0] - lr * grads_adjusted[0])
            model.trainable_variables[1].assign(
                model.trainable_variables[1] - lr * grads_adjusted[1])
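            # Equivalent sketch (added, assuming an optimizer such as
            # optimizer = tf.keras.optimizers.SGD(learning_rate=lr) created once
            # outside the loop): the two manual assigns above could be replaced by
            # optimizer.apply_gradients(zip(grads_adjusted, model.trainable_variables))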

            information_for_plotting[step, 0] = output.numpy()[0]
            information_for_plotting[step, 1] = action.numpy()[0].astype(int)
            information_for_plotting[step, 2] = loss
            information_for_plotting[step, 3] = grads[0]
            information_for_plotting[step, 4] = grads[1]
            information_for_plotting[step, 5] = reward
            information_for_plotting[step, 6] = grads_adjusted[0]
            information_for_plotting[step, 7] = grads_adjusted[1]
            information_for_plotting[step, 8] = copy.deepcopy(model.trainable_variables[0])
            information_for_plotting[step, 9] = copy.deepcopy(model.trainable_variables[1])

    # Plot the results
    titles = ['Model Output', 'Action', 'Loss', 'Gradients', 'Rewards',
              'Adjusted Gradients', 'Model Parameters']
    plus = [0, 0, 0, 0, 1, 1, 2]
    fig = plt.figure(figsize=(12, 26))
    fig.subplots(7, 1, sharex=True)
    for i in range(7):
        plt.subplot(7, 1, i+1)
        plt.subplots_adjust(hspace=.0)
        if i in [0, 1, 2, 4]:
            plt.plot(information_for_plotting[:, i+plus[i]])
            plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))
        else:
            plt.plot(information_for_plotting[:, i+1+plus[i]], label='Bias')
            plt.plot(information_for_plotting[:, i+plus[i]], label='Weight')
            plt.legend(loc="upper left")
            plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))
        plt.ylabel(titles[i])
    plt.xlabel('Step')
    plt.show()

    # Sum of the total clicks obtained
    st.markdown('Your ad campaign received **' +
                str(int(information_for_plotting[:, 5].sum())) + '** clicks in total.')
    st.pyplot(fig)
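
# Usage note (added): assuming this file is saved as app.py, the app can be
# started locally with:  streamlit run app.py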