# -*- coding: utf-8 -*-
"""
Created on Sun Mar 26 21:07:00 2023

@author: Bernd Ebenhoch
"""

import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import streamlit as st
import copy

plt.style.use('mystyle.mplstyle')

# Defining the neural network as the agent to choose ad scheme A (0) or B (1)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(1, activation="sigmoid", input_shape=(1,)))
model.summary()
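
# Note: the single sigmoid neuron computes sigmoid(w*x + b) with one weight
# and one bias (2 trainable parameters). Because the input x is always 0
# (see action_selection below), the output reduces to sigmoid(b) and the
# gradient of the loss with respect to w is x * dloss/dz = 0, so in practice
# only the bias is learned.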


def action_selection(model):
    # Using GradientTape to automatically build gradients with TensorFlow
    with tf.GradientTape() as tape:

        # As we have no information about the user viewing the ad,
        # the input to the neural network is always the same: 0
        output = model(np.array([[0.0]]))  # [0 ... 1]

        # The output of the neural network is interpreted as the probability
        # of taking action A (0) or B (1).
        # We compare the output with a uniform random variable:
        # for example, if the output is 0.8, there is an 80% chance that the
        # random variable is smaller, so we take action B (1), and a 20%
        # chance that it is larger, so we take action A (0)
        action = (tf.random.uniform((1, 1)) < output)  # [0 or 1]

        # The loss value measures the difference between the output and the action
        loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(action, output))

    # We create the gradients [dloss/dw, dloss/db]
    grads = tape.gradient(loss, model.trainable_variables)
    return output, action, loss, grads
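
# Why this works (a minimal sketch of the policy-gradient idea): for a
# Bernoulli policy, the binary cross-entropy between the sampled action and
# the output is the negative log-likelihood of that action:
#     action = 1:  loss = -log(output)      # gradient pulls output towards 1
#     action = 0:  loss = -log(1 - output)  # gradient pulls output towards 0
# Applying these gradients unchanged makes the sampled action more likely;
# reversing their sign (see the reward scaling in the loop below) makes it
# less likely. This amounts to a REINFORCE update with a baseline of 0.5.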


st.markdown(
    'Simulate A/B optimization with policy gradient reinforcement learning')

lr = float(st.text_input('Learning rate', value=0.1))
prob_A = float(st.text_input('Click probability of ad A', value=0.4))
prob_B = float(st.text_input('Click probability of ad B', value=0.5))
epochs = int(st.text_input('Number of ad impressions (epochs)', value=2000))

information_for_plotting = np.zeros((epochs, 10))
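
# Columns of information_for_plotting, filled once per epoch in the loop
# below: 0 output, 1 action, 2 loss, 3 weight gradient, 4 bias gradient,
# 5 reward, 6 adjusted weight gradient, 7 adjusted bias gradient,
# 8 weight, 9 bias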

if st.button('Run the ad campaign and display the results'):
    with st.spinner('Simulating the ad campaign may take a few seconds ...'):
        for epoch in range(epochs):

            output, action, loss, grads = action_selection(model)

            # Next we apply the action by displaying ad A or B.
            # As we do not want to wait to see whether a real user clicks
            # the ad, we simulate the click with the configured click rates:
            # with the defaults, ad A (40% click rate) is less likely to be
            # clicked than ad B (50% click rate).
            # We treat the click as the reward for training
            if action == False:  # Action A
                reward = float(np.random.random() < prob_A)
            else:  # Action B
                reward = float(np.random.random() < prob_B)

            # The gradients obtained above are multiplied by a reward signal:
            # gradients of actions that led to a click are kept unchanged,
            # whereas gradients of actions that did not are reversed
            grads_adjusted = []
            for var_index in range(len(model.trainable_variables)):
                grads_adjusted.append((reward-0.5)*2 * grads[var_index])
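
            # (reward-0.5)*2 maps the reward {0, 1} to the factor {-1, +1}:
            # a click (reward 1) keeps the gradient, no click (reward 0)
            # flips its sign, turning the descent step below into ascent
            # for that action.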

            # We apply the adjusted gradients with a plain gradient-descent
            # step to update the model parameters
            model.trainable_variables[0].assign(
                model.trainable_variables[0] - lr*grads_adjusted[0])
            model.trainable_variables[1].assign(
                model.trainable_variables[1] - lr*grads_adjusted[1])
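
            # A sketch of an equivalent update using a Keras optimizer
            # (assuming an SGD optimizer created once outside the loop with
            # the same learning rate lr):
            #     optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
            #     optimizer.apply_gradients(
            #         zip(grads_adjusted, model.trainable_variables))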

            information_for_plotting[epoch, 0] = output.numpy()[0]
            information_for_plotting[epoch, 1] = action.numpy()[0].astype(int)
            information_for_plotting[epoch, 2] = loss
            information_for_plotting[epoch, 3] = grads[0]
            information_for_plotting[epoch, 4] = grads[1]
            information_for_plotting[epoch, 5] = reward
            information_for_plotting[epoch, 6] = grads_adjusted[0]
            information_for_plotting[epoch, 7] = grads_adjusted[1]
            information_for_plotting[epoch, 8] = copy.deepcopy(model.trainable_variables[0])
            information_for_plotting[epoch, 9] = copy.deepcopy(model.trainable_variables[1])

        titles = ['Model Output', 'Action', 'Loss', 'Gradients', 'Rewards',
                  'Adjusted Gradients', 'Model Parameters']
        plus = [0, 0, 0, 0, 1, 1, 2]
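
        # 'plus' offsets the subplot index i into the data columns above:
        # single-curve panels (output, action, loss, reward) consume one
        # column, while each gradient/parameter panel consumes two columns
        # (weight and bias), shifting all later panels.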

        fig = plt.figure(figsize=(12, 26))
        fig.subplots(7, 1, sharex=True)
        for i in range(7):
            plt.subplot(7, 1, i+1)
            plt.subplots_adjust(hspace=.0)
            if i in [0, 1, 2, 4]:
                plt.plot(information_for_plotting[:, i+plus[i]])
                plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))
            else:
                plt.plot(information_for_plotting[:, i+1+plus[i]], label='Bias')
                plt.plot(information_for_plotting[:, i+plus[i]], label='Weight')
                plt.legend(loc="upper left")
                plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))
            plt.ylabel(titles[i])
        plt.xlabel('Epoch')
        plt.show()

        st.markdown('Your ad campaign received ' +
                    str(int(information_for_plotting[:, 5].sum())) + ' clicks in total.')
        st.pyplot(fig)