# -*- coding: utf-8 -*-
"""
Created on Sun Mar 26 21:07:00 2023
@author: Bernd Ebenhoch
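
Streamlit app that simulates the optimization of an A/B ad campaign with
policy gradient reinforcement learning.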
"""
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import streamlit as st
plt.style.use('mystyle.mplstyle')
# Defining the neural network as the agent to choose ad scheme A (0) or B (1)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(1, activation="sigmoid", input_shape=(1,)))
model.summary()
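
# The input to the network is always 0 (see action_selection below), so the
# output is sigmoid(w*0 + b) = sigmoid(b). Only the bias therefore receives a
# non-zero gradient, while the weight keeps its initial value.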
@tf.function()
def action_selection(model):
    # Using GradientTape to automatically build gradients with TensorFlow
    with tf.GradientTape() as tape:

        # As we have no information about the user viewing the ad,
        # the input to the neural network is always the same: 0
        output = model(np.array([[0.0]]))  # [0 ... 1]

        # The output of the neural network is treated as the probability of
        # taking action A (0) or B (1).
        # We compare the output with a uniform random variable.
        # For example, if the output is 0.8, there is an 80% chance that the
        # random variable is smaller, taking action B (1), and a 20% chance
        # that it is larger, taking action A (0).
        action = (tf.random.uniform((1, 1)) < output)  # [0 or 1]

        # The loss value measures the difference between the output and the action
        loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(action, output))

    # We are creating the gradients [dloss/dw, dloss/db]
    grads = tape.gradient(loss, model.trainable_variables)

    return output, action, loss, grads
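
# For a sampled action a and output probability p, the binary cross-entropy
# above equals -log P(a) (i.e. -log(p) for action 1 and -log(1-p) for action 0),
# so scaling its gradient by a reward-dependent factor in the training loop
# below amounts to a REINFORCE-style policy-gradient update.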

st.markdown(
    'Simulate A/B optimization with policy gradient reinforcement learning')

learning_rate = float(st.text_area('Learning rate', value=0.1, height=25))
prob_A = float(st.text_area('Click probability of ad A', 0.4, height=75))
prob_B = float(st.text_area('Click probability of ad B', 0.5, height=75))
epochs = int(st.text_area('Number of ad impressions (epochs)', 2000, height=75))

if st.button('Train the model and display the fit curve'):
    with st.spinner('Simulating the ad campaign may take a few seconds ...'):

        # Per-epoch values that are collected for plotting after training
        information_for_plotting = np.zeros((epochs, 10))

        for epoch in range(epochs):

            output, action, loss, grads = action_selection(model)

            # Next we apply the action by displaying ad A or B.
            # As we do not want to wait for a user to click the ad,
            # we simulate the click with the click probabilities chosen above
            # (by default, ad A has a lower click probability of 40% than
            # ad B with 50%).
            # We consider the click as the reward for training.
            if action == False:  # Action A
                reward = float(np.random.random() < prob_A)
            if action == True:   # Action B
                reward = float(np.random.random() < prob_B)
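
            # float(np.random.random() < p) is a Bernoulli draw: the reward is
            # 1.0 with probability p (the ad was clicked) and 0.0 otherwise.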

            # The gradients obtained above are multiplied with the acquired reward
            # Gradients for actions that lead to clicks are kept unchanged,
            # whereas gradients for actions that do not lead to clicks are reversed
            grads_adjusted = []
            for var_index in range(len(model.trainable_variables)):
                grads_adjusted.append((reward-0.5)*2 * grads[var_index])
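
            # (reward - 0.5) * 2 maps the reward {0, 1} to a factor of {-1, +1}:
            # a click (reward 1) keeps the gradient, no click (reward 0) flips
            # its sign, pushing the output away from the unrewarded action.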

            # Using standard gradient descent, we apply the adjusted gradients
            # to update the model parameters (weight and bias)
            model.trainable_variables[0].assign(
                model.trainable_variables[0] - learning_rate*grads_adjusted[0])
            model.trainable_variables[1].assign(
                model.trainable_variables[1] - learning_rate*grads_adjusted[1])
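
            # This is a plain SGD step (param <- param - learning_rate * grad);
            # the same update could also be written with an optimizer, e.g.
            # tf.keras.optimizers.SGD(learning_rate).apply_gradients(
            #     zip(grads_adjusted, model.trainable_variables))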

            # Collect per-epoch scalars for plotting
            information_for_plotting[epoch, 0] = output.numpy()[0, 0]
            information_for_plotting[epoch, 1] = int(action.numpy()[0, 0])
            information_for_plotting[epoch, 2] = loss.numpy()
            information_for_plotting[epoch, 3] = grads[0].numpy()[0, 0]
            information_for_plotting[epoch, 4] = grads[1].numpy()[0]
            information_for_plotting[epoch, 5] = reward
            information_for_plotting[epoch, 6] = grads_adjusted[0].numpy()[0, 0]
            information_for_plotting[epoch, 7] = grads_adjusted[1].numpy()[0]
            information_for_plotting[epoch, 8] = model.trainable_variables[0].numpy()[0, 0]
            information_for_plotting[epoch, 9] = model.trainable_variables[1].numpy()[0]

        titles = ['Model Output', 'Action', 'Loss', 'Gradients', 'Rewards',
                  'Adjusted Gradients', 'Model Parameters']
        plus = [0, 0, 0, 0, 1, 1, 2]
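
        # 'plus' offsets the column index for each panel: panels 3, 5 and 6 show
        # two curves (weight and bias) and therefore consume two columns of
        # information_for_plotting, so the following panels have to skip ahead.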

        fig = plt.figure(figsize=(12, 26))
        fig.subplots(7, 1, sharex=True)

        for i in range(7):
            plt.subplot(7, 1, i+1)
            plt.subplots_adjust(hspace=.0)
            if i in [0, 1, 2, 4]:
                plt.plot(information_for_plotting[:, i+plus[i]])
                plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))
            else:
                plt.plot(information_for_plotting[:, i+1+plus[i]], label='Bias')
                plt.plot(information_for_plotting[:, i+plus[i]], label='Weight')
                plt.legend(loc="upper left")
                plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))
            plt.ylabel(titles[i])
            plt.xlabel('Epoch')

        plt.show()

        st.markdown('Your ad campaign received ' +
                    str(int(information_for_plotting[:, 5].sum())) + ' clicks in total.')
        st.pyplot(fig)
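
# To launch the app locally (assuming this file is saved as, e.g., app.py):
#   streamlit run app.py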