Bernd-Ebenhoch committed on
Commit d064765 · 1 Parent(s): b9ca64f

Update app.py

Files changed (1)
  1. app.py +6 -4
app.py CHANGED
@@ -18,7 +18,7 @@ model = tf.keras.models.Sequential()
 model.add(tf.keras.layers.Dense(1, activation="sigmoid", input_shape=(1,)))
 model.summary()
 
-information_for_plotting = np.zeros((epochs, 10))
+
 
 
 @tf.function()
@@ -54,10 +54,12 @@ learning_rate = st.text_area('Learning rate', value=0.1, height=25)
 
 prob_A = st.text_area('Click probability of ad A', 0.4, height=75)
 
-prob_A = st.text_area('Click probability of ad B', 0.5, height=75)
+prob_B = st.text_area('Click probability of ad B', 0.5, height=75)
 
 epochs = st.text_area('Number of ad impressions (epochs)', 2000, height=75)
 
+information_for_plotting = np.zeros((epochs, 10))
+
 if st.button('Modell trainieren und Fit-Kurve darstellen'):
 
     with st.spinner('Simulating the ad campaign may take a few seconds ...'):
@@ -73,10 +75,10 @@ if st.button('Modell trainieren und Fit-Kurve darstellen'):
         # than Ad B with 50% click rate
         # We consider the click rate as a measure of the reward for training
         if action == False:  # Action A
-            reward = float(np.random.random() < 0.4)
+            reward = float(np.random.random() < prob_A)
 
         if action == True:  # Action B
-            reward = float(np.random.random() < 0.5)
+            reward = float(np.random.random() < prob_B)
 
         # The gradients obtained above are multiplied with the acquired reward
         # Gradients for actions that lead to clicks are kept unchanged,
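
Read together, the changes fix two bugs and one ordering issue: the second text area previously overwrote prob_A instead of defining prob_B, the hard-coded click rates 0.4 and 0.5 are replaced by the user-supplied prob_A and prob_B, and information_for_plotting is now allocated only after epochs has been read from the UI. A minimal sketch of how the corrected inputs could feed the reward simulation is below; the float()/int() casts and the sample_reward helper are assumptions, since st.text_area returns strings and the rest of app.py is not shown in this diff.

import numpy as np
import streamlit as st

# Campaign settings from the UI; st.text_area returns strings, so cast them
# (the casts are an assumption -- the surrounding app code is not part of this diff)
learning_rate = float(st.text_area('Learning rate', value=0.1, height=25))
prob_A = float(st.text_area('Click probability of ad A', 0.4, height=75))
prob_B = float(st.text_area('Click probability of ad B', 0.5, height=75))
epochs = int(st.text_area('Number of ad impressions (epochs)', 2000, height=75))

# Allocated only after `epochs` is known (new line 61 in the diff)
information_for_plotting = np.zeros((epochs, 10))

# Bernoulli click reward with the chosen ad's probability (new lines 78 and 81)
def sample_reward(chose_B: bool) -> float:
    p = prob_B if chose_B else prob_A
    return float(np.random.random() < p)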
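
The last hunk's comments describe the policy-gradient idea behind the simulation: the gradients computed for the sampled action are multiplied by the reward, so updates from impressions that produced a click are kept unchanged while updates from impressions without a click are zeroed out. The training loop itself is not part of this diff, so the following is only a rough, self-contained sketch of that step; the optimizer, the use of GradientTape, and all names outside the diff are assumptions.

import numpy as np
import tensorflow as tf

# Example values; in the app these come from the st.text_area inputs shown above
prob_A, prob_B, learning_rate = 0.4, 0.5, 0.1

# Single sigmoid unit, as in the diff's model definition
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(1, activation="sigmoid", input_shape=(1,)))
optimizer = tf.keras.optimizers.Adam(learning_rate)

with tf.GradientTape() as tape:
    prob = model(tf.ones((1, 1)))               # probability of choosing ad B
    action = tf.random.uniform((1, 1)) < prob   # sample an action (True -> ad B)
    # Log-likelihood of the sampled action
    log_prob = tf.where(action, tf.math.log(prob), tf.math.log(1.0 - prob))
    loss = -log_prob                            # minimize negative log-likelihood

grads = tape.gradient(loss, model.trainable_variables)

# Bernoulli reward with the chosen ad's click probability (new lines 78 and 81)
chose_B = bool(action.numpy()[0, 0])
reward = float(np.random.random() < (prob_B if chose_B else prob_A))

# Scale each gradient by the reward: updates from impressions that led to a click
# are kept unchanged, updates from impressions without a click are zeroed out
scaled_grads = [g * reward for g in grads]
optimizer.apply_gradients(zip(scaled_grads, model.trainable_variables))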