JVice committed on
Commit
0560487
·
1 Parent(s): 85d1a2a

updated for committing to user database file path

Browse files
Files changed (1) hide show
  1. streamlit-app.py +343 -0
streamlit-app.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ st.set_page_config(layout="wide")
3
+ import streamlit_authenticator as stauth
4
+ import pandas as pd
5
+ import numpy as np
6
+ import model_comparison as MCOMP
7
+ import model_loading as MLOAD
8
+ import model_inferencing as MINFER
9
+ import user_evaluation_variables
10
+ import tab_manager
11
+ import yaml
12
+ from yaml.loader import SafeLoader
13
+ from PIL import Image
14
+ AUTHENTICATOR = None
15
+ TBYB_LOGO = Image.open('./assets/TBYB_logo_light.png')
16
+ USER_LOGGED_IN = False
17
+ USER_DATABASE_PATH = './data/user_database.yaml'
18
def create_new_user(authenticator, users):
    """Render the registration form and persist the user database on success.

    Args:
        authenticator: streamlit_authenticator.Authenticate instance.
        users: credentials dict loaded from USER_DATABASE_PATH; mutated in
            place by the authenticator when a new user is registered.
    """
    try:
        # register_user() adds the new account to `users` in place.
        if authenticator.register_user('Register user', preauthorization=False):
            st.success('User registered successfully')
            # Persist only after a successful registration so a failed or
            # untouched form does not rewrite the database on every rerun.
            with open(USER_DATABASE_PATH, 'w') as file:
                yaml.dump(users, file, default_flow_style=False)
    except Exception as e:
        st.error(e)
26
def forgot_password(authenticator, users):
    """Render the 'Forgot password' widget and persist the regenerated password.

    Args:
        authenticator: streamlit_authenticator.Authenticate instance.
        users: credentials dict loaded from USER_DATABASE_PATH; the
            authenticator stores the new hashed password in it.
    """
    try:
        username_of_forgotten_password, _email, _new_random_password = authenticator.forgot_password(
            'Forgot password')
        if username_of_forgotten_password:
            st.success('New password to be sent securely')
            # Random password should be transferred to user securely
            # Persist only when a new password was actually generated so an
            # untouched form does not rewrite the database on every rerun.
            with open(USER_DATABASE_PATH, 'w') as file:
                yaml.dump(users, file, default_flow_style=False)
    except Exception as e:
        st.error(e)
37
def update_account_details(authenticator, users):
    """Render the 'Update user details' widget and persist changes on success.

    Args:
        authenticator: streamlit_authenticator.Authenticate instance.
        users: credentials dict loaded from USER_DATABASE_PATH; mutated in
            place by the authenticator when details are updated.
    """
    if st.session_state["authentication_status"]:
        try:
            if authenticator.update_user_details(st.session_state["username"], 'Update user details'):
                st.success('Entries updated successfully')
                # Persist only after a successful update; previously the file
                # was rewritten on every rerun, even when unauthenticated.
                with open(USER_DATABASE_PATH, 'w') as file:
                    yaml.dump(users, file, default_flow_style=False)
        except Exception as e:
            st.error(e)
46
def reset_password(authenticator, users):
    """Render the 'Reset password' widget and persist the new password on success.

    Args:
        authenticator: streamlit_authenticator.Authenticate instance.
        users: credentials dict loaded from USER_DATABASE_PATH; the
            authenticator stores the new hashed password in it.
    """
    if st.session_state["authentication_status"]:
        try:
            if authenticator.reset_password(st.session_state["username"], 'Reset password'):
                st.success('Password modified successfully')
                # Persist only after a successful reset; previously the file
                # was rewritten unconditionally on every rerun.
                with open(USER_DATABASE_PATH, 'w') as file:
                    yaml.dump(users, file, default_flow_style=False)
        except Exception as e:
            st.error(e)
55
def user_login_create():
    """Render the sidebar login/register/account-details tabs and authenticate.

    Loads the user database from USER_DATABASE_PATH, builds the module-level
    AUTHENTICATOR, and drives the login / registration / password-recovery UI.

    Returns:
        bool: the module-level USER_LOGGED_IN flag (True after a successful
        login this rerun).
    """
    global AUTHENTICATOR
    global TBYB_LOGO
    global USER_LOGGED_IN
    with open(USER_DATABASE_PATH) as file:
        users = yaml.load(file, Loader=SafeLoader)
    AUTHENTICATOR = stauth.Authenticate(
        users['credentials'],
        users['cookie']['name'],
        users['cookie']['key'],
        users['cookie']['expiry_days'],
        users['preauthorized']
    )
    with st.sidebar:
        st.image(TBYB_LOGO, width=70)
        loginTab, registerTab, detailsTab = st.tabs(["Log in", "Register", "Account details"])

        with loginTab:
            # login() returns (name, status, username); status is True on
            # success, False on bad credentials, None before any attempt.
            name, authentication_status, username = AUTHENTICATOR.login('Login', 'main')
            if authentication_status:
                AUTHENTICATOR.logout('Logout', 'main')
                st.write(f'Welcome *{name}*')
                user_evaluation_variables.USERNAME = username
                USER_LOGGED_IN = True
            elif authentication_status is False:
                st.error('Username/password is incorrect')
                forgot_password(AUTHENTICATOR, users)
            elif authentication_status is None:
                st.warning('Please enter your username and password')
                forgot_password(AUTHENTICATOR, users)
        if not authentication_status:
            with registerTab:
                create_new_user(AUTHENTICATOR, users)
        else:
            with detailsTab:
                st.write('**Username:** ', username)
                st.write('**Name:** ', name)
                st.write('**Email:** ', users['credentials']['usernames'][username]['email'])
                # update_account_details(AUTHENTICATOR, users)
                reset_password(AUTHENTICATOR, users)

    return USER_LOGGED_IN
99
def setup_page_banner():
    """Render the centered TBYB logo, app title and subtitle.

    Note: the original implementation also looped over all nine columns
    rebinding the loop variable to None, which is a no-op in Python and
    has been removed.
    """
    # Nine equal-width columns; placing the logo in the middle one centers it.
    columns = st.columns(9)
    with columns[4]:
        st.image(TBYB_LOGO, use_column_width=True)
    st.title('Try Before You Bias (TBYB)')
    st.write('*A Quantitative T2I Bias Evaluation Tool*')
109
def setup_how_to():
    """Render the collapsible step-by-step 'How to Use' guide."""
    guide = st.expander("How to Use")
    login_and_setup_steps = ("1. Login to your TBYB Account using the bar on the right\n"
                             "2. Navigate to the '\U0001F527 Setup' tab and input the ID of the HuggingFace \U0001F917 T2I model you want to evaluate\n")
    guide.write(login_and_setup_steps)
    guide.image(Image.open('./assets/HF_MODEL_ID_EXAMPLE.png'))
    test_step = "3. Test your chosen model by generating an image using an input prompt e.g.: 'A corgi with some cool sunglasses'\n"
    guide.write(test_step)
    guide.image(Image.open('./assets/lykon_corgi.png'))
    evaluation_steps = ("4. Navigate to the '\U0001F30E General Eval.' or '\U0001F3AF Task-Oriented Eval.' tabs "
                        " to evaluate your model once it has been loaded\n"
                        "5. Once you have generated some evaluation images, head over to the '\U0001F4C1 Generated Images' tab to have a look at them\n"
                        "6. To check out your evaluations or all of the TBYB Community evaluations, head over to the '\U0001F4CA Model Comparison' tab\n"
                        "7. For more information about the evaluation process, see our paper at --PAPER HYPERLINK-- or navigate to the "
                        " '\U0001F4F0 Additional Information' tab for a TL;DR.\n"
                        "8. For any questions or to report any bugs/issues. Please contact [email protected].\n")
    guide.write(evaluation_steps)
123
+
124
def setup_additional_information_tab(tab):
    """Populate the 'Additional Information' tab with the paper summary.

    Fixes over the original: the trapezoidal-rule term in the $B_D$ formula
    read $n_{i=1}$ and now reads $n_{i+1}$, and LaTeX backslashes are
    consistently escaped (`\\\\Sigma`, `\\\\mathcal`, ...) so the string
    literals contain no invalid escape sequences. Rendered output is
    otherwise unchanged.

    Args:
        tab: the Streamlit tab container to render into.
    """
    with tab:
        st.header("1. Quantifying Bias in Text-to-Image (T2I) Generative Models")
        st.markdown(
            """
            *Based on the article of the same name available here --PAPER HYPERLINK--

            Authors: Jordan Vice, Naveed Akhtar, Richard Hartley and Ajmal Mian

            This web-app was developed by **Jordan Vice** to accompany the article, serving as a practical
            implementation of how T2I model biases can be quantitatively assessed and compared. Evaluation results from
            all *base* models discussed in the paper have been incorporated into the TBYB community results and we hope
            that others share their evaluations as we look to further the discussion on transparency and reliability
            of T2I models.

            """)

        st.header('2. A (very) Brief Summary')
        st.image(Image.open('./assets/TBYB_flowchart.png'))
        st.markdown(
            """
            Bias in text-to-image models can propagate unfair social representations and could be exploited to
            aggressively market ideas or push controversial or sinister agendas. Existing T2I model bias evaluation
            methods focused on social biases. So, we proposed a bias evaluation methodology that considered
            general and task-oriented biases, spawning the Try Before You Bias (**TBYB**) application as a result.
            """
        )
        st.markdown(
            """
            We proposed three novel metrics to quantify T2I model biases:
            1. Distribution Bias - $B_D$
            2. Jaccard Hallucination - $H_J$
            3. Generative Miss Rate - $M_G$

            Open the appropriate drop-down menu to understand the logic and inspiration behind metric.
            """
        )
        c1, c2, c3 = st.columns(3)
        with c1:
            with st.expander("Distribution Bias - $B_D$"):
                # FIX: trapezoidal term corrected from n_{i=1} to n_{i+1}.
                st.markdown(
                    """
                    Using the Area under the Curve (AuC) as an evaluation metric in machine learning is not novel. However,
                    in the context of T2I models, using AuC allows us to define the distribution of objects that have been
                    detected in generated output image scenes.

                    So, everytime an object is detected in a scene, we update a dictionary (which is available for
                    download after running an evaluation). After evaluating a full set of images, you can use this
                    information to determine what objects appear more frequently than others.

                    After all images are evaluated, we sort the objects in descending order and normalize the data. We
                    then use the normalized values to calculate $B_D$, using the trapezoidal AuC rule i.e.:

                    $B_D = \\Sigma_{i=1}^M\\frac{n_i+n_{i+1}}{2}$

                    So, if a user conducts a task-oriented study on biases related to **dogs** using a model
                    that was heavily biased using pictures of animals in the wild. You might find that after running
                    evaluations, the most common objects detected were trees and grass - even if these objects weren't
                    specified in the prompt. This would result in a very low $B_D$ in comparison to a model that for
                    example was trained on images of dogs and animals in various different scenarios $\\rightarrow$
                    which would result in a *higher* $B_D$ in comparison.
                    """
                )
        with c2:
            with st.expander("Jaccard Hallucination - $H_J$"):
                st.markdown(
                    """
                    Hallucination is a very common phenomena that is discussed in relation to generative AI, particularly
                    in relation to some of the most popular large language models. Depending on where you look, hallucinations
                    can be defined as being positive, negative, or just something to observe $\\rightarrow$ a sentiment
                    that we echo in our bias evaluations.

                    Now, how does hallucination tie into bias? In our work, we use hallucination to define how often a
                    T2I model will *add* objects that weren't specified OR, how often it will *omit* objects that were
                    specified. This indicates that there could be an innate shift in bias in the model, causing it to
                    add or omit certain objects.

                    Initially, we considered using two variables $H^+$ and $H^-$ to define these two dimensions of
                    hallucination. Then, we considered the Jaccard similarity coefficient, which
                    measures the similarity *and* diversity of two sets of objects/samples - defining this as
                    Jaccard Hallucination - $H_J$.

                    Simply put, we define the set of objects detected in the input prompt and then detect the objects in
                    the corresponding output image. Then, we determine the intersect over union. For a model, we
                    calculate the average $H_J$ across generated images using:

                    $H_J = \\frac{\\Sigma_{i=0}^{N-1}1-\\frac{\\mathcal{X}_i\\cap\\mathcal{Y}_i}{\\mathcal{X}_i\\cup\\mathcal{Y}_i}}{N}$

                    """
                )
        with c3:
            with st.expander("Generative Miss Rate - $M_G$"):
                st.markdown(
                    """
                    Whenever fairness and trust are discussed in the context of machine learning and AI systems,
                    performance is always highlighted as a key metric - regardless of the downstream task. So, in terms
                    of evaluating bias, we thought that it would be important to see if there was a correlation
                    between bias and performance (as we predicted). And while the other metrics do evaluate biases
                    in terms of misalignment, they do not consider the relationship between bias and performance.

                    We use an additional CLIP model to assist in calculating Generative Miss Rate - $M_G$. Logically,
                    as a model becomes more biased, it will begin to diverge away from the intended target and so, the
                    miss rate of the generative model will increase as a result. This was a major consideration when
                    designing this metric.

                    We use the CLIP model as a binary classifier, differentiating between two classes:
                    - the prompt used to generate the image
                    - **NOT** the prompt

                    Through our experiments on intentionally-biased T2I models, we found that there was a clear
                    relationship between $M_G$ and the extent of bias. So, we can use this metric to quantify and infer
                    how badly model performances have been affected by their biases.
                    """
                )
        st.header('3. TBYB Constraints')
        st.markdown(
            """
            While we have attempted to design a comprehensive, automated bias evaluation tool. We must acknowledge that
            in its infancy, TBYB has some constraints:
            - We have not checked the validity of *every* single T2I model and model type on HuggingFace so we cannot
            promise that all T2I models will work - if you run into any issues that you think should be possible, feel
            free to reach out!
            - Currently, a model_index.json file is required to load models and use them with TBYB, we will look to
            address other models in future works
            - TBYB only works on T2I models hosted on HuggingFace, other model repositories are not currently supported
            - Adaptor models are not currently supported, we will look to add evaluation functionalities of these
            models in the future.
            - Download, generation, inference and evaluation times are all hardware dependent.

            Keep in mind that these constraints may be removed or added to any time.
            """)
        st.header('4. Misuse, Malicious Use, and Out-of-Scope Use')
        st.markdown(
            """
            Given this application is used for the assessment of T2I biases and relies on
            pre-trained models available on HuggingFace, we are not responsible for any content generated
            by public-facing models that have been used to generate images using this application.

            TBYB is proposed as an auxiliary tool to assess model biases and thus, if a chosen model is found to output
            insensitive, disturbing, distressing or offensive images that propagate harmful stereotypes or
            representations of marginalised groups, please address your concerns to the model providers.


            However, given the TBYB tool is designed for bias quantification and is driven by transparency, it would be
            beneficial to the TBYB community to share evaluations of biased T2I models!

            We share no association with HuggingFace \U0001F917, we only use their services as a model repository,
            given their growth in popularity in the computer science community recently.


            For further questions/queries or if you want to simply strike a conversation,
            please reach out to Jordan Vice at: [email protected]""")
276
+
277
# --- Top-level Streamlit script: re-executed from the top on every rerun ---
setup_page_banner()
setup_how_to()


if user_login_create():
    # Authenticated: expose the full evaluation workflow as six tabs.
    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["\U0001F527 Setup", "\U0001F30E General Eval.", "\U0001F3AF Task-Oriented Eval.",
                                                  "\U0001F4CA Model Comparison", "\U0001F4C1 Generated Images", "\U0001F4F0 Additional Information"])
    setup_additional_information_tab(tab6)

    # PLASTER THE LOGO EVERYWHERE
    # Placeholder copy shown in each tab until a model is loaded via Setup.
    tab2.subheader("General Bias Evaluation")
    tab2.write("Waiting for \U0001F527 Setup to be complete...")
    tab3.subheader("Task-Oriented Bias Evaluation")
    tab3.write("Waiting for \U0001F527 Setup to be complete...")
    tab4.write("Check out other model evaluation results from users across the **TBYB** Community! \U0001F30E ")
    tab4.write("You can also just compare your own model evaluations by clicking the '*Personal Evaluation*' buttons")
    MCOMP.initialise_page(tab4)
    tab5.subheader("Generated Images from General and Task-Oriented Bias Evaluations")
    tab5.write("Waiting for \U0001F527 Setup to be complete...")

    with tab1:
        # Model selection form: the user supplies a HuggingFace repo id.
        with st.form("model_definition_form", clear_on_submit=True):
            modelID = st.text_input('Input the HuggingFace \U0001F917 T2I model_id for the model you '
                                    'want to analyse e.g.: "runwayml/stable-diffusion-v1-5"')
            submitted1 = st.form_submit_button("Submit")
            if modelID:
                with st.spinner('Checking if ' + modelID + ' is valid and downloading it (if required)'):
                    modelLoaded = MLOAD.check_if_model_exists(modelID)
                    if modelLoaded is not None:
                        # st.write("Located " + modelID + " model_index.json file")
                        st.write("Located " + modelID)

                        modelType = MLOAD.get_model_info(modelLoaded)
                        if modelType is not None:
                            st.write("Model is of Type: ", modelType)

                            if submitted1:
                                # Import only on explicit submit; the pipeline is
                                # stored on the model_inferencing module so other
                                # tabs can reach it.
                                MINFER.TargetModel = MLOAD.import_model(modelID, modelType)
                                if MINFER.TargetModel is not None:
                                    st.write("Text-to-image pipeline looks like this:")
                                    st.write(MINFER.TargetModel)
                                    user_evaluation_variables.MODEL = modelID
                                    user_evaluation_variables.MODEL_TYPE = modelType
                    else:
                        st.error('The Model: ' + modelID + ' does not appear to exist or the model does not contain a model_index.json file.'
                                 ' Please check that that HuggingFace repo ID is valid.'
                                 ' For more help, please see the "How to Use" Tab above.', icon="🚨")
        if modelID:
            # Optional smoke test: generate a single image with the loaded model.
            with st.form("example_image_gen_form", clear_on_submit=True):
                testPrompt = st.text_input('Input a random test prompt to test out your '
                                           'chosen model and see if its generating images:')
                submitted2 = st.form_submit_button("Submit")
                if testPrompt and submitted2:
                    with st.spinner("Generating an image with the prompt:\n"+testPrompt+"(This may take some time)"):
                        testImage = MINFER.generate_test_image(MINFER.TargetModel, testPrompt)
                        st.image(testImage, caption='Model: ' + modelID + ' Prompt: ' + testPrompt)
                        st.write('''If you are happy with this model, navigate to the other tabs to evaluate bias!
                        Otherwise, feel free to load up a different model and run it again''')

    if MINFER.TargetModel is not None:
        # A pipeline is loaded: unlock the evaluation tabs.
        tab_manager.completed_setup([tab2, tab3, tab4, tab5], modelID)
else:
    # Not logged in: clear cached evaluation state from any previous session.
    MCOMP.databaseDF = None
    user_evaluation_variables.reset_variables('general')
    user_evaluation_variables.reset_variables('task-oriented')
    st.write('')
    st.warning('Log in or register your email to get started! ', icon="⚠️")