JVice committed on
Commit 153fba3 · 1 Parent(s): 0560487

Delete app.py

Files changed (1)
  1. app.py +0 -343
app.py DELETED
@@ -1,343 +0,0 @@
import streamlit as st
st.set_page_config(layout="wide")
import streamlit_authenticator as stauth
import pandas as pd
import numpy as np
import model_comparison as MCOMP
import model_loading as MLOAD
import model_inferencing as MINFER
import user_evaluation_variables
import tab_manager
import yaml
from yaml.loader import SafeLoader
from PIL import Image

AUTHENTICATOR = None
TBYB_LOGO = Image.open('./assets/TBYB_logo_light.png')
USER_LOGGED_IN = False
USER_DATABASE_PATH = './data/user_database.yaml'

def create_new_user(authenticator, users):
    try:
        if authenticator.register_user('Register user', preauthorization=False):
            st.success('User registered successfully')
    except Exception as e:
        st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def forgot_password(authenticator, users):
    try:
        username_of_forgotten_password, email_of_forgotten_password, new_random_password = authenticator.forgot_password(
            'Forgot password')
        if username_of_forgotten_password:
            st.success('New password to be sent securely')
            # Random password should be transferred to user securely
    except Exception as e:
        st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def update_account_details(authenticator, users):
    if st.session_state["authentication_status"]:
        try:
            if authenticator.update_user_details(st.session_state["username"], 'Update user details'):
                st.success('Entries updated successfully')
        except Exception as e:
            st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def reset_password(authenticator, users):
    if st.session_state["authentication_status"]:
        try:
            if authenticator.reset_password(st.session_state["username"], 'Reset password'):
                st.success('Password modified successfully')
        except Exception as e:
            st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def user_login_create():
    global AUTHENTICATOR
    global TBYB_LOGO
    global USER_LOGGED_IN
    users = None
    with open(USER_DATABASE_PATH) as file:
        users = yaml.load(file, Loader=SafeLoader)
    AUTHENTICATOR = stauth.Authenticate(
        users['credentials'],
        users['cookie']['name'],
        users['cookie']['key'],
        users['cookie']['expiry_days'],
        users['preauthorized']
    )
    with st.sidebar:
        st.image(TBYB_LOGO, width=70)
        loginTab, registerTab, detailsTab = st.tabs(["Log in", "Register", "Account details"])

        with loginTab:
            name, authentication_status, username = AUTHENTICATOR.login('Login', 'main')
            if authentication_status:
                AUTHENTICATOR.logout('Logout', 'main')
                st.write(f'Welcome *{name}*')
                user_evaluation_variables.USERNAME = username
                USER_LOGGED_IN = True
            elif authentication_status == False:
                st.error('Username/password is incorrect')
                forgot_password(AUTHENTICATOR, users)
            elif authentication_status == None:
                st.warning('Please enter your username and password')
                forgot_password(AUTHENTICATOR, users)
        if not authentication_status:
            with registerTab:
                create_new_user(AUTHENTICATOR, users)
        else:
            with detailsTab:
                st.write('**Username:** ', username)
                st.write('**Name:** ', name)
                st.write('**Email:** ', users['credentials']['usernames'][username]['email'])
                # update_account_details(AUTHENTICATOR, users)
                reset_password(AUTHENTICATOR, users)

    return USER_LOGGED_IN

def setup_page_banner():
    global USER_LOGGED_IN
    # for tab in [tab1, tab2, tab3, tab4, tab5]:
    c1,c2,c3,c4,c5,c6,c7,c8,c9 = st.columns(9)
    with c5:
        st.image(TBYB_LOGO, use_column_width=True)
    for col in [c1,c2,c3,c4,c5,c6,c7,c8,c9]:
        col = None
    st.title('Try Before You Bias (TBYB)')
    st.write('*A Quantitative T2I Bias Evaluation Tool*')

def setup_how_to():
    expander = st.expander("How to Use")
    expander.write("1. Log in to your TBYB account using the bar on the right\n"
                   "2. Navigate to the '\U0001F527 Setup' tab and input the ID of the HuggingFace \U0001F917 T2I model you want to evaluate\n")
    expander.image(Image.open('./assets/HF_MODEL_ID_EXAMPLE.png'))
    expander.write("3. Test your chosen model by generating an image using an input prompt e.g.: 'A corgi with some cool sunglasses'\n")
    expander.image(Image.open('./assets/lykon_corgi.png'))
    expander.write("4. Navigate to the '\U0001F30E General Eval.' or '\U0001F3AF Task-Oriented Eval.' tabs"
                   " to evaluate your model once it has been loaded\n"
                   "5. Once you have generated some evaluation images, head over to the '\U0001F4C1 Generated Images' tab to have a look at them\n"
                   "6. To check out your evaluations or all of the TBYB Community evaluations, head over to the '\U0001F4CA Model Comparison' tab\n"
                   "7. For more information about the evaluation process, see our paper at --PAPER HYPERLINK-- or navigate to the"
                   " '\U0001F4F0 Additional Information' tab for a TL;DR.\n"
                   "8. For any questions or to report any bugs/issues, please contact [email protected].\n")

def setup_additional_information_tab(tab):
    with tab:
        st.header("1. Quantifying Bias in Text-to-Image (T2I) Generative Models")
        st.markdown(
            """
            *Based on the article of the same name available here --PAPER HYPERLINK--

            Authors: Jordan Vice, Naveed Akhtar, Richard Hartley and Ajmal Mian

            This web-app was developed by **Jordan Vice** to accompany the article, serving as a practical
            implementation of how T2I model biases can be quantitatively assessed and compared. Evaluation results from
            all *base* models discussed in the paper have been incorporated into the TBYB community results and we hope
            that others share their evaluations as we look to further the discussion on transparency and reliability
            of T2I models.

            """)

        st.header('2. A (very) Brief Summary')
        st.image(Image.open('./assets/TBYB_flowchart.png'))
        st.markdown(
            """
            Bias in text-to-image models can propagate unfair social representations and could be exploited to
            aggressively market ideas or push controversial or sinister agendas. Existing T2I model bias evaluation
            methods have focused on social biases, so we proposed a bias evaluation methodology that considers both
            general and task-oriented biases, spawning the Try Before You Bias (**TBYB**) application as a result.
            """
        )
        st.markdown(
            """
            We proposed three novel metrics to quantify T2I model biases:
            1. Distribution Bias - $B_D$
            2. Jaccard Hallucination - $H_J$
            3. Generative Miss Rate - $M_G$

            Open the appropriate drop-down menu to understand the logic and inspiration behind each metric.
            """
        )
        c1,c2,c3 = st.columns(3)
        with c1:
            with st.expander("Distribution Bias - $B_D$"):
                st.markdown(
                    """
                    Using the Area under the Curve (AuC) as an evaluation metric in machine learning is not novel. However,
                    in the context of T2I models, using AuC allows us to define the distribution of objects that have been
                    detected in generated output image scenes.

                    So, every time an object is detected in a scene, we update a dictionary (which is available for
                    download after running an evaluation). After evaluating a full set of images, you can use this
                    information to determine which objects appear more frequently than others.

                    After all images are evaluated, we sort the objects in descending order and normalize the data. We
                    then use the normalized values to calculate $B_D$ using the trapezoidal AuC rule, i.e.:

                    $B_D = \\Sigma_{i=1}^{M-1}\\frac{n_i+n_{i+1}}{2}$

                    So, if a user conducts a task-oriented study on biases related to **dogs** using a model
                    that was heavily biased with pictures of animals in the wild, they might find that after running
                    evaluations the most common objects detected were trees and grass - even if these objects weren't
                    specified in the prompt. This would result in a very low $B_D$ compared to a model that was, for
                    example, trained on images of dogs and animals in a variety of scenarios $\\rightarrow$
                    which would result in a *higher* $B_D$.
                    """
                )
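
                # A minimal illustrative sketch (not part of the original TBYB pipeline):
                # computing B_D from a detected-object counts dictionary like the one
                # described above. `object_counts` is an assumed name mapping each detected
                # object to how many times it appeared across the evaluated images.
                def distribution_bias(object_counts):
                    # Sort counts in descending order and normalise them.
                    counts = sorted(object_counts.values(), reverse=True)
                    total = sum(counts)
                    if total == 0 or len(counts) < 2:
                        return 0.0
                    norm = [c / total for c in counts]
                    # Trapezoidal AuC over the sorted, normalised distribution.
                    return sum((norm[i] + norm[i + 1]) / 2 for i in range(len(norm) - 1))
                # Example usage: distribution_bias({'tree': 12, 'grass': 9, 'dog': 3})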
        with c2:
            with st.expander("Jaccard Hallucination - $H_J$"):
                st.markdown(
                    """
                    Hallucination is a very common phenomenon discussed in relation to generative AI, particularly
                    some of the most popular large language models. Depending on where you look, hallucinations
                    can be defined as being positive, negative, or just something to observe $\\rightarrow$ a sentiment
                    that we echo in our bias evaluations.

                    Now, how does hallucination tie into bias? In our work, we use hallucination to define how often a
                    T2I model will *add* objects that weren't specified OR how often it will *omit* objects that were
                    specified. This indicates that there could be an innate shift in bias in the model, causing it to
                    add or omit certain objects.

                    Initially, we considered using two variables, $H^+$ and $H^-$, to define these two dimensions of
                    hallucination. Then, we considered the Jaccard similarity coefficient, which
                    measures the similarity *and* diversity of two sets of objects/samples - defining this as
                    Jaccard Hallucination - $H_J$.

                    Simply put, we define the set of objects detected in the input prompt and then detect the objects in
                    the corresponding output image. Then, we determine the intersection over union. For a model, we
                    calculate the average $H_J$ across generated images using:

                    $H_J = \\frac{\\Sigma_{i=0}^{N-1}\\left(1-\\frac{|\\mathcal{X}_i\\cap\\mathcal{Y}_i|}{|\\mathcal{X}_i\\cup\\mathcal{Y}_i|}\\right)}{N}$

                    """
                )
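
                # A minimal illustrative sketch (not part of the original TBYB pipeline):
                # per-image Jaccard Hallucination, assuming `prompt_objects` and
                # `image_objects` are the sets of objects detected in the input prompt and
                # in the generated image, respectively.
                def jaccard_hallucination(prompt_objects, image_objects):
                    union = prompt_objects | image_objects
                    if not union:
                        return 0.0
                    intersection = prompt_objects & image_objects
                    # 1 - Jaccard similarity; averaging this over N images gives H_J.
                    return 1.0 - len(intersection) / len(union)
                # Example usage: jaccard_hallucination({'dog', 'sunglasses'}, {'dog', 'tree'})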
        with c3:
            with st.expander("Generative Miss Rate - $M_G$"):
                st.markdown(
                    """
                    Whenever fairness and trust are discussed in the context of machine learning and AI systems,
                    performance is always highlighted as a key metric - regardless of the downstream task. So, in terms
                    of evaluating bias, we thought it would be important to see if there was a correlation
                    between bias and performance (as we predicted). And while the other metrics do evaluate biases
                    in terms of misalignment, they do not consider the relationship between bias and performance.

                    We use an additional CLIP model to assist in calculating the Generative Miss Rate - $M_G$. Logically,
                    as a model becomes more biased, it will begin to diverge from the intended target, and so the
                    miss rate of the generative model will increase as a result. This was a major consideration when
                    designing this metric.

                    We use the CLIP model as a binary classifier, differentiating between two classes:
                    - the prompt used to generate the image
                    - **NOT** the prompt

                    Through our experiments on intentionally biased T2I models, we found that there was a clear
                    relationship between $M_G$ and the extent of bias. So, we can use this metric to quantify and infer
                    how badly a model's performance has been affected by its biases.
                    """
                )
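
                # A minimal illustrative sketch (not part of the original TBYB pipeline):
                # the Generative Miss Rate from per-image classification decisions, assuming
                # `matched_prompt[i]` is True when the CLIP classifier assigned image i to
                # its generating prompt rather than to the "NOT the prompt" class.
                def generative_miss_rate(matched_prompt):
                    if not matched_prompt:
                        return 0.0
                    misses = sum(1 for hit in matched_prompt if not hit)
                    return misses / len(matched_prompt)
                # Example usage: generative_miss_rate([True, True, False, True])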
        st.header('3. TBYB Constraints')
        st.markdown(
            """
            While we have attempted to design a comprehensive, automated bias evaluation tool, we must acknowledge that,
            in its infancy, TBYB has some constraints:
            - We have not checked the validity of *every* T2I model and model type on HuggingFace, so we cannot
            promise that all T2I models will work - if you run into issues with a model that you think should work, feel
            free to reach out!
            - Currently, a model_index.json file is required to load models and use them with TBYB; we will look to
            support other models in future work
            - TBYB only works on T2I models hosted on HuggingFace; other model repositories are not currently supported
            - Adaptor models are not currently supported; we will look to add evaluation functionality for these
            models in the future.
            - Download, generation, inference and evaluation times are all hardware-dependent.

            Keep in mind that these constraints may be removed or added to at any time.
            """)
        st.header('4. Misuse, Malicious Use, and Out-of-Scope Use')
        st.markdown(
            """
            Given that this application is used for the assessment of T2I biases and relies on
            pre-trained models available on HuggingFace, we are not responsible for any content generated
            by public-facing models that have been used to generate images using this application.

            TBYB is proposed as an auxiliary tool to assess model biases and thus, if a chosen model is found to output
            insensitive, disturbing, distressing or offensive images that propagate harmful stereotypes or
            representations of marginalised groups, please address your concerns to the model providers.


            However, given that the TBYB tool is designed for bias quantification and is driven by transparency, it would be
            beneficial to the TBYB community to share evaluations of biased T2I models!

            We have no association with HuggingFace \U0001F917; we only use their services as a model repository,
            given their recent growth in popularity in the computer science community.


            For further questions/queries, or if you simply want to strike up a conversation,
            please reach out to Jordan Vice at: [email protected]""")

setup_page_banner()
setup_how_to()


if user_login_create():
    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["\U0001F527 Setup", "\U0001F30E General Eval.", "\U0001F3AF Task-Oriented Eval.",
                                                  "\U0001F4CA Model Comparison", "\U0001F4C1 Generated Images", "\U0001F4F0 Additional Information"])
    setup_additional_information_tab(tab6)

    # PLASTER THE LOGO EVERYWHERE
    tab2.subheader("General Bias Evaluation")
    tab2.write("Waiting for \U0001F527 Setup to be complete...")
    tab3.subheader("Task-Oriented Bias Evaluation")
    tab3.write("Waiting for \U0001F527 Setup to be complete...")
    tab4.write("Check out other model evaluation results from users across the **TBYB** Community! \U0001F30E ")
    tab4.write("You can also just compare your own model evaluations by clicking the '*Personal Evaluation*' buttons")
    MCOMP.initialise_page(tab4)
    tab5.subheader("Generated Images from General and Task-Oriented Bias Evaluations")
    tab5.write("Waiting for \U0001F527 Setup to be complete...")

    with tab1:
        with st.form("model_definition_form", clear_on_submit=True):
            modelID = st.text_input('Input the HuggingFace \U0001F917 T2I model_id for the model you '
                                    'want to analyse e.g.: "runwayml/stable-diffusion-v1-5"')
            submitted1 = st.form_submit_button("Submit")
            if modelID:
                with st.spinner('Checking if ' + modelID + ' is valid and downloading it (if required)'):
                    modelLoaded = MLOAD.check_if_model_exists(modelID)
                if modelLoaded is not None:
                    # st.write("Located " + modelID + " model_index.json file")
                    st.write("Located " + modelID)

                    modelType = MLOAD.get_model_info(modelLoaded)
                    if modelType is not None:
                        st.write("Model is of Type: ", modelType)

                        if submitted1:
                            MINFER.TargetModel = MLOAD.import_model(modelID, modelType)
                            if MINFER.TargetModel is not None:
                                st.write("Text-to-image pipeline looks like this:")
                                st.write(MINFER.TargetModel)
                                user_evaluation_variables.MODEL = modelID
                                user_evaluation_variables.MODEL_TYPE = modelType
                else:
                    st.error('The Model: ' + modelID + ' does not appear to exist or the model does not contain a model_index.json file.'
                             ' Please check that the HuggingFace repo ID is valid.'
                             ' For more help, please see the "How to Use" Tab above.', icon="🚨")
        if modelID:
            with st.form("example_image_gen_form", clear_on_submit=True):
                testPrompt = st.text_input('Input a random test prompt to test out your '
                                           'chosen model and see if it\'s generating images:')
                submitted2 = st.form_submit_button("Submit")
                if testPrompt and submitted2:
                    with st.spinner("Generating an image with the prompt:\n" + testPrompt + " (This may take some time)"):
                        testImage = MINFER.generate_test_image(MINFER.TargetModel, testPrompt)
                        st.image(testImage, caption='Model: ' + modelID + ' Prompt: ' + testPrompt)
                        st.write('''If you are happy with this model, navigate to the other tabs to evaluate bias!
                                 Otherwise, feel free to load up a different model and run it again''')

    if MINFER.TargetModel is not None:
        tab_manager.completed_setup([tab2, tab3, tab4, tab5], modelID)
else:
    MCOMP.databaseDF = None
    user_evaluation_variables.reset_variables('general')
    user_evaluation_variables.reset_variables('task-oriented')
    st.write('')
    st.warning('Log in or register your email to get started! ', icon="⚠️")