JVice committed on
Commit 153fba3 · 1 Parent(s): 0560487

Delete app.py

Files changed (1)
  1. app.py +0 -343
app.py DELETED
@@ -1,343 +0,0 @@
import streamlit as st
st.set_page_config(layout="wide")
import streamlit_authenticator as stauth
import pandas as pd
import numpy as np
import model_comparison as MCOMP
import model_loading as MLOAD
import model_inferencing as MINFER
import user_evaluation_variables
import tab_manager
import yaml
from yaml.loader import SafeLoader
from PIL import Image

AUTHENTICATOR = None
TBYB_LOGO = Image.open('./assets/TBYB_logo_light.png')
USER_LOGGED_IN = False
USER_DATABASE_PATH = './data/user_database.yaml'

def create_new_user(authenticator, users):
    try:
        if authenticator.register_user('Register user', preauthorization=False):
            st.success('User registered successfully')
    except Exception as e:
        st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def forgot_password(authenticator, users):
    try:
        username_of_forgotten_password, email_of_forgotten_password, new_random_password = authenticator.forgot_password(
            'Forgot password')
        if username_of_forgotten_password:
            st.success('New password to be sent securely')
            # Random password should be transferred to user securely
    except Exception as e:
        st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def update_account_details(authenticator, users):
    if st.session_state["authentication_status"]:
        try:
            if authenticator.update_user_details(st.session_state["username"], 'Update user details'):
                st.success('Entries updated successfully')
        except Exception as e:
            st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def reset_password(authenticator, users):
    if st.session_state["authentication_status"]:
        try:
            if authenticator.reset_password(st.session_state["username"], 'Reset password'):
                st.success('Password modified successfully')
        except Exception as e:
            st.error(e)
    with open(USER_DATABASE_PATH, 'w') as file:
        yaml.dump(users, file, default_flow_style=False)

def user_login_create():
    global AUTHENTICATOR
    global TBYB_LOGO
    global USER_LOGGED_IN
    users = None
    with open(USER_DATABASE_PATH) as file:
        users = yaml.load(file, Loader=SafeLoader)
    AUTHENTICATOR = stauth.Authenticate(
        users['credentials'],
        users['cookie']['name'],
        users['cookie']['key'],
        users['cookie']['expiry_days'],
        users['preauthorized']
    )
    with st.sidebar:
        st.image(TBYB_LOGO, width=70)
        loginTab, registerTab, detailsTab = st.tabs(["Log in", "Register", "Account details"])

        with loginTab:
            name, authentication_status, username = AUTHENTICATOR.login('Login', 'main')
            if authentication_status:
                AUTHENTICATOR.logout('Logout', 'main')
                st.write(f'Welcome *{name}*')
                user_evaluation_variables.USERNAME = username
                USER_LOGGED_IN = True
            elif authentication_status == False:
                st.error('Username/password is incorrect')
                forgot_password(AUTHENTICATOR, users)
            elif authentication_status == None:
                st.warning('Please enter your username and password')
                forgot_password(AUTHENTICATOR, users)
        if not authentication_status:
            with registerTab:
                create_new_user(AUTHENTICATOR, users)
        else:
            with detailsTab:
                st.write('**Username:** ', username)
                st.write('**Name:** ', name)
                st.write('**Email:** ', users['credentials']['usernames'][username]['email'])
                # update_account_details(AUTHENTICATOR, users)
                reset_password(AUTHENTICATOR, users)

    return USER_LOGGED_IN

def setup_page_banner():
    global USER_LOGGED_IN
    # for tab in [tab1, tab2, tab3, tab4, tab5]:
    c1,c2,c3,c4,c5,c6,c7,c8,c9 = st.columns(9)
    with c5:
        st.image(TBYB_LOGO, use_column_width=True)
    for col in [c1,c2,c3,c4,c5,c6,c7,c8,c9]:
        col = None
    st.title('Try Before You Bias (TBYB)')
    st.write('*A Quantitative T2I Bias Evaluation Tool*')

def setup_how_to():
    expander = st.expander("How to Use")
    expander.write("1. Log in to your TBYB account using the bar on the right\n"
                   "2. Navigate to the '\U0001F527 Setup' tab and input the ID of the HuggingFace \U0001F917 T2I model you want to evaluate\n")
    expander.image(Image.open('./assets/HF_MODEL_ID_EXAMPLE.png'))
    expander.write("3. Test your chosen model by generating an image using an input prompt e.g.: 'A corgi with some cool sunglasses'\n")
    expander.image(Image.open('./assets/lykon_corgi.png'))
    expander.write("4. Navigate to the '\U0001F30E General Eval.' or '\U0001F3AF Task-Oriented Eval.' tabs"
                   " to evaluate your model once it has been loaded\n"
                   "5. Once you have generated some evaluation images, head over to the '\U0001F4C1 Generated Images' tab to have a look at them\n"
                   "6. To check out your evaluations or all of the TBYB Community evaluations, head over to the '\U0001F4CA Model Comparison' tab\n"
                   "7. For more information about the evaluation process, see our paper at --PAPER HYPERLINK-- or navigate to the"
                   " '\U0001F4F0 Additional Information' tab for a TL;DR.\n"
                   "8. For any questions or to report any bugs/issues, please contact [email protected].\n")

def setup_additional_information_tab(tab):
    with tab:
        st.header("1. Quantifying Bias in Text-to-Image (T2I) Generative Models")
        st.markdown(
            """
            *Based on the article of the same name available here --PAPER HYPERLINK--

            Authors: Jordan Vice, Naveed Akhtar, Richard Hartley and Ajmal Mian

            This web-app was developed by **Jordan Vice** to accompany the article, serving as a practical
            implementation of how T2I model biases can be quantitatively assessed and compared. Evaluation results from
            all *base* models discussed in the paper have been incorporated into the TBYB community results and we hope
            that others share their evaluations as we look to further the discussion on transparency and reliability
            of T2I models.

            """)

        st.header('2. A (very) Brief Summary')
        st.image(Image.open('./assets/TBYB_flowchart.png'))
        st.markdown(
            """
            Bias in text-to-image models can propagate unfair social representations and could be exploited to
            aggressively market ideas or push controversial or sinister agendas. Existing T2I model bias evaluation
            methods have focused on social biases, so we proposed a bias evaluation methodology that considers both
            general and task-oriented biases, spawning the Try Before You Bias (**TBYB**) application as a result.
            """
        )
        st.markdown(
            """
            We proposed three novel metrics to quantify T2I model biases:
            1. Distribution Bias - $B_D$
            2. Jaccard Hallucination - $H_J$
            3. Generative Miss Rate - $M_G$

            Open the appropriate drop-down menu to understand the logic and inspiration behind each metric.
            """
        )
        c1,c2,c3 = st.columns(3)
        with c1:
            with st.expander("Distribution Bias - $B_D$"):
                st.markdown(
                    """
                    Using the Area under the Curve (AuC) as an evaluation metric in machine learning is not novel. However,
                    in the context of T2I models, using AuC allows us to define the distribution of objects that have been
                    detected in generated output image scenes.

                    So, every time an object is detected in a scene, we update a dictionary (which is available for
                    download after running an evaluation). After evaluating a full set of images, you can use this
                    information to determine which objects appear more frequently than others.

                    After all images are evaluated, we sort the objects in descending order and normalize the data. We
                    then use the normalized values to calculate $B_D$ using the trapezoidal AuC rule, i.e.:

                    $B_D = \\Sigma_{i=1}^{M-1}\\frac{n_i+n_{i+1}}{2}$

                    So, if a user conducts a task-oriented study on biases related to **dogs** using a model
                    that was heavily biased with pictures of animals in the wild, they might find that after running
                    evaluations the most common objects detected were trees and grass - even if these objects weren't
                    specified in the prompt. This would result in a very low $B_D$ compared to a model that was, for
                    example, trained on images of dogs and animals in a variety of scenarios $\\rightarrow$
                    which would result in a *higher* $B_D$.
                    """
                )
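
                # A minimal illustrative sketch (not part of the original TBYB pipeline):
                # computing B_D from a detected-object counts dictionary like the one
                # described above. `object_counts` is an assumed name mapping each detected
                # object to how many times it appeared across the evaluated images.
                def distribution_bias(object_counts):
                    # Sort counts in descending order and normalise them.
                    counts = sorted(object_counts.values(), reverse=True)
                    total = sum(counts)
                    if total == 0 or len(counts) < 2:
                        return 0.0
                    norm = [c / total for c in counts]
                    # Trapezoidal AuC over the sorted, normalised distribution.
                    return sum((norm[i] + norm[i + 1]) / 2 for i in range(len(norm) - 1))
                # Example usage: distribution_bias({'tree': 12, 'grass': 9, 'dog': 3})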
        with c2:
            with st.expander("Jaccard Hallucination - $H_J$"):
                st.markdown(
                    """
                    Hallucination is a very common phenomenon discussed in relation to generative AI, particularly
                    some of the most popular large language models. Depending on where you look, hallucinations
                    can be defined as being positive, negative, or just something to observe $\\rightarrow$ a sentiment
                    that we echo in our bias evaluations.

                    Now, how does hallucination tie into bias? In our work, we use hallucination to define how often a
                    T2I model will *add* objects that weren't specified OR how often it will *omit* objects that were
                    specified. This indicates that there could be an innate shift in bias in the model, causing it to
                    add or omit certain objects.

                    Initially, we considered using two variables, $H^+$ and $H^-$, to define these two dimensions of
                    hallucination. Then, we considered the Jaccard similarity coefficient, which
                    measures the similarity *and* diversity of two sets of objects/samples - defining this as
                    Jaccard Hallucination - $H_J$.

                    Simply put, we define the set of objects detected in the input prompt and then detect the objects in
                    the corresponding output image. Then, we determine the intersection over union. For a model, we
                    calculate the average $H_J$ across generated images using:

                    $H_J = \\frac{\\Sigma_{i=0}^{N-1}\\left(1-\\frac{|\\mathcal{X}_i\\cap\\mathcal{Y}_i|}{|\\mathcal{X}_i\\cup\\mathcal{Y}_i|}\\right)}{N}$

                    """
                )
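
                # A minimal illustrative sketch (not part of the original TBYB pipeline):
                # per-image Jaccard Hallucination, assuming `prompt_objects` and
                # `image_objects` are the sets of objects detected in the input prompt and
                # in the generated image, respectively.
                def jaccard_hallucination(prompt_objects, image_objects):
                    union = prompt_objects | image_objects
                    if not union:
                        return 0.0
                    intersection = prompt_objects & image_objects
                    # 1 - Jaccard similarity; averaging this over N images gives H_J.
                    return 1.0 - len(intersection) / len(union)
                # Example usage: jaccard_hallucination({'dog', 'sunglasses'}, {'dog', 'tree'})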
        with c3:
            with st.expander("Generative Miss Rate - $M_G$"):
                st.markdown(
                    """
                    Whenever fairness and trust are discussed in the context of machine learning and AI systems,
                    performance is always highlighted as a key metric - regardless of the downstream task. So, in terms
                    of evaluating bias, we thought it would be important to see if there was a correlation
                    between bias and performance (as we predicted). And while the other metrics do evaluate biases
                    in terms of misalignment, they do not consider the relationship between bias and performance.

                    We use an additional CLIP model to assist in calculating the Generative Miss Rate - $M_G$. Logically,
                    as a model becomes more biased, it will begin to diverge from the intended target, and so the
                    miss rate of the generative model will increase as a result. This was a major consideration when
                    designing this metric.

                    We use the CLIP model as a binary classifier, differentiating between two classes:
                    - the prompt used to generate the image
                    - **NOT** the prompt

                    Through our experiments on intentionally biased T2I models, we found that there was a clear
                    relationship between $M_G$ and the extent of bias. So, we can use this metric to quantify and infer
                    how badly a model's performance has been affected by its biases.
                    """
                )
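
                # A minimal illustrative sketch (not part of the original TBYB pipeline):
                # the Generative Miss Rate from per-image classification decisions, assuming
                # `matched_prompt[i]` is True when the CLIP classifier assigned image i to
                # its generating prompt rather than to the "NOT the prompt" class.
                def generative_miss_rate(matched_prompt):
                    if not matched_prompt:
                        return 0.0
                    misses = sum(1 for hit in matched_prompt if not hit)
                    return misses / len(matched_prompt)
                # Example usage: generative_miss_rate([True, True, False, True])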
        st.header('3. TBYB Constraints')
        st.markdown(
            """
            While we have attempted to design a comprehensive, automated bias evaluation tool, we must acknowledge that,
            in its infancy, TBYB has some constraints:
            - We have not checked the validity of *every* T2I model and model type on HuggingFace, so we cannot
            promise that all T2I models will work - if you run into issues with a model that you think should work, feel
            free to reach out!
            - Currently, a model_index.json file is required to load models and use them with TBYB; we will look to
            support other models in future work
            - TBYB only works on T2I models hosted on HuggingFace; other model repositories are not currently supported
            - Adaptor models are not currently supported; we will look to add evaluation functionality for these
            models in the future.
            - Download, generation, inference and evaluation times are all hardware-dependent.

            Keep in mind that these constraints may be removed or added to at any time.
            """)
        st.header('4. Misuse, Malicious Use, and Out-of-Scope Use')
        st.markdown(
            """
            Given that this application is used for the assessment of T2I biases and relies on
            pre-trained models available on HuggingFace, we are not responsible for any content generated
            by public-facing models that have been used to generate images using this application.

            TBYB is proposed as an auxiliary tool to assess model biases and thus, if a chosen model is found to output
            insensitive, disturbing, distressing or offensive images that propagate harmful stereotypes or
            representations of marginalised groups, please address your concerns to the model providers.


            However, given that the TBYB tool is designed for bias quantification and is driven by transparency, it would be
            beneficial to the TBYB community to share evaluations of biased T2I models!

            We have no association with HuggingFace \U0001F917; we only use their services as a model repository,
            given their recent growth in popularity in the computer science community.


            For further questions/queries, or if you simply want to strike up a conversation,
            please reach out to Jordan Vice at: [email protected]""")

setup_page_banner()
setup_how_to()


if user_login_create():
    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["\U0001F527 Setup", "\U0001F30E General Eval.", "\U0001F3AF Task-Oriented Eval.",
                                                  "\U0001F4CA Model Comparison", "\U0001F4C1 Generated Images", "\U0001F4F0 Additional Information"])
    setup_additional_information_tab(tab6)

    # PLASTER THE LOGO EVERYWHERE
    tab2.subheader("General Bias Evaluation")
    tab2.write("Waiting for \U0001F527 Setup to be complete...")
    tab3.subheader("Task-Oriented Bias Evaluation")
    tab3.write("Waiting for \U0001F527 Setup to be complete...")
    tab4.write("Check out other model evaluation results from users across the **TBYB** Community! \U0001F30E ")
    tab4.write("You can also just compare your own model evaluations by clicking the '*Personal Evaluation*' buttons")
    MCOMP.initialise_page(tab4)
    tab5.subheader("Generated Images from General and Task-Oriented Bias Evaluations")
    tab5.write("Waiting for \U0001F527 Setup to be complete...")

    with tab1:
        with st.form("model_definition_form", clear_on_submit=True):
            modelID = st.text_input('Input the HuggingFace \U0001F917 T2I model_id for the model you '
                                    'want to analyse e.g.: "runwayml/stable-diffusion-v1-5"')
            submitted1 = st.form_submit_button("Submit")
            if modelID:
                with st.spinner('Checking if ' + modelID + ' is valid and downloading it (if required)'):
                    modelLoaded = MLOAD.check_if_model_exists(modelID)
                if modelLoaded is not None:
                    # st.write("Located " + modelID + " model_index.json file")
                    st.write("Located " + modelID)

                    modelType = MLOAD.get_model_info(modelLoaded)
                    if modelType is not None:
                        st.write("Model is of Type: ", modelType)

                        if submitted1:
                            MINFER.TargetModel = MLOAD.import_model(modelID, modelType)
                            if MINFER.TargetModel is not None:
                                st.write("Text-to-image pipeline looks like this:")
                                st.write(MINFER.TargetModel)
                                user_evaluation_variables.MODEL = modelID
                                user_evaluation_variables.MODEL_TYPE = modelType
                else:
                    st.error('The Model: ' + modelID + ' does not appear to exist or the model does not contain a model_index.json file.'
                             ' Please check that the HuggingFace repo ID is valid.'
                             ' For more help, please see the "How to Use" Tab above.', icon="🚨")
        if modelID:
            with st.form("example_image_gen_form", clear_on_submit=True):
                testPrompt = st.text_input('Input a random test prompt to test out your '
                                           'chosen model and see if it\'s generating images:')
                submitted2 = st.form_submit_button("Submit")
                if testPrompt and submitted2:
                    with st.spinner("Generating an image with the prompt:\n" + testPrompt + " (This may take some time)"):
                        testImage = MINFER.generate_test_image(MINFER.TargetModel, testPrompt)
                        st.image(testImage, caption='Model: ' + modelID + ' Prompt: ' + testPrompt)
                        st.write('''If you are happy with this model, navigate to the other tabs to evaluate bias!
                                 Otherwise, feel free to load up a different model and run it again''')

    if MINFER.TargetModel is not None:
        tab_manager.completed_setup([tab2, tab3, tab4, tab5], modelID)
else:
    MCOMP.databaseDF = None
    user_evaluation_variables.reset_variables('general')
    user_evaluation_variables.reset_variables('task-oriented')
    st.write('')
    st.warning('Log in or register your email to get started! ', icon="⚠️")