yjernite committed
Commit 0d44baa · 1 parent: 3911108

ethnicity description and dropdown selector

Files changed (1):
  1. app.py +114 -55
app.py CHANGED
@@ -29,23 +29,32 @@ def to_string(label):
         label = "non-binary person"
     elif label == "person":
         label = "<i>unmarked</i> (person)"
+    elif label == "":
+        label = "<i>unmarked</i> ()"
     elif label == "gender":
         label = "gender term"
     return label
 
 
-def describe_cluster(cl_dict, block="label"):
+def describe_cluster(cl_dict, block="label", max_items=4):
     labels_values = sorted(cl_dict.items(), key=operator.itemgetter(1))
     labels_values.reverse()
     total = float(sum(cl_dict.values()))
     lv_prcnt = list(
-        (item[0], round(item[1] * 100 / total, 0)) for item in labels_values)
+        (item[0], round(item[1] * 100 / total, 0)) for item in labels_values
+    )
     top_label = lv_prcnt[0][0]
-    description_string = "<span>The most represented %s is <b>%s</b>, making up about <b>%d%%</b> of the cluster.</span>" % (
-        to_string(block), to_string(top_label), lv_prcnt[0][1])
+    description_string = (
+        "<span>The most represented %s is <b>%s</b>, making up about <b>%d%%</b> of the cluster.</span>"
+        % (to_string(block), to_string(top_label), lv_prcnt[0][1])
+    )
     description_string += "<p>This is followed by: "
-    for lv in lv_prcnt[1:]:
+    for lv in lv_prcnt[1 : min(len(lv_prcnt), 1 + max_items)]:
         description_string += "<BR/><b>%s:</b> %d%%" % (to_string(lv[0]), lv[1])
+    if len(lv_prcnt) > max_items + 1:
+        description_string += "<BR/><b> - Other terms:</b> %d%%" % (
+            sum(lv[1] for lv in lv_prcnt[max_items + 1 :]),
+        )
     description_string += "</p>"
     return description_string
 
@@ -58,65 +67,94 @@ def show_cluster(cl_id, num_clusters):
     cl_dct = clusters_by_size[num_clusters][cl_id]
     images = []
     for i in range(6):
-        img_path = "/".join([st.replace("/", "") for st in
-                             cl_dct['img_path_list'][i].split("//")][3:])
-        images.append((Image.open(os.path.join("identities-images", img_path)),
-                       "_".join([img_path.split("/")[0],
-                                 img_path.split("/")[-1]]).replace(
-                           'Photo_portrait_of_an_', '').replace(
-                           'Photo_portrait_of_a_', '').replace(
-                           'SD_v2_random_seeds_identity_', '(SD v.2) ').replace(
-                           'dataset-identities-dalle2_', '(Dall-E 2) ').replace(
-                           'SD_v1.4_random_seeds_identity_',
-                           '(SD v.1.4) ').replace('_', ' ')))
+        img_path = "/".join(
+            [st.replace("/", "") for st in cl_dct["img_path_list"][i].split("//")][3:]
+        )
+        images.append(
+            (
+                Image.open(os.path.join("identities-images", img_path)),
+                "_".join([img_path.split("/")[0], img_path.split("/")[-1]])
+                .replace("Photo_portrait_of_an_", "")
+                .replace("Photo_portrait_of_a_", "")
+                .replace("SD_v2_random_seeds_identity_", "(SD v.2) ")
+                .replace("dataset-identities-dalle2_", "(Dall-E 2) ")
+                .replace("SD_v1.4_random_seeds_identity_", "(SD v.1.4) ")
+                .replace("_", " "),
+            )
+        )
     model_fig = go.Figure()
-    model_fig.add_trace(go.Pie(labels=list(dict(cl_dct["labels_model"]).keys()),
-                               values=list(
-                                   dict(cl_dct["labels_model"]).values())))
+    model_fig.add_trace(
+        go.Pie(
+            labels=list(dict(cl_dct["labels_model"]).keys()),
+            values=list(dict(cl_dct["labels_model"]).values()),
+        )
+    )
     model_description = describe_cluster(dict(cl_dct["labels_model"]), "system")
 
     gender_fig = go.Figure()
     gender_fig.add_trace(
-        go.Pie(labels=list(dict(cl_dct["labels_gender"]).keys()),
-               values=list(dict(cl_dct["labels_gender"]).values())))
-    gender_description = describe_cluster(dict(cl_dct["labels_gender"]),
-                                          "gender")
+        go.Pie(
+            labels=list(dict(cl_dct["labels_gender"]).keys()),
+            values=list(dict(cl_dct["labels_gender"]).values()),
+        )
+    )
+    gender_description = describe_cluster(dict(cl_dct["labels_gender"]), "gender")
 
     ethnicity_fig = go.Figure()
     ethnicity_fig.add_trace(
-        go.Bar(x=list(dict(cl_dct["labels_ethnicity"]).keys()),
-               y=list(dict(cl_dct["labels_ethnicity"]).values()),
-               marker_color=px.colors.qualitative.G10))
-    return (len(cl_dct['img_path_list']),
-            gender_fig, gender_description,
-            model_fig, model_description,
-            ethnicity_fig,
-            images,
-            gr.update(maximum=num_clusters - 1))
+        go.Bar(
+            x=list(dict(cl_dct["labels_ethnicity"]).keys()),
+            y=list(dict(cl_dct["labels_ethnicity"]).values()),
+            marker_color=px.colors.qualitative.G10,
+        )
+    )
+    ethnicity_description = describe_cluster(
+        dict(cl_dct["labels_ethnicity"]), "ethnicity"
+    )
+
+    return (
+        len(cl_dct["img_path_list"]),
+        gender_fig,
+        gender_description,
+        model_fig,
+        model_description,
+        ethnicity_fig,
+        ethnicity_description,
+        images,
+    )
 
 
 with gr.Blocks(title=TITLE) as demo:
     gr.Markdown(f"# {TITLE}")
     gr.Markdown(
-        "## Explore the data generated from [DiffusionBiasExplorer](https://huggingface.co/spaces/society-ethics/DiffusionBiasExplorer)!")
+        "## Explore the data generated from [DiffusionBiasExplorer](https://huggingface.co/spaces/society-ethics/DiffusionBiasExplorer)!"
+    )
     gr.Markdown(
-        "### This demo showcases patterns in the images generated from different prompts input to Stable Diffusion and Dalle-2 systems.")
+        "### This demo showcases patterns in the images generated from different prompts input to Stable Diffusion and Dalle-2 systems."
+    )
     gr.Markdown(
-        "### Below, see results on how the images from different prompts cluster together.")
+        "### Below, see results on how the images from different prompts cluster together."
+    )
     gr.HTML(
-        """<span style="color:red" font-size:smaller>⚠️ DISCLAIMER: the images displayed by this tool were generated by text-to-image systems and may depict offensive stereotypes or contain explicit content.</span>""")
-    num_clusters = gr.Radio([12, 24, 48], value=12,
-                            label="How many clusters do you want to make from the data?")
+        """<span style="color:red" font-size:smaller>⚠️ DISCLAIMER: the images displayed by this tool were generated by text-to-image systems and may depict offensive stereotypes or contain explicit content.</span>"""
+    )
+    num_clusters = gr.Radio(
+        [12, 24, 48],
+        value=12,
+        label="How many clusters do you want to make from the data?",
+    )
 
     with gr.Row():
         with gr.Column(scale=4):
-            gallery = gr.Gallery(
-                label="Most representative images in cluster").style(
-                grid=(3, 3))
+            gallery = gr.Gallery(label="Most representative images in cluster").style(
+                grid=(3, 3)
+            )
         with gr.Column():
-            cluster_id = gr.Slider(minimum=0, maximum=num_clusters.value - 1,
-                                   step=1, value=0,
-                                   label="Click to move between clusters")
+            cluster_id = gr.Dropdown(
+                choices=[i for i in range(num_clusters.value)],
+                value=0,
+                label="Select cluster to visualize:",
+            )
             a = gr.Text(label="Number of images")
     with gr.Row():
         with gr.Column(scale=1):
@@ -127,20 +165,41 @@ with gr.Blocks(title=TITLE) as demo:
             b_desc = gr.HTML(label="")
         with gr.Column(scale=2):
             d = gr.Plot(label="Which ethnicity terms are present?")
+            d_desc = gr.HTML(label="")
 
     gr.Markdown(
-        f"The 'System makeup' plot corresponds to the number of images from the cluster that come from each of the TTI systems that we are comparing: Dall-E 2, Stable Diffusion v.1.4. and Stable Diffusion v.2.")
+        f"The 'System makeup' plot corresponds to the number of images from the cluster that come from each of the TTI systems that we are comparing: Dall-E 2, Stable Diffusion v.1.4. and Stable Diffusion v.2."
+    )
     gr.Markdown(
-        'The Gender plot shows the number of images based on the input prompts that used the words man, woman, non-binary person, and unmarked, which we label "person".')
+        'The Gender plot shows the number of images based on the input prompts that used the words man, woman, non-binary person, and unmarked, which we label "person".'
+    )
     gr.Markdown(
-        f"The 'Ethnicity label makeup' plot corresponds to the number of images from each of the 18 ethnicities used in the prompts. A blank value means unmarked ethnicity.")
-    demo.load(fn=show_cluster, inputs=[cluster_id, num_clusters],
-              outputs=[a, b, b_desc, c, c_desc, d, gallery, cluster_id])
-    num_clusters.change(fn=show_cluster, inputs=[cluster_id, num_clusters],
-                        outputs=[a, b, b_desc, c, c_desc, d, gallery,
-                                 cluster_id])
-    cluster_id.change(fn=show_cluster, inputs=[cluster_id, num_clusters],
-                      outputs=[a, b, b_desc, c, c_desc, d, gallery, cluster_id])
+        f"The 'Ethnicity label makeup' plot corresponds to the number of images from each of the 18 ethnicities used in the prompts. A blank value means unmarked ethnicity."
+    )
+    demo.load(
+        fn=show_cluster,
+        inputs=[cluster_id, num_clusters],
+        outputs=[a, b, b_desc, c, c_desc, d, d_desc, gallery],
+    )
+    num_clusters.change(
+        fn=show_cluster,
+        inputs=[cluster_id, num_clusters],
+        outputs=[
+            a,
+            b,
+            b_desc,
+            c,
+            c_desc,
+            d,
+            d_desc,
+            gallery,
+        ],
+    )
+    cluster_id.change(
+        fn=show_cluster,
+        inputs=[cluster_id, num_clusters],
+        outputs=[a, b, b_desc, c, c_desc, d, d_desc, gallery],
+    )
 
 if __name__ == "__main__":
-    demo.queue().launch(debug=True)
+    demo.queue().launch(debug=True)
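
For readers following the diff, a minimal standalone sketch of the summarization rule that the new max_items argument to describe_cluster introduces: the top label is reported on its own, the next max_items labels are listed individually, and the remainder is folded into an "Other terms" bucket. The label counts below are hypothetical, and the output is printed as plain text rather than the HTML string the app builds.

# Illustrative only; counts are made up and not taken from the dataset.
counts = {
    "White": 40, "Black": 25, "Latinx": 15, "East Asian": 10,
    "South Asian": 5, "Indigenous American": 3, "Pacific Islander": 2,
}
max_items = 4

# Rank labels by count and convert to rounded percentages of the total.
ranked = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)
total = float(sum(counts.values()))
percents = [(label, round(n * 100 / total)) for label, n in ranked]

top_label, top_pct = percents[0]
runners_up = percents[1 : 1 + max_items]                      # listed one by one
other_pct = sum(pct for _, pct in percents[1 + max_items :])  # aggregated remainder

print("Top %s: %d%%" % (top_label, top_pct))
for label, pct in runners_up:
    print("  %s: %d%%" % (label, pct))
if other_pct:
    print("  Other terms: %d%%" % other_pct)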