yjernite committed
Commit 2f582d1 · Parent: 997ca15

summary description and selection

Files changed (1): app.py (+82 -17)
app.py CHANGED
 
@@ -36,6 +36,53 @@ def to_string(label):
     return label
 
 
+def summarize_clusters(clusters_list, max_terms=3):
+    for cl_id, cl_dict in enumerate(clusters_list):
+        total = len(cl_dict["img_path_list"])
+        gdr_list = cl_dict["labels_gender"]
+        eth_list = cl_dict["labels_ethnicity"]
+        cl_dict["sentence_desc"] = (
+            f"Cluster {cl_id} | \t"
+            + f"gender terms incl.: {gdr_list[0][0].replace('person', 'unmarked(gender)')}"
+            + (
+                f" - {gdr_list[1][0].replace('person', 'unmarked(gender)')} | "
+                if len(gdr_list) > 1
+                else " | "
+            )
+            + f"ethnicity terms incl.: {'unmarked(ethnicity)' if eth_list[0][0] == '' else eth_list[0][0]}"
+            + (
+                f" - {'unmarked(ethnicity)' if eth_list[1][0] == '' else eth_list[1][0]}"
+                if len(eth_list) > 1
+                else ""
+            )
+        )
+        cl_dict["summary_desc"] = (
+            f"Cluster {cl_id} has {total} images.\n"
+            + f"- The most represented gender terms are {gdr_list[0][0].replace('person', 'unmarked')} ({gdr_list[0][1]})"
+            + (
+                f" and {gdr_list[1][0].replace('person', 'unmarked')} ({gdr_list[1][1]}).\n"
+                if len(gdr_list) > 1
+                else ".\n"
+            )
+            + f"- The most represented ethnicity terms are {'unmarked' if eth_list[0][0] == '' else eth_list[0][0]} ({eth_list[0][1]})"
+            + (
+                f" and {'unmarked' if eth_list[1][0] == '' else eth_list[1][0]} ({eth_list[1][1]}).\n"
+                if len(eth_list) > 1
+                else ".\n"
+            )
+            + "See below for a more detailed description."
+        )
+
+
+for _, clusters_list in clusters_by_size.items():
+    summarize_clusters(clusters_list)
+
+dropdown_descs = dict(
+    (num_clusters, [cl_dct["sentence_desc"] for cl_dct in clusters_list])
+    for num_clusters, clusters_list in clusters_by_size.items()
+)
+
+
 def describe_cluster(cl_dict, block="label", max_items=4):
     labels_values = sorted(cl_dict.items(), key=operator.itemgetter(1))
     labels_values.reverse()
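For a concrete sense of the two strings this hunk adds, here is a minimal illustrative sketch (not part of the commit). It assumes summarize_clusters from above is in scope and that labels_gender / labels_ethnicity hold (term, count) pairs sorted by descending count; the label values below are invented:

    toy_cluster = {
        "img_path_list": ["a.png", "b.png", "c.png"],    # 3 images in the cluster
        "labels_gender": [("woman", 2), ("person", 1)],  # "person" = unmarked gender
        "labels_ethnicity": [("Latinx", 2), ("", 1)],    # "" = unmarked ethnicity
    }
    summarize_clusters([toy_cluster])
    # toy_cluster["sentence_desc"] ->
    #   "Cluster 0 | \tgender terms incl.: woman - unmarked(gender) | ethnicity terms incl.: Latinx - unmarked(ethnicity)"
    # toy_cluster["summary_desc"] ->
    #   "Cluster 0 has 3 images.\n- The most represented gender terms are woman (2) and unmarked (1).\n..."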
 
@@ -62,6 +109,12 @@ def describe_cluster(cl_dict, block="label", max_items=4):
 def show_cluster(cl_id, num_clusters):
     if not cl_id:
         cl_id = 0
+    else:
+        cl_id = (
+            dropdown_descs[num_clusters].index(cl_id)
+            if cl_id in dropdown_descs[num_clusters]
+            else 0
+        )
     if not num_clusters:
         num_clusters = 12
     cl_dct = clusters_by_size[num_clusters][cl_id]
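Because the dropdown values are now full sentence descriptions rather than integers, show_cluster converts the selected string back to a list index, falling back to cluster 0 for anything unexpected. A tiny illustrative round trip (assumes dropdown_descs from the first hunk is in scope and that a 12-cluster breakdown exists):

    descs = dropdown_descs[12]      # one sentence per cluster
    selected = descs[3]             # what gr.Dropdown hands to show_cluster
    cl_id = descs.index(selected) if selected in descs else 0
    assert cl_id == 3               # an unknown string would map to 0 instead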
 
@@ -71,9 +124,17 @@ def show_cluster(cl_id, num_clusters):
             [st.replace("/", "") for st in cl_dct["img_path_list"][i].split("//")][3:]
         )
         im = Image.open(os.path.join("identities-images", img_path))
-        #.resize((256, 256))
-        caption = "_".join([img_path.split("/")[0], img_path.split("/")[-1]]).replace("Photo_portrait_of_an_", "").replace("Photo_portrait_of_a_", "").replace("SD_v2_random_seeds_identity_", "(SD v.2) ").replace("dataset-identities-dalle2_", "(Dall-E 2) ").replace("SD_v1.4_random_seeds_identity_", "(SD v.1.4) ").replace("_", " ")
-        images.append((im,caption))
+        # .resize((256, 256))
+        caption = (
+            "_".join([img_path.split("/")[0], img_path.split("/")[-1]])
+            .replace("Photo_portrait_of_an_", "")
+            .replace("Photo_portrait_of_a_", "")
+            .replace("SD_v2_random_seeds_identity_", "(SD v.2) ")
+            .replace("dataset-identities-dalle2_", "(Dall-E 2) ")
+            .replace("SD_v1.4_random_seeds_identity_", "(SD v.1.4) ")
+            .replace("_", " ")
+        )
+        images.append((im, caption))
     model_fig = go.Figure()
     model_fig.add_trace(
         go.Pie(
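The caption logic is unchanged in behaviour here, only reflowed into a chained form. For illustration, a made-up path following the naming pattern these replacements target would be shortened like this (the path and identity below are invented, not taken from the dataset):

    img_path = "SD_v2_random_seeds_identity_00123/Photo_portrait_of_a_Latina_woman.png"  # hypothetical
    caption = (
        "_".join([img_path.split("/")[0], img_path.split("/")[-1]])
        .replace("Photo_portrait_of_an_", "")
        .replace("Photo_portrait_of_a_", "")
        .replace("SD_v2_random_seeds_identity_", "(SD v.2) ")
        .replace("dataset-identities-dalle2_", "(Dall-E 2) ")
        .replace("SD_v1.4_random_seeds_identity_", "(SD v.1.4) ")
        .replace("_", " ")
    )
    print(caption)  # -> "(SD v.2) 00123 Latina woman.png"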
 
@@ -105,7 +166,7 @@ def show_cluster(cl_id, num_clusters):
     )
 
     return (
-        len(cl_dct["img_path_list"]),
+        clusters_by_size[num_clusters][cl_id]["summary_desc"],
         gender_fig,
         gender_description,
         model_fig,
 
@@ -113,15 +174,18 @@ def show_cluster(cl_id, num_clusters):
         ethnicity_fig,
         ethnicity_description,
         images,
-        gr.update(choices=[i for i in range(num_clusters)]),
+        gr.update(choices=dropdown_descs[num_clusters]),
+        # gr.update(choices=[i for i in range(num_clusters)]),
     )
 
 
 with gr.Blocks(title=TITLE) as demo:
     gr.Markdown(f"# {TITLE}")
     gr.Markdown(
-        "Explore the data generated from [DiffusionBiasExplorer](https://huggingface.co/spaces/society-ethics/DiffusionBiasExplorer)! This demo showcases patterns in the images generated from different prompts input to Stable Diffusion and Dalle-2 systems.")
-    gr.Markdown("See the results on how the images from different prompts cluster together below."
+        "Explore the data generated from [DiffusionBiasExplorer](https://huggingface.co/spaces/society-ethics/DiffusionBiasExplorer)! This demo showcases patterns in the images generated from different prompts input to Stable Diffusion and Dalle-2 systems."
+    )
+    gr.Markdown(
+        "See the results on how the images from different prompts cluster together below."
     )
     gr.HTML(
         """<span style="color:red" font-size:smaller>⚠️ DISCLAIMER: the images displayed by this tool were generated by text-to-image systems and may depict offensive stereotypes or contain explicit content.</span>"""
@@ -135,13 +199,17 @@ with gr.Blocks(title=TITLE) as demo:
     with gr.Row():
         with gr.Column():
             cluster_id = gr.Dropdown(
-                choices=[i for i in range(num_clusters.value)],
+                choices=dropdown_descs[
+                    num_clusters.value
+                ],  # [i for i in range(num_clusters.value)],
                 value=0,
                 label="Select cluster to visualize:",
             )
-            a = gr.Text(label="Number of images")
+            a = gr.Text(label="Cluster summary")
         with gr.Column():
-            gallery = gr.Gallery(label="Most representative images in cluster").style(grid=[2,4], height="auto")
+            gallery = gr.Gallery(label="Most representative images in cluster").style(
+                grid=[2, 4], height="auto"
+            )
     with gr.Row():
         with gr.Column():
             c = gr.Plot(label="How many images from each system?")
 
@@ -154,13 +222,10 @@ with gr.Blocks(title=TITLE) as demo:
         d_desc = gr.HTML(label="")
 
     gr.Markdown(
-        f"The 'System makeup' plot corresponds to the number of images from the cluster that come from each of the TTI systems that we are comparing: Dall-E 2, Stable Diffusion v.1.4. and Stable Diffusion v.2."
-    )
-    gr.Markdown(
-        'The Gender plot shows the number of images based on the input prompts that used the words man, woman, non-binary person, and unmarked, which we label "person".'
-    )
-    gr.Markdown(
-        f"The 'Ethnicity label makeup' plot corresponds to the number of images from each of the 18 ethnicities used in the prompts. A blank value means unmarked ethnicity."
+        "### Plot Descriptions \n\n"
+        + " The **System makeup** plot (*left*) corresponds to the number of images from the cluster that come from each of the TTI systems that we are comparing: Dall-E 2, Stable Diffusion v.1.4. and Stable Diffusion v.2.\n\n"
+        + " The **Gender term makeup** plot (*middle*) shows the number of images based on the input prompts that used the phrases man, woman, non-binary person, and person (unmarked) to describe the figure's gender.\n\n"
+        + " The **Ethnicity label makeup** plot (*right*) corresponds to the number of images from each of the 18 ethnicity descriptions used in the prompts. A blank value denotes unmarked ethnicity.\n\n"
     )
     demo.load(
         fn=show_cluster,