Spaces:

society-ethics
/

DiffusionFaceClustering

Runtime error

meg-huggingface

Wording changes

bb54091 over 2 years ago

6.61 kB

	import json
	import gradio as gr
	import os
	from PIL import Image
	import plotly.graph_objects as go
	import plotly.express as px
	import operator

	TITLE = "Diffusion Faces Cluster Explorer"


	def _load_clusters(num_clusters):
	    """Read the precomputed cluster file for the given cluster count.

	    The file is opened in a ``with`` block so the handle is closed as soon
	    as parsing finishes, and decoded explicitly as UTF-8 (the JSON default)
	    rather than with the platform's locale encoding.
	    """
	    path = "clusters/id_all_blip_clusters_%d.json" % num_clusters
	    with open(path, encoding="utf-8") as cluster_file:
	        return json.load(cluster_file)


	clusters_12 = _load_clusters(12)
	clusters_24 = _load_clusters(24)
	clusters_48 = _load_clusters(48)

	# Lookup from the cluster count offered in the UI to its loaded data.
	clusters_by_size = {
	    12: clusters_12,
	    24: clusters_24,
	    48: clusters_48,
	}


	def to_string(label):
	    """Return the display text for a raw label.

	    Labels with a known alias are replaced by their human-readable form;
	    any other value is handed back untouched.
	    """
	    aliases = (
	        ("SD_2", "Stable Diffusion 2.0"),
	        ("SD_14", "Stable Diffusion 1.4"),
	        ("DallE", "Dall-E 2"),
	        ("non-binary", "non-binary person"),
	        ("person", "<i>unmarked</i> (person)"),
	        ("gender", "gender term"),
	    )
	    for raw_name, display_name in aliases:
	        if label == raw_name:
	            return display_name
	    return label


	def describe_cluster(cl_dict, block="label"):
	    """Build an HTML summary of how labels are distributed in a cluster.

	    Args:
	        cl_dict: Mapping from a label to its count within the cluster.
	        block: Name of the label family being described (for example
	            "model" or "gender"); it is passed through ``to_string`` before
	            being shown.

	    Returns:
	        An HTML string naming the most frequent label and its rounded
	        percentage, followed (when there are any) by the remaining labels
	        in descending order of count. If ``cl_dict`` is empty or its counts
	        sum to zero, a short "no information" notice is returned instead of
	        raising.
	    """
	    total = float(sum(cl_dict.values()))
	    if not total:
	        # No labels, or nothing counted: there is no top label to report and
	        # no valid denominator for the percentages.
	        return ("<span>No %s information is available for this cluster.</span>"
	                % to_string(block))

	    # Ascending sort followed by a reversal (rather than reverse=True) keeps
	    # the established ordering of labels whose counts are tied.
	    ranked = sorted(cl_dict.items(), key=operator.itemgetter(1))[::-1]
	    percentages = [(label, round(count * 100 / total, 0))
	                   for label, count in ranked]

	    top_label, top_share = percentages[0]
	    parts = [
	        "<span>The most represented %s is <b>%s</b>, making up about <b>%d%%</b> of the cluster.</span>" % (
	            to_string(block), to_string(top_label), top_share)
	    ]

	    runners_up = percentages[1:]
	    if runners_up:
	        # Only announce a follow-up list when there is something to list.
	        parts.append("<p>This is followed by: ")
	        parts.extend(
	            "<BR/><b>%s:</b> %d%%" % (to_string(label), share)
	            for label, share in runners_up)
	        parts.append("</p>")
	    return "".join(parts)


	# Upper bound on how many images of a cluster are shown in the gallery.
	_GALLERY_SIZE = 6


	def _relative_image_path(raw_path):
	    """Convert a stored image path into one relative to the image folder.

	    The stored path is split on "//", any remaining single slashes are
	    removed from each piece, the first three pieces are dropped and the
	    rest are re-joined with "/".
	    """
	    pieces = [piece.replace("/", "") for piece in raw_path.split("//")]
	    return "/".join(pieces[3:])


	def _image_caption(img_path):
	    """Derive a readable gallery caption from a relative image path."""
	    segments = img_path.split("/")
	    caption = "_".join([segments[0], segments[-1]])
	    # Order matters: the prompt/model prefixes must be rewritten before the
	    # final step turns every remaining underscore into a space.
	    substitutions = (
	        ('Photo_portrait_of_an_', ''),
	        ('Photo_portrait_of_a_', ''),
	        ('SD_v2_random_seeds_identity_', '(SD v.2) '),
	        ('dataset-identities-dalle2_', '(Dall-E 2) '),
	        ('SD_v1.4_random_seeds_identity_', '(SD v.1.4) '),
	        ('_', ' '),
	    )
	    for old, new in substitutions:
	        caption = caption.replace(old, new)
	    return caption


	def _pie_figure(label_counts):
	    """Return a pie chart with one slice per label in ``label_counts``."""
	    figure = go.Figure()
	    figure.add_trace(go.Pie(labels=list(label_counts.keys()),
	                            values=list(label_counts.values())))
	    return figure


	def show_cluster(cl_id, num_clusters):
	    """Collect everything the UI displays for one cluster.

	    Args:
	        cl_id: Index of the cluster to show; a falsy value means cluster 0.
	            An index beyond the last cluster (for example one left over
	            after switching to a smaller cluster count) is clamped to the
	            last valid index instead of raising.
	        num_clusters: Key into ``clusters_by_size``; a falsy value means 12.

	    Returns:
	        A tuple of: the number of image paths in the cluster, the gender pie
	        chart, its HTML description, the model pie chart, its HTML
	        description, the ethnicity bar chart, a list of ``(image, caption)``
	        pairs for the gallery, and a slider update carrying the new maximum
	        and the (possibly clamped) cluster index.
	    """
	    if not cl_id:
	        cl_id = 0
	    if not num_clusters:
	        num_clusters = 12
	    clusters = clusters_by_size[num_clusters]
	    # The slider can still hold an index from a larger clustering when the
	    # cluster count is reduced; keep it inside the available range.
	    cl_id = max(0, min(int(cl_id), len(clusters) - 1))
	    cl_dct = clusters[cl_id]

	    # Slicing (instead of indexing 0..5) tolerates clusters that contain
	    # fewer images than the gallery can show.
	    images = []
	    for raw_path in cl_dct['img_path_list'][:_GALLERY_SIZE]:
	        img_path = _relative_image_path(raw_path)
	        picture = Image.open(os.path.join("identities-images", img_path))
	        images.append((picture, _image_caption(img_path)))

	    model_counts = dict(cl_dct["labels_model"])
	    model_fig = _pie_figure(model_counts)
	    model_description = describe_cluster(model_counts, "model")

	    gender_counts = dict(cl_dct["labels_gender"])
	    gender_fig = _pie_figure(gender_counts)
	    gender_description = describe_cluster(gender_counts, "gender")

	    ethnicity_counts = dict(cl_dct["labels_ethnicity"])
	    ethnicity_fig = go.Figure()
	    ethnicity_fig.add_trace(
	        go.Bar(x=list(ethnicity_counts.keys()),
	               y=list(ethnicity_counts.values()),
	               marker_color=px.colors.qualitative.G10))

	    return (len(cl_dct['img_path_list']),
	            gender_fig, gender_description,
	            model_fig, model_description,
	            ethnicity_fig,
	            images,
	            # Send the clamped index back so the slider matches what is shown.
	            gr.update(maximum=num_clusters - 1, value=cl_id))


	# Page layout and event wiring for the cluster explorer.
	# NOTE(review): the nesting of rows/columns below was reconstructed from a
	# source whose indentation had been flattened -- confirm against the
	# deployed layout.
	with gr.Blocks(title=TITLE) as demo:
	    gr.Markdown(f"# {TITLE}")
	    gr.Markdown(
	        "## Explore the data generated from [DiffusionBiasExplorer](https://huggingface.co/spaces/society-ethics/DiffusionBiasExplorer)!")
	    gr.Markdown(
	        "### This demo showcases patterns in the images generated from different prompts input to Stable Diffusion and Dalle-2 diffusion models.")
	    gr.Markdown(
	        "### Below, see results on how the images from different prompts cluster together.")
	    # font-size now sits inside the style attribute; previously it was a
	    # stray token after the closing quote and had no effect.
	    gr.HTML(
	        """<span style="color:red; font-size:smaller">⚠️ DISCLAIMER: the images displayed by this tool were generated by text-to-image models and may depict offensive stereotypes or contain explicit content.</span>""")
	    num_clusters = gr.Radio([12, 24, 48], value=12,
	                            label="How many clusters do you want to make from the data?")

	    with gr.Row():
	        with gr.Column(scale=4):
	            # NOTE(review): `.style(...)` is, as far as I know, deprecated in
	            # late Gradio 3.x and removed in 4.x (replaced by constructor
	            # arguments such as `columns`) -- confirm against the Gradio
	            # version this Space pins before upgrading.
	            gallery = gr.Gallery(
	                label="Most representative images in cluster").style(
	                grid=(3, 3))
	        with gr.Column():
	            cluster_id = gr.Slider(minimum=0,
	                                   maximum=num_clusters.value - 1,
	                                   step=1, value=0,
	                                   label="Click to move between clusters")
	            image_count = gr.Text(label="Number of images")
	    with gr.Row():
	        with gr.Column(scale=1):
	            model_plot = gr.Plot(label="How many images from each model?")
	            model_desc = gr.HTML(label="")
	        with gr.Column(scale=1):
	            gender_plot = gr.Plot(
	                label="How many gender terms are represented?")
	            gender_desc = gr.HTML(label="")
	        with gr.Column(scale=2):
	            ethnicity_plot = gr.Plot(
	                label="Which ethnicity terms are present?")

	    gr.Markdown(
	        "The 'Model makeup' plot corresponds to the number of images from the cluster that come from each of the TTI systems that we are comparing: Dall-E 2, Stable Diffusion v.1.4. and Stable Diffusion v.2.")
	    gr.Markdown(
	        'The Gender plot shows the number of images based on the input prompts that used the words man, woman, non-binary person, and unmarked, which we label "person".')
	    gr.Markdown(
	        "The 'Ethnicity label makeup' plot corresponds to the number of images from each of the 18 ethnicities used in the prompts. A blank value means unmarked ethnicity.")

	    # Every trigger refreshes the same components; the order of `outputs`
	    # must match the tuple returned by show_cluster.
	    inputs = [cluster_id, num_clusters]
	    outputs = [image_count,
	               gender_plot, gender_desc,
	               model_plot, model_desc,
	               ethnicity_plot,
	               gallery,
	               cluster_id]
	    demo.load(fn=show_cluster, inputs=inputs, outputs=outputs)
	    num_clusters.change(fn=show_cluster, inputs=inputs, outputs=outputs)
	    cluster_id.change(fn=show_cluster, inputs=inputs, outputs=outputs)

	if __name__ == "__main__":
	    demo.queue().launch(debug=True)