import json
import gradio as gr
import os
from PIL import Image
import plotly.graph_objects as go
import plotly.express as px
import operator

TITLE = "Diffusion Faces Cluster Explorer"


def _load_clusters(num_clusters):
    """Parse and return the cluster JSON file for the given number of clusters."""
    path = "clusters/id_all_blip_clusters_%d.json" % num_clusters
    # The context manager closes the file handle as soon as parsing is done
    # (the bare ``json.load(open(...))`` form leaves it to the garbage
    # collector). JSON is UTF-8 by specification, so do not depend on the
    # platform's default encoding.
    with open(path, encoding="utf-8") as cluster_file:
        return json.load(cluster_file)


clusters_12 = _load_clusters(12)
clusters_24 = _load_clusters(24)
clusters_48 = _load_clusters(48)

# Parsed cluster data keyed by the number of clusters. show_cluster indexes it
# as clusters_by_size[num_clusters][cluster_id].
clusters_by_size = {
    12: clusters_12,
    24: clusters_24,
    48: clusters_48,
}

def describe_cluster(cl_dict, block="label"):
    """Summarise the label distribution of a cluster as display text.

    Args:
        cl_dict: Mapping from label name to the count of that label.
        block: Name of the label category (e.g. "model", "gender"); it is
            interpolated into the generated sentence.

    Returns:
        A multi-line string naming the label with the highest count and its
        rounded percentage, followed by one "label: NN%" line for every other
        label in descending order of count. If the mapping is empty or its
        counts sum to zero, a short "no information" message is returned
        instead of raising.
    """
    total = float(sum(cl_dict.values()))
    if not total:
        # Nothing to rank, and percentages would divide by zero.
        return "No %s information is available for this cluster.\n" % block

    # Stable ascending sort followed by a reversal: labels with equal counts
    # therefore come out in reverse insertion order. Kept deliberately so the
    # generated text is unchanged for existing data.
    ranked = sorted(cl_dict.items(), key=operator.itemgetter(1))[::-1]
    percentages = [
        (label, round(count * 100 / total, 0)) for label, count in ranked
    ]

    top_label, top_share = percentages[0]
    parts = [
        "The most represented %s is %s, making up about %d%% of the cluster.\n"
        % (block, top_label, top_share),
        "This is followed by: ",
    ]
    parts.extend("\n%s: %d%%" % label_share for label_share in percentages[1:])
    return "".join(parts)

# Maximum number of images placed in the gallery for one cluster.
_MAX_GALLERY_IMAGES = 6

# Substring replacements applied, in order, to turn a path-derived name into a
# gallery caption. The generic '_' -> ' ' rule must stay last, otherwise the
# longer patterns above it would no longer match.
_CAPTION_REPLACEMENTS = (
    ('Photo_portrait_of_an_', ''),
    ('Photo_portrait_of_a_', ''),
    ('SD_v2_random_seeds_identity_', '(SD v.2) '),
    ('dataset-identities-dalle2_', '(Dall-E 2) '),
    ('SD_v1.4_random_seeds_identity_', '(SD v.1.4) '),
    ('_', ' '),
)


def _relative_image_path(raw_path):
    """Convert a stored image path into a path relative to the image folder."""
    # Stored paths are split on "//"; stray single slashes inside each piece
    # are removed and the first three pieces are dropped.
    pieces = [piece.replace("/", "") for piece in raw_path.split("//")]
    return "/".join(pieces[3:])


def _image_caption(img_path):
    """Build a gallery caption from the first and last components of a path."""
    components = img_path.split("/")
    caption = "_".join([components[0], components[-1]])
    for old, new in _CAPTION_REPLACEMENTS:
        caption = caption.replace(old, new)
    return caption


def _pie_figure(counts):
    """Return a Plotly figure holding one pie trace for a label -> count dict."""
    fig = go.Figure()
    fig.add_trace(go.Pie(labels=list(counts.keys()),
                         values=list(counts.values())))
    return fig


def show_cluster(cl_id, num_clusters):
    """Build every UI output for one cluster.

    Args:
        cl_id: Index of the cluster to show; a falsy value means cluster 0.
        num_clusters: Which clustering to use (a key of ``clusters_by_size``);
            a falsy value means 12.

    Returns:
        A 7-tuple: (number of images in the cluster, gender pie figure,
        gender description, model pie figure, model description, ethnicity
        bar figure, list of (PIL image, caption) pairs for the gallery).
    """
    if not cl_id:
        cl_id = 0
    if not num_clusters:
        num_clusters = 12
    clusters = clusters_by_size[num_clusters]
    # Coerce to int and clamp so that a stale index left over from a larger
    # clustering cannot raise IndexError.
    cl_id = max(0, min(int(cl_id), len(clusters) - 1))
    cl_dct = clusters[cl_id]

    img_path_list = cl_dct['img_path_list']
    images = []
    # Slice rather than index 0..5 so clusters holding fewer than
    # _MAX_GALLERY_IMAGES images are shown instead of raising IndexError.
    for raw_path in img_path_list[:_MAX_GALLERY_IMAGES]:
        img_path = _relative_image_path(raw_path)
        image = Image.open(os.path.join("identities-images", img_path))
        images.append((image, _image_caption(img_path)))

    # dict() accepts either a mapping or a sequence of pairs; convert once.
    model_counts = dict(cl_dct["labels_model"])
    gender_counts = dict(cl_dct["labels_gender"])
    ethnicity_counts = dict(cl_dct["labels_ethnicity"])

    model_fig = _pie_figure(model_counts)
    model_description = describe_cluster(model_counts, "model")

    gender_fig = _pie_figure(gender_counts)
    gender_description = describe_cluster(gender_counts, "gender")

    ethnicity_fig = go.Figure()
    ethnicity_fig.add_trace(go.Bar(x=list(ethnicity_counts.keys()),
                                   y=list(ethnicity_counts.values()),
                                   marker_color=px.colors.qualitative.G10))

    return (len(img_path_list),
            gender_fig, gender_description,
            model_fig, model_description,
            ethnicity_fig,
            images)

def _on_num_clusters_change(num_clusters):
    """Handle a change of clustering size.

    Resizes the cluster slider to the new number of clusters, resets it to
    cluster 0 and returns the outputs for that cluster, so every cluster of
    the 24- and 48-cluster settings is reachable and the slider never points
    past the end of a smaller clustering.
    """
    if not num_clusters:
        num_clusters = 12
    slider_update = gr.update(maximum=num_clusters - 1, value=0)
    return (slider_update, *show_cluster(0, num_clusters))


with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown("## This Space lets you explore the clusters based on the data generated from [DiffusionBiasExplorer](https://huggingface.co/spaces/society-ethics/DiffusionBiasExplorer).")
    # Both CSS declarations belong inside the style attribute.
    gr.HTML("""<span style="color:red; font-size:smaller">⚠️ DISCLAIMER: the images displayed by this tool were generated by text-to-image models and may depict offensive stereotypes or contain explicit content.</span>""")
    # The keyword is `label`; the misspelt `labels` left the radio group
    # without its prompt.
    num_clusters = gr.Radio([12, 24, 48], value=12, label="How many clusters do you want to make from the data?")

    with gr.Row():
        with gr.Column(scale=4):
            gallery = gr.Gallery(label="Most representative images in cluster").style(grid=(3,3))
        with gr.Column():
            # Initial range matches the default clustering; it is resized by
            # _on_num_clusters_change when the radio selection changes.
            cluster_id = gr.Slider(minimum=0, maximum=num_clusters.value-1, step=1, value=0, label="Click to move between clusters")
            a = gr.Text(label="Number of images")
    with gr.Row():
        with gr.Column(scale=1):
            c = gr.Plot(label="Model makeup of cluster")
            c_desc = gr.Text(label="")
        with gr.Column(scale=1):
            b = gr.Plot(label="Gender label makeup of cluster")
            b_desc = gr.Text(label="")
        with gr.Column(scale=2):
            d = gr.Plot(label="Ethnicity label makeup of cluster")

    gr.Markdown("The 'Model makeup' plot corresponds to the number of images from the cluster that come from each of the TTI systems that we are comparing: Dall-E 2, Stable Diffusion v.1.4. and Stable Diffusion v.2.")
    gr.Markdown("The 'Gender label makeup' plot corresponds to the number of images from each of the genders used in the prompts: male, female, non-binary and unmarked ('person')")
    gr.Markdown("The 'Ethnicity label makeup' plot corresponds to the number of images from each of the 18 ethnicities used in the prompts. A blank value means unmarked ethnicity")

    # Order matches the tuple returned by show_cluster.
    cluster_outputs = [a, b, b_desc, c, c_desc, d, gallery]
    demo.load(fn=show_cluster, inputs=[cluster_id, num_clusters], outputs=cluster_outputs)
    num_clusters.change(fn=_on_num_clusters_change, inputs=[num_clusters], outputs=[cluster_id] + cluster_outputs)
    cluster_id.change(fn=show_cluster, inputs=[cluster_id, num_clusters], outputs=cluster_outputs)

if __name__ == "__main__":
    # Only start the server when this file is run as a script: turn on the
    # request queue first, then launch with debug mode enabled.
    queued_demo = demo.queue()
    queued_demo.launch(debug=True)