# Hugging Face Space: nataliaElv/language-leads-dashboard

import gradio as gr
import pandas as pd
import plotly.graph_objects as go

def get_covered_languages():
    """Return the language codes of every language that already has a lead.

    Reads the master language list from
    ``data/merged_language_list_with_duplicates.csv`` (columns ``Language``
    and ``Code``) and the covered language names from
    ``data/covered_languages.txt``, where each line may hold several
    comma-separated names.

    Returns:
        A list with one code per distinct covered language, in the order the
        languages first appear in the text file. When a language is listed
        more than once in the CSV, the code of its first row is used.

    Raises:
        ValueError: If a covered language name is missing from the CSV.
    """
    # Load data
    all_languages = pd.read_csv('data/merged_language_list_with_duplicates.csv')
    # pandas decodes the CSV as UTF-8 by default; decode the names the same
    # way so non-ASCII language names compare equal on every platform.
    with open("data/covered_languages.txt", encoding="utf-8") as f:
        lines = f.read().splitlines()

    # Split comma-separated lines, drop empty fragments (blank lines, trailing
    # commas) and de-duplicate while keeping first-seen order.
    names = (name.strip() for line in lines for name in line.split(','))
    covered_languages = list(dict.fromkeys(name for name in names if name))

    # Build the name -> code lookup once; setdefault keeps the first row for
    # names that are duplicated in the CSV.
    code_by_language = {}
    for language, code in zip(all_languages['Language'], all_languages['Code']):
        code_by_language.setdefault(language, code)

    missing = [lang for lang in covered_languages if lang not in code_by_language]
    if missing:
        raise ValueError(
            "Mismatch between covered languages and their codes: "
            + ", ".join(missing)
        )
    return [code_by_language[lang] for lang in covered_languages]
    
def build_dataframes(covered_language_codes):
    """Split the clean language list by whether a language has a lead.

    Loads ``data/merged_language_list_clean.csv`` and partitions its rows on
    the ``Code`` column.

    Args:
        covered_language_codes: Codes of the languages that have a lead.

    Returns:
        A ``(languages_with_lead, languages_without_lead)`` tuple of
        dataframes: rows whose ``Code`` is in ``covered_language_codes`` and
        the remaining rows, respectively.
    """
    catalogue = pd.read_csv('data/merged_language_list_clean.csv')

    # One membership mask drives both halves of the partition.
    has_lead = catalogue['Code'].isin(covered_language_codes)

    return catalogue[has_lead], catalogue[~has_lead]

# def create_progress_bar(languages_with_lead, languages_without_lead):
#     top_labels = ['With lead', 'Without lead']

#     colors = ['rgba(38, 24, 74, 0.8)', 'rgba(190, 192, 213, 1)']

#     x_data = [len(languages_with_lead), len(languages_without_lead)]

#     y_data = ['Progress']

#     fig = go.Figure()

#     for i in range(0, len(x_data[0])):
#         for xd, yd in zip(x_data, y_data):
#             fig.add_trace(go.Bar(
#                 x=[xd[i]], y=[yd],
#                 orientation='h',
#                 marker=dict(
#                     color=colors[i],
#                     line=dict(color='rgb(248, 248, 249)', width=1)
#                 ),
#                 hoverinfo='text',
#                 hovertext=f"{top_labels[i]} records: {xd[i]}"
#             ))

#     fig.update_layout(
#         xaxis=dict(
#             showgrid=False,
#             showline=False,
#             showticklabels=False,
#             zeroline=False,
#             domain=[0.15, 1]

#         ),
#         yaxis=dict(
#             showgrid=False,
#             showline=False,
#             showticklabels=False,
#             zeroline=False,
#             domain=[0.15, 0.5]
#         ),
#         barmode='stack',
#         paper_bgcolor='rgb(248, 248, 255)',
#         plot_bgcolor='rgb(248, 248, 255)',
#         margin=dict(l=120, r=10, t=140, b=80),
#         showlegend=False
#     )

#     annotations = []

#     for yd, xd in zip(y_data, x_data):
#         # labeling the y-axis
#         annotations.append(dict(xref='paper', yref='y',
#                                 x=0.14, y=yd,
#                                 xanchor='right',
#                                 text=str(yd),
#                                 font=dict(family='Arial', size=14,
#                                         color='rgb(67, 67, 67)'),
#                                 showarrow=False, align='right'))
#         # labeling the first percentage of each bar (x_axis)
#         if xd[0] > 0:
#             annotations.append(dict(xref='x', yref='y',
#                                     x=xd[0] / 2, y=yd,
#                                     text=str(xd[0]),
#                                     font=dict(family='Arial', size=14,
#                                             color='rgb(248, 248, 255)'),
#                                     showarrow=False))
#         space = xd[0]
#         for i in range(1, len(xd)):
#             if xd[i] > 0:
#                 # labeling the rest of percentages for each bar (x_axis)
#                 annotations.append(dict(xref='x', yref='y',
#                                         x=space + (xd[i]/2), y=yd,
#                                         text=str(xd[i]),
#                                         font=dict(family='Arial', size=14,
#                                                 color='rgb(248, 248, 255)'),
#                                         showarrow=False))
#                 space += xd[i]

#     fig.update_layout(annotations=annotations, height=80)  
#     return fig


def create_piechart():
    """Build the "Language Leads" pie chart.

    Reads the module-level ``languages_with_lead`` and
    ``languages_without_lead`` objects and plots their lengths as two slices.

    Returns:
        A plotly ``go.Figure`` holding a single pie trace.
    """
    slice_colors = ['rgba(38, 24, 74, 0.8)', 'rgba(190, 192, 213, 1)']
    lead_counts = [len(languages_with_lead), len(languages_without_lead)]

    pie = go.Pie(
        labels=["With lead", "Without lead"],
        values=lead_counts,
        marker={'colors': slice_colors},
    )
    chart = go.Figure(pie)

    chart.update_layout(
        title_text="Language Leads",
        height=500,
        margin={'l': 10, 'r': 10, 't': 50, 'b': 10},
    )
    # Show each slice's label and raw count inside the slice.
    chart.update_traces(textposition='inside', textinfo='label+value')

    return chart


# Load the data before building the UI: it creates no components, and
# create_piechart() reads these two module-level dataframes.
languages_with_lead, languages_without_lead = build_dataframes(get_covered_languages())

# Dashboard layout: a pie chart summary followed by one tab per dataframe.
with gr.Blocks() as demo:
    gr.Markdown("## Language Leads Dashboard")
    with gr.Row():
        gr.Plot(value=create_piechart(), label="Language Leads")
    with gr.Tab("Looking for leads!"):
        gr.Markdown("These languages don't have a lead yet! Would you like to lead one of them? Sign up using [this form](https://forms.gle/mFCMXNRjxvyFvW5q9).")
        gr.DataFrame(languages_without_lead)
    with gr.Tab("Languages with leads"):
        gr.Markdown("We found at least one lead for these languages:")
        gr.DataFrame(languages_with_lead)

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse `demo` or the helpers) does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()