import gradio as gr | |
import pandas as pd | |
import plotly.graph_objects as go | |
def get_covered_languages():
    """Return the language codes of every language that already has a lead.

    The master language list is read from
    ``data/merged_language_list_with_duplicates.csv`` (columns ``Language``
    and ``Code``) and the covered language names from
    ``data/covered_languages.txt``. Each line of the text file may hold a
    single name or several names separated by commas; blank entries (empty
    lines, trailing commas) are ignored.

    Returns:
        list: One code per distinct covered language, in the order the
        languages first appear in the text file.

    Raises:
        ValueError: If a covered language has no row in the master list.
    """
    all_languages = pd.read_csv('data/merged_language_list_with_duplicates.csv')
    # Read as UTF-8 so names decode the same way as pandas' CSV default.
    with open("data/covered_languages.txt", encoding="utf-8") as f:
        lines = f.read().splitlines()

    # Split comma-separated lines, flatten, and drop blank entries.
    names = (name.strip() for line in lines for name in line.split(','))
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is deterministic (a set would not be).
    covered_languages = list(dict.fromkeys(name for name in names if name))

    # The master list contains duplicate names; keep the first code listed
    # for each name, and build the mapping once instead of scanning the
    # dataframe for every covered language.
    code_by_language = (
        all_languages.drop_duplicates(subset='Language')
        .set_index('Language')['Code']
        .to_dict()
    )

    missing = [lang for lang in covered_languages if lang not in code_by_language]
    if missing:
        # Explicit raise rather than assert: the check must survive `python -O`.
        raise ValueError(
            "Mismatch between covered languages and their codes: "
            f"no code found for {missing}"
        )
    return [code_by_language[lang] for lang in covered_languages]
def build_dataframes(covered_language_codes):
    """Split the clean language list by whether a language has a lead.

    Reads ``data/merged_language_list_clean.csv`` and partitions its rows on
    the ``Code`` column.

    Args:
        covered_language_codes: Codes of the languages that have a lead.

    Returns:
        tuple: ``(languages_with_lead, languages_without_lead)`` -- the rows
        whose ``Code`` is in ``covered_language_codes`` and the remaining
        rows, respectively.
    """
    catalogue = pd.read_csv('data/merged_language_list_clean.csv')
    # One membership mask serves both halves of the partition.
    has_lead = catalogue['Code'].isin(covered_language_codes)
    return catalogue[has_lead], catalogue[~has_lead]
# def create_progress_bar(languages_with_lead, languages_without_lead): | |
# top_labels = ['With lead', 'Without lead'] | |
# colors = ['rgba(38, 24, 74, 0.8)', 'rgba(190, 192, 213, 1)'] | |
# x_data = [len(languages_with_lead), len(languages_without_lead)] | |
# y_data = ['Progress'] | |
# fig = go.Figure() | |
# for i in range(0, len(x_data[0])): | |
# for xd, yd in zip(x_data, y_data): | |
# fig.add_trace(go.Bar( | |
# x=[xd[i]], y=[yd], | |
# orientation='h', | |
# marker=dict( | |
# color=colors[i], | |
# line=dict(color='rgb(248, 248, 249)', width=1) | |
# ), | |
# hoverinfo='text', | |
# hovertext=f"{top_labels[i]} records: {xd[i]}" | |
# )) | |
# fig.update_layout( | |
# xaxis=dict( | |
# showgrid=False, | |
# showline=False, | |
# showticklabels=False, | |
# zeroline=False, | |
# domain=[0.15, 1] | |
# ), | |
# yaxis=dict( | |
# showgrid=False, | |
# showline=False, | |
# showticklabels=False, | |
# zeroline=False, | |
# domain=[0.15, 0.5] | |
# ), | |
# barmode='stack', | |
# paper_bgcolor='rgb(248, 248, 255)', | |
# plot_bgcolor='rgb(248, 248, 255)', | |
# margin=dict(l=120, r=10, t=140, b=80), | |
# showlegend=False | |
# ) | |
# annotations = [] | |
# for yd, xd in zip(y_data, x_data): | |
# # labeling the y-axis | |
# annotations.append(dict(xref='paper', yref='y', | |
# x=0.14, y=yd, | |
# xanchor='right', | |
# text=str(yd), | |
# font=dict(family='Arial', size=14, | |
# color='rgb(67, 67, 67)'), | |
# showarrow=False, align='right')) | |
# # labeling the first percentage of each bar (x_axis) | |
# if xd[0] > 0: | |
# annotations.append(dict(xref='x', yref='y', | |
# x=xd[0] / 2, y=yd, | |
# text=str(xd[0]), | |
# font=dict(family='Arial', size=14, | |
# color='rgb(248, 248, 255)'), | |
# showarrow=False)) | |
# space = xd[0] | |
# for i in range(1, len(xd)): | |
# if xd[i] > 0: | |
# # labeling the rest of percentages for each bar (x_axis) | |
# annotations.append(dict(xref='x', yref='y', | |
# x=space + (xd[i]/2), y=yd, | |
# text=str(xd[i]), | |
# font=dict(family='Arial', size=14, | |
# color='rgb(248, 248, 255)'), | |
# showarrow=False)) | |
# space += xd[i] | |
# fig.update_layout(annotations=annotations, height=80) | |
# return fig | |
def create_piechart():
    """Build the "Language Leads" pie chart.

    The two slice sizes are ``len(languages_with_lead)`` and
    ``len(languages_without_lead)``; both names are read from module scope,
    so they must be assigned before this function is called.

    Returns:
        The plotly figure holding a single pie trace with the slices
        labelled "With lead" and "Without lead".
    """
    slice_colors = ['rgba(38, 24, 74, 0.8)', 'rgba(190, 192, 213, 1)']
    lead_counts = [len(languages_with_lead), len(languages_without_lead)]

    pie = go.Pie(
        labels=["With lead", "Without lead"],
        values=lead_counts,
        marker={"colors": slice_colors},
    )
    chart = go.Figure(pie)

    # Print each slice's label and raw count inside the slice itself.
    chart.update_traces(textposition='inside', textinfo='label+value')
    chart.update_layout(
        title_text="Language Leads",
        height=500,
        margin={"l": 10, "r": 10, "t": 50, "b": 10},
    )
    return chart
# Load the data up front: this creates no UI components, so it does not need
# to run inside the Blocks context. The two names stay at module level
# because create_piechart() reads them from there.
languages_with_lead, languages_without_lead = build_dataframes(get_covered_languages())

# Components are laid out in the order they are created below: heading,
# pie chart row, then one tab per dataframe.
with gr.Blocks() as demo:
    gr.Markdown(value="## Language Leads Dashboard")

    with gr.Row():
        piechart = create_piechart()
        gr.Plot(value=piechart, label="Language Leads")

    with gr.Tab(label="Looking for leads!"):
        gr.Markdown(value="These languages don't have a lead yet! Would you like to lead one of them? Sign up using [this form](https://forms.gle/mFCMXNRjxvyFvW5q9).")
        gr.DataFrame(value=languages_without_lead)

    with gr.Tab(label="Languages with leads"):
        gr.Markdown(value="We found at least one lead for these languages:")
        gr.DataFrame(value=languages_with_lead)

demo.launch()