File size: 4,013 Bytes
72246a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a089b21
e9c936c
72246a1
 
a089b21
72246a1
 
800c1ec
 
72246a1
1c1793c
800c1ec
 
 
 
 
 
 
72246a1
800c1ec
72246a1
 
 
28b6676
 
188a77c
 
 
 
 
 
 
28b6676
 
 
8600eb0
 
 
 
 
800c1ec
72246a1
 
 
8600eb0
 
 
28b6676
 
 
8600eb0
72246a1
 
8600eb0
72246a1
 
8600eb0
 
 
 
 
 
 
 
 
 
 
28b6676
 
 
 
 
72246a1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr
import pandas as pd
import plotly.graph_objects as go

def get_covered_languages():
    """Return the language codes of every language that already has a lead.

    Reads the language/code table from
    ``data/merged_language_list_with_duplicates.csv`` (columns ``Language``
    and ``Code``) and the covered language names from
    ``data/covered_languages.txt``, where a line may hold several
    comma-separated names.

    Returns:
        list: One code per distinct covered language name, in order of first
        appearance in the text file.

    Raises:
        ValueError: If a covered language name has no row in the CSV.
    """
    # Load data
    all_languages = pd.read_csv('data/merged_language_list_with_duplicates.csv')
    with open("data/covered_languages.txt", encoding="utf-8") as f:
        lines = f.read().splitlines()

    # Split comma-separated lines and trim whitespace. Blank lines and
    # trailing commas produce empty names, which are not languages: drop them.
    names = [name.strip() for line in lines for name in line.split(',')]
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    covered_languages = list(dict.fromkeys(name for name in names if name))

    # The CSV may list a language more than once; the first row wins.
    unique_rows = all_languages.drop_duplicates(subset='Language', keep='first')
    code_by_language = dict(zip(unique_rows['Language'], unique_rows['Code']))

    # Validate explicitly (an assert would vanish under ``python -O``) and
    # report exactly which names could not be resolved.
    missing = [lang for lang in covered_languages if lang not in code_by_language]
    if missing:
        raise ValueError(
            "Mismatch between covered languages and their codes: "
            f"no code found for {missing}"
        )
    return [code_by_language[lang] for lang in covered_languages]
    
def build_dataframes(covered_language_codes):
    """Split the clean language list into languages with and without a lead.

    Reads ``data/merged_language_list_clean.csv`` and partitions its rows on
    whether the ``Code`` column appears in *covered_language_codes*. The
    with-lead partition is also written to ``data/languages_with_lead.csv``
    (without the index column).

    Returns:
        tuple: ``(languages_with_lead, languages_without_lead)``, both sorted
        by ``Code``.
    """
    catalogue = pd.read_csv('data/merged_language_list_clean.csv')

    # One membership mask drives both partitions.
    has_lead = catalogue['Code'].isin(covered_language_codes)
    languages_with_lead = catalogue[has_lead].sort_values(by='Code')
    languages_without_lead = catalogue[~has_lead].sort_values(by='Code')

    # Persist the covered languages.
    languages_with_lead.to_csv('data/languages_with_lead.csv', index=False)

    return languages_with_lead, languages_without_lead


def create_piechart():
    """Build a pie chart of how many languages have a lead and how many do not.

    The slice sizes are the row counts of the module-level
    ``languages_with_lead`` and ``languages_without_lead`` dataframes.

    Returns:
        The plotly figure holding the single pie trace.
    """
    palette = ['#ffd21e', '#0b4a70']
    slice_labels = ["With lead", "Without lead"]
    slice_sizes = [len(languages_with_lead), len(languages_without_lead)]

    pie = go.Pie(
        labels=slice_labels,
        values=slice_sizes,
        marker={'colors': palette},
    )
    fig = go.Figure(pie)

    # Print each slice's label and count inside the slice itself.
    fig.update_traces(textposition='inside', textinfo='label+value')
    return fig

def _rows_matching_any(frame, terms):
    """Return the rows of *frame* whose Language or Code contains any of *terms*."""
    def matches(language, code):
        language, code = str(language).lower(), str(code).lower()
        return any(term in language or term in code for term in terms)

    # An explicit boolean Series keeps row selection well-defined even when
    # *frame* is empty.
    mask = pd.Series(
        [matches(language, code) for language, code in zip(frame['Language'], frame['Code'])],
        index=frame.index,
        dtype=bool,
    )
    return frame[mask]


def filter_dataframes(search_term=None):
    """Filter both language tables by a case-insensitive search string.

    The search string is split on whitespace; a row is kept when any of the
    resulting terms is a substring of its ``Language`` or ``Code`` value.
    With no search string (or one that is only whitespace) both tables are
    returned unfiltered.

    Args:
        search_term: Text typed by the user, or ``None``.

    Returns:
        tuple: ``(languages_without_lead, languages_with_lead)`` after
        filtering, in that order.
    """
    # split() with no argument discards leading/trailing/repeated whitespace.
    # split(" ") would yield empty terms, and '' is a substring of every
    # string, so a stray space would match every row.
    terms = search_term.lower().split() if search_term else []
    if not terms:
        return languages_without_lead, languages_with_lead

    return (
        _rows_matching_any(languages_without_lead, terms),
        _rows_matching_any(languages_with_lead, terms),
    )

def load_demo():
    """Reload the language data and return the initial contents of the page.

    Returns:
        tuple: ``(languages_without_lead, languages_with_lead, piechart)``,
        matching the order of the outputs wired to ``demo.load``.
    """
    # Rebind the module-level frames instead of shadowing them with locals:
    # create_piechart() and filter_dataframes() read these globals, so without
    # this they would keep using the frames built at import time.
    global languages_with_lead, languages_without_lead
    languages_with_lead, languages_without_lead = build_dataframes(get_covered_languages())
    piechart = create_piechart()
    return languages_without_lead, languages_with_lead, piechart

# Dashboard layout and event wiring.
with gr.Blocks() as demo:
    gr.Markdown("## Language Leads Dashboard")
    # Module-level frames read by create_piechart() and filter_dataframes().
    languages_with_lead, languages_without_lead = build_dataframes(get_covered_languages())
    gr_piechart = gr.Plot(label="Language Leads")

    search_box = gr.Textbox(type="text", label="Search your language:")
    with gr.Row():
        # Labels restored from mis-encoded emoji bytes.
        search_button = gr.Button("Search 🔎")
        reset_button = gr.Button("Reset 🔁")

    with gr.Tab("Looking for leads!"):
        gr.Markdown("These languages don't have a lead yet! Would you like to lead one of them? Sign up using [this form](https://forms.gle/mFCMXNRjxvyFvW5q9).")
        gr_languages_without_lead = gr.DataFrame()
    with gr.Tab("Languages with leads"):
        gr.Markdown("We found at least one lead for these languages:")
        gr_languages_with_lead = gr.DataFrame()

    # Populate both tables and the pie chart whenever the page is loaded.
    demo.load(
        load_demo,
        outputs=[gr_languages_without_lead, gr_languages_with_lead, gr_piechart],
    )
    # Search filters both tables by the text in the search box.
    search_button.click(
        fn=filter_dataframes,
        inputs=search_box,
        outputs=[gr_languages_without_lead, gr_languages_with_lead]
    )
    # Reset calls the filter with no search term, which returns the full tables.
    reset_button.click(
        fn=filter_dataframes,
        inputs=None,
        outputs=[gr_languages_without_lead, gr_languages_with_lead]
    )

demo.launch()