# MultiPL-E / app.py
# (File-viewer residue captured with the source, kept here as a comment:
#  arjunguha's picture | Bugfix | 99a9bf4 | raw | history blame | 3.4 kB)
import gradio as gr
import pandas as pd
import numpy as np
# Full language name for each file-extension style language code
extension_to_language = {
    "clj": "Clojure",
    "cpp": "C++",
    "cs": "C#",
    "d": "D",
    "elixir": "Elixir",
    "go": "Go",
    "hs": "Haskell",
    "java": "Java",
    "jl": "Julia",
    "js": "JavaScript",
    "lua": "Lua",
    "ml": "OCaml",
    "php": "PHP",
    "pl": "Perl",
    "r": "R",
    "rb": "Ruby",
    "rkt": "Racket",
    "rs": "Rust",
    "scala": "Scala",
    "sh": "Shell",
    "swift": "Swift",
    "ts": "TypeScript",
}
# Load the results table from the CSV file next to this script; the code
# below reads its 'Dataset' and 'Estimate' columns.
df = pd.read_csv("passk.csv")
def extract_info(dataset: str) -> pd.Series:
    """Split a dataset identifier into its language and model parts.

    The identifier is split on '-'. The second field is the language code.
    Every field from the third up to (but excluding) the last two is joined
    back together with '-' to form the model name, so a model name that
    itself contains hyphens is kept intact.

    Args:
        dataset: Hyphen-separated dataset identifier.

    Returns:
        A Series with the entries 'Language' and 'Model'. 'Model' is the
        empty string when the identifier has fewer than five fields.

    Raises:
        IndexError: If ``dataset`` contains no hyphen at all.
    """
    parts = dataset.split('-')
    language = parts[1]
    # Drop the leading prefix/language fields and the two trailing fields.
    model = '-'.join(parts[2:-2])
    return pd.Series({'Language': language, 'Model': model})
# Derive the 'Language' and 'Model' columns from each row's 'Dataset' value
df[["Language", "Model"]] = df["Dataset"].apply(extract_info)
# Display names for models whose raw identifier should not be shown as-is
model_to_friendly = {
    "starcoder2_15b": "StarCoder2-15B",
    "deepseekcoder_v2lite": "DeepSeekCoder2-Lite",
}


def get_friendly_name(model: str) -> str:
    """Return the display name for *model*.

    Args:
        model: Raw model identifier.

    Returns:
        The entry from ``model_to_friendly`` if one exists, otherwise
        ``model`` unchanged.
    """
    return model_to_friendly.get(model, model)
# Reshape to one row per model and one column per language, with the
# 'Estimate' value in each cell
pivot = df.pivot(index="Model", columns="Language", values="Estimate")
# Sorted language codes (the pivot's columns) and model names (its index)
languages = sorted(pivot.columns)
models = sorted(pivot.index)
def update_table(selected_languages):
    """Build the leaderboard table restricted to the selected languages.

    Args:
        selected_languages: Language codes (column labels of ``pivot``), in
            the order in which the columns should appear.

    Returns:
        A DataFrame whose first column is 'Model' (friendly model names),
        followed by one column per selected language, titled with the full
        language name where ``extension_to_language`` has one. Numeric
        scores are rendered as strings with three decimals and missing
        scores as "-". If nothing is selected, only the 'Model' column is
        returned.

    Raises:
        KeyError: If a selected code is not a column of ``pivot``.
    """
    if not selected_languages:
        return pd.DataFrame({'Model': [get_friendly_name(model) for model in models]})

    # pivot[...] needs a list to select several columns; a tuple would be
    # treated as a single key.
    codes = list(selected_languages)

    def format_cell(value):
        # A missing (model, language) combination is shown as a dash.
        if pd.isna(value):
            return "-"
        if isinstance(value, (int, float)):
            return f"{value:.3f}"
        return value

    # Series.map per column instead of DataFrame.applymap: applymap is
    # deprecated since pandas 2.1, whereas this form works on old and new
    # pandas alike. Missing values and formatting are handled in one pass,
    # so no float column is temporarily filled with strings.
    display_data = pivot[codes].apply(lambda column: column.map(format_cell))

    # Add the Model column as the first column
    display_data.insert(0, 'Model', [get_friendly_name(model) for model in display_data.index])
    # Reset the index to remove the model names from the index
    display_data = display_data.reset_index(drop=True)
    # Rename columns to full language names
    display_data.columns = ['Model'] + [extension_to_language.get(lang, lang) for lang in codes]
    return display_data
def get_initial_table():
    """Return the table shown on first load: every language selected."""
    return update_table(languages)
# Create the Gradio interface

# One checkbox label per language, formatted "<full name> (<code>)". Codes
# without an entry in extension_to_language fall back to the code itself,
# the same way the table headers do, instead of raising KeyError.
_language_labels = [
    f"{extension_to_language.get(lang, lang)} ({lang})" for lang in languages
]
# Reverse lookup from checkbox label to language code.
_label_to_code = dict(zip(_language_labels, languages))

with gr.Blocks() as app:
    gr.Markdown("# Model Leaderboard")
    with gr.Row():
        language_checkboxes = gr.CheckboxGroup(
            choices=_language_labels,
            label="Select Languages",
            # All languages are ticked initially; pass a copy so the widget
            # does not share the list used for `choices`.
            value=list(_language_labels),
        )
    table = gr.Dataframe(
        # A callable is passed so the table is computed when the page loads.
        value=get_initial_table,
        headers=['Model'] + [extension_to_language.get(lang, lang) for lang in languages],
        type="pandas",
    )

    def update_table_wrapper(selected_languages):
        """Translate the ticked checkbox labels to codes and rebuild the table.

        Args:
            selected_languages: Labels currently selected in the checkbox
                group, each one of the group's ``choices``.

        Returns:
            The DataFrame produced by ``update_table`` for those languages.
        """
        selected_codes = [_label_to_code[label] for label in selected_languages]
        return update_table(selected_codes)

    # Refresh the table whenever the selection changes.
    language_checkboxes.change(update_table_wrapper, inputs=[language_checkboxes], outputs=[table])
# Launch the app only when this file is executed as a script
if __name__ == "__main__":
    app.launch()