Spaces:
Sleeping
Sleeping
File size: 1,578 Bytes
450856d d5df872 450856d b41a0ac 450856d 3c986cb 450856d d5df872 b41a0ac a3bc6dc b41a0ac a3bc6dc b41a0ac 90ed9b3 ae90632 90ed9b3 450856d 90ed9b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import streamlit as st
import os
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
# Access token for the Hugging Face Hub, read from the environment.
# It is None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")


def extract_languages(df: pd.DataFrame) -> pd.Series:
    """Return the ``language`` entry of each row's ``metadata`` mapping.

    Rows whose metadata is not a dict (for example ``None``) or that have no
    ``language`` key yield ``None`` instead of raising, so a single
    incomplete record cannot take the whole page down.
    """
    return df["metadata"].apply(
        lambda meta: meta.get("language") if isinstance(meta, dict) else None
    )


def _response_user_ids(responses, question: str = "is_edit_required") -> list:
    """Return the ``user_id`` of every answer to *question* in one record.

    *responses* is expected to be a mapping from question name to a
    list-like of answer dicts. Anything else (``None``, a missing question,
    an answer without ``user_id``) contributes no ids.
    """
    if not isinstance(responses, dict):
        return []
    answers = responses.get(question)
    # Compare against None explicitly: the container may be an array, whose
    # truth value is ambiguous.
    if answers is None:
        return []
    return [
        answer["user_id"]
        for answer in answers
        if isinstance(answer, dict) and "user_id" in answer
    ]


def count_user_ids(df: pd.DataFrame) -> pd.Series:
    """Count how many ``is_edit_required`` answers each user submitted.

    Returns a Series indexed by user id, as produced by
    ``Series.value_counts``. Records without answers are skipped, and an
    empty frame gives an empty result.
    """
    user_ids = [
        user_id
        for responses in df["responses"]
        for user_id in _response_user_ids(responses)
    ]
    # An explicit dtype keeps the empty case well-defined.
    return pd.Series(user_ids, dtype="object").value_counts()


def counts_table(counts: pd.Series, label: str) -> pd.DataFrame:
    """Turn a ``value_counts`` result into a two-column display table.

    The first column is named *label* and the second ``Count``.
    """
    table = counts.reset_index()
    # Assign both names explicitly: the default column names produced by
    # reset_index() on a value_counts result differ between pandas versions.
    table.columns = [label, "Count"]
    return table


def main() -> None:
    """Load the translation results and render the progress dashboard."""
    ds = load_dataset(
        "CohereForAI/mmlu-translations-results", split="train", token=HF_TOKEN
    )
    df = ds.to_pandas()
    st.title("MMLU Translations Progress")

    # Keep the extracted language as a column so it also shows up in the raw
    # dataframe rendered at the bottom of the page.
    df["language"] = extract_languages(df)
    language_counts = df["language"].value_counts()

    fig, ax = plt.subplots()
    try:
        language_counts.plot(kind="bar", ax=ax)
        ax.set_title("Number of Completed Tasks for Each Language")
        ax.set_xlabel("Language")
        ax.set_ylabel("Count")

        # Table first, then the chart.
        st.table(counts_table(language_counts, "Language"))
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed.
        # Without this, each rerun of the script leaks one figure.
        plt.close(fig)

    # Per-user count of submitted `is_edit_required` answers.
    st.table(counts_table(count_user_ids(df), "User ID"))
    st.dataframe(df)


# `streamlit run` executes the script as `__main__`, so the page is rendered
# there, while importing this module (e.g. from tests) has no side effects.
if __name__ == "__main__":
    main()
|