File size: 1,989 Bytes
450856d
d5df872
450856d
b41a0ac
 
7ac4c38
450856d
7ac4c38
 
 
450856d
 
7ac4c38
 
 
 
 
 
 
 
 
3c986cb
450856d
 
 
d5df872
 
b41a0ac
 
 
 
 
 
 
 
 
a3bc6dc
b41a0ac
 
 
a3bc6dc
 
 
 
 
 
 
b41a0ac
 
 
90ed9b3
 
ae90632
90ed9b3
 
7ac4c38
 
 
90ed9b3
 
7ac4c38
90ed9b3
7ac4c38
90ed9b3
 
7ac4c38
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import streamlit as st
import os
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
import argilla as rg


# Connection settings are read from the environment; each value is None when
# the corresponding variable is unset.
ARGILLA_API_URL = os.environ.get("ARGILLA_API_URL")
ARGILLA_API_KEY = os.environ.get("ARGILLA_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")

client = rg.Argilla(
    api_url=ARGILLA_API_URL,
    api_key=ARGILLA_API_KEY,
)

workspace = client.workspaces('cohere')
if workspace is None:
    # The lookup yields None instead of raising when no workspace matches, so
    # fail here with a readable message rather than an AttributeError on
    # `workspace.users` below.
    st.error(
        "Argilla workspace 'cohere' was not found or is not accessible "
        "with the configured credentials."
    )
    st.stop()

# Lookup table: stringified Argilla user id -> username, used further down to
# label per-user counts.
users_map = {str(user.id): user.username for user in workspace.users}

# Fetch the "train" split of the results dataset from the Hugging Face Hub,
# authenticating with HF_TOKEN, and flatten it into a DataFrame for analysis.
ds = load_dataset(
    path="CohereForAI/mmlu-translations-results",
    split="train",
    token=HF_TOKEN,
)
df = ds.to_pandas()

# Page heading
st.title("MMLU Translations Progress")

# Extract the language from the metadata column and create a new column.
# A row whose metadata is missing gets None instead of raising; None is
# ignored by value_counts() below.
df['language'] = df['metadata'].apply(
    lambda meta: meta.get('language') if meta is not None else None
)

# Count the occurrences of each language
language_counts = df['language'].value_counts()

# Convert the language counts to a DataFrame for display in the table
language_counts_df = language_counts.reset_index()
language_counts_df.columns = ['Language', 'Count']

# Display the table in the Streamlit app
st.table(language_counts_df)

# Plotting the bar chart using matplotlib
fig, ax = plt.subplots()
language_counts.plot(kind='bar', ax=ax)
ax.set_title('Number of Completed Tasks for Each Language')
ax.set_xlabel('Language')
ax.set_ylabel('Count')

# Display the plot in the Streamlit app
st.pyplot(fig)

# Streamlit re-executes this script on every interaction; without an explicit
# close each run would leave another figure registered with pyplot.
plt.close(fig)


# Extract user_id from the is_edit_required field in the response column and
# count occurrences.
# NOTE(review): this assumes each row's `responses` is a mapping whose
# 'is_edit_required' entry is a list of dicts carrying 'user_id' — confirm
# against the dataset schema.
edit_responses = df['responses'].apply(
    lambda resp: resp['is_edit_required'] if resp is not None else None
)

# explode() emits NaN for rows with an empty (or missing) response list; drop
# those before indexing into each response, otherwise the lookup below raises.
flat_responses = edit_responses.explode().dropna()
user_ids = flat_responses.apply(lambda resp: resp['user_id'])
user_id_counts = user_ids.value_counts()

# Map user IDs to usernames. An id that is not in the workspace roster keeps
# its raw id as the label instead of collapsing to NaN.
user_id_counts.index = user_id_counts.index.map(
    lambda uid: users_map.get(str(uid), str(uid))
)

# Convert the user ID counts to a DataFrame for display in the table
user_id_counts_df = user_id_counts.reset_index()
user_id_counts_df.columns = ['Username', 'Count']

# Display the table of username counts in the Streamlit app
st.table(user_id_counts_df)

# Show the full results table, including the derived columns added above
st.dataframe(df)