Spaces:
Runtime error
Runtime error
File size: 1,384 Bytes
cd37af8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import streamlit as st
import pandas as pd
from filter_dataframe import filter_dataframe
@st.cache_data
def get_language_stats_df():
    """Read the dataset statistics table from its parquet file.

    The function is decorated with ``st.cache_data``; the file location is
    fixed and relative to the working directory.
    """
    stats_path = "data/datasets_stats.parquet"
    return pd.read_parquet(stats_path)
# Header placed at the top of every export; the text is still a placeholder.
_MMS_CITATION = """\
#TODO: Add MMS citation
"""

# Divider written between consecutive citation entries.
CITATION_SEPARATOR = "\n% " + ("-" * 90) + "\n"


def export_citations(df: pd.DataFrame) -> str:
    """Build a single text export of the citations for the rows in *df*.

    Rows that share a citation are merged into one entry. Each entry starts
    with a ``% Datasets: ...`` line listing the datasets that use the
    citation (each name once, in first-seen order), followed by the citation
    text itself. Entries are ordered by their dataset list, joined with
    ``CITATION_SEPARATOR`` and prefixed with ``_MMS_CITATION``.

    Args:
        df: Frame providing ``original_dataset`` and ``citation`` columns.

    Returns:
        The assembled export. When *df* has no rows with a citation, only
        the ``_MMS_CITATION`` header and the blank lines after it remain.
    """
    # citation text -> insertion-ordered set of dataset names (dict keys).
    datasets_by_citation = {}
    rows = zip(df["original_dataset"].tolist(), df["citation"].tolist())
    for dataset, citation in rows:
        # Rows without a citation have nothing to export; skip them.
        if pd.isna(citation):
            continue
        datasets_by_citation.setdefault(citation, {})[dataset] = None

    # Sort by the joined dataset list; the citation text breaks ties so the
    # output order is deterministic.
    entries = sorted(
        (", ".join(datasets), citation)
        for citation, datasets in datasets_by_citation.items()
    )
    citation_blocks = [
        "% Datasets: " + datasets + "\n" + citation
        for datasets, citation in entries
    ]
    dataset_citations_joined = CITATION_SEPARATOR.join(citation_blocks)
    return f"{_MMS_CITATION}\n\n{dataset_citations_joined}"
# Page chrome: browser tab title/icon, then the visible heading and blurb.
st.set_page_config(page_icon="📈", page_title="Dataset statistics")
st.markdown("# Dataset statistics")
st.write("""TODO: Description""")

# Load the statistics table, pass it through the filter helper, and show
# the filtered result.
df = get_language_stats_df()
df_filter = filter_dataframe(df)
st.dataframe(df_filter)
# When the button is pressed, build the citation export for the currently
# filtered rows once and display it as a code block.
if st.button("Export citations"):
    st.code(export_citations(df_filter))