Spaces:
Runtime error
Runtime error
import streamlit as st | |
import pandas as pd | |
from filter_dataframe import filter_dataframe | |
def get_language_stats_df():
    """Load the dataset statistics table.

    Returns:
        The contents of ``data/datasets_stats.parquet`` (path relative to
        the current working directory) as a pandas DataFrame.
    """
    stats_path = "data/datasets_stats.parquet"
    stats_df = pd.read_parquet(stats_path)
    return stats_df
# Placeholder header emitted at the top of every citation export.
_MMS_CITATION = """\
#TODO: Add MMS citation
"""

# Divider placed between citation entries: a "% " line followed by 90 dashes.
# NOTE(review): "%" is presumably meant as a BibTeX/LaTeX comment marker.
CITATION_SEPARATOR = "\n% " + ("-" * 90) + "\n"


def export_citations(df: pd.DataFrame) -> str:
    """Build a citation export for the datasets listed in *df*.

    Rows are grouped by their ``citation`` text so that a citation shared by
    several datasets is emitted once, preceded by a ``% Datasets: ...`` line
    naming every dataset that uses it. Entries are ordered by that joined
    dataset-name string and separated by ``CITATION_SEPARATOR``; the whole
    export is prefixed with ``_MMS_CITATION``.

    Args:
        df: Frame with an ``original_dataset`` column (dataset names, expected
            to be strings) and a ``citation`` column (citation text).

    Returns:
        The assembled export. For a frame with no usable rows this is just
        the ``_MMS_CITATION`` header followed by two newlines.
    """
    # citation text -> insertion-ordered "set" of dataset names (dict keys).
    names_by_citation = {}
    for name, citation in zip(df["original_dataset"], df["citation"]):
        # Rows without a citation have nothing to export, so skip them.
        if pd.isna(citation):
            continue
        # Using dict keys de-duplicates names while keeping first-seen order,
        # so a dataset occurring in several rows is listed only once.
        names_by_citation.setdefault(citation, {})[name] = None

    # Sort by the joined names, falling back to the citation text on ties so
    # the output order is fully deterministic.
    ordered_entries = sorted(
        (", ".join(names), citation)
        for citation, names in names_by_citation.items()
    )
    rendered = [
        f"% Datasets: {names}\n{citation}"
        for names, citation in ordered_entries
    ]
    return f"{_MMS_CITATION}\n\n{CITATION_SEPARATOR.join(rendered)}"
# Page setup: title and icon for the "Dataset statistics" page.
st.set_page_config(page_title="Dataset statistics", page_icon="📈")

st.markdown("# Dataset statistics")
st.write(
    """TODO: Description"""
)

# Load the statistics table, let the user narrow it down, and show the result.
df = get_language_stats_df()
df_filter = filter_dataframe(df)
st.dataframe(df_filter)

# On click, render the citations for the currently filtered rows as a code
# block. The export is built once per click.
if st.button("Export citations"):
    val = export_citations(df_filter)
    st.code(val)