Spaces:
Runtime error
Runtime error
Szymon Woźniak
committed on
Commit
·
55c9088
1
Parent(s):
9d3e113
change citation formatting
Browse files
pages/2_Dataset_Statistics.py
CHANGED
@@ -7,9 +7,6 @@ from filter_dataframe import filter_dataframe
|
|
7 |
def get_language_stats_df():
|
8 |
return pd.read_parquet("data/datasets_stats.parquet")
|
9 |
|
10 |
-
def export_citations(df: pd.DataFrame) -> str:
|
11 |
-
return "\n\n".join(df["citation"].tolist())
|
12 |
-
|
13 |
_MMS_CITATION = """\
|
14 |
@misc{augustyniak2023massively,
|
15 |
title={Massively Multilingual Corpus of Sentiment Datasets and Multi-faceted Sentiment Classification Benchmark},
|
@@ -20,7 +17,7 @@ _MMS_CITATION = """\
|
|
20 |
primaryClass={cs.CL}
|
21 |
}"""
|
22 |
|
23 |
-
CITATION_SEPARATOR = "
|
24 |
|
25 |
def export_citations(df: pd.DataFrame):
|
26 |
dataset_names = df.original_dataset.tolist()
|
@@ -30,7 +27,7 @@ def export_citations(df: pd.DataFrame):
|
|
30 |
citations_grouped_df = df.groupby("citation").agg({"dataset": lambda x: ", ".join(x)}).reset_index().sort_values(by="dataset")
|
31 |
dataset_citations = ("% Datasets: " + citations_grouped_df["dataset"] + "\n" + citations_grouped_df["citation"]).to_list()
|
32 |
dataset_citations_joined = CITATION_SEPARATOR.join(dataset_citations)
|
33 |
-
return f"{_MMS_CITATION}\n
|
34 |
|
35 |
|
36 |
st.set_page_config(page_title="Dataset statistics", page_icon="📈")
|
|
|
7 |
def get_language_stats_df():
|
8 |
return pd.read_parquet("data/datasets_stats.parquet")
|
9 |
|
|
|
|
|
|
|
10 |
_MMS_CITATION = """\
|
11 |
@misc{augustyniak2023massively,
|
12 |
title={Massively Multilingual Corpus of Sentiment Datasets and Multi-faceted Sentiment Classification Benchmark},
|
|
|
17 |
primaryClass={cs.CL}
|
18 |
}"""
|
19 |
|
20 |
+
CITATION_SEPARATOR = "% " + ("-" * 90) + "\n\n"
|
21 |
|
22 |
def export_citations(df: pd.DataFrame):
|
23 |
dataset_names = df.original_dataset.tolist()
|
|
|
27 |
citations_grouped_df = df.groupby("citation").agg({"dataset": lambda x: ", ".join(x)}).reset_index().sort_values(by="dataset")
|
28 |
dataset_citations = ("% Datasets: " + citations_grouped_df["dataset"] + "\n" + citations_grouped_df["citation"]).to_list()
|
29 |
dataset_citations_joined = CITATION_SEPARATOR.join(dataset_citations)
|
30 |
+
return f"% MMS corpus citation\n{_MMS_CITATION}\n{CITATION_SEPARATOR}{dataset_citations_joined}"
|
31 |
|
32 |
|
33 |
st.set_page_config(page_title="Dataset statistics", page_icon="📈")
|