"""Export the quotaclimat/frugalaichallenge-text-train splits to CSV files.

Loads the dataset with ``datasets.load_dataset``, converts its ``train`` and
``test`` splits to pandas DataFrames, and writes each one as a
semicolon-separated CSV file under ``outputs/``.
"""

from pathlib import Path

from datasets import load_dataset

from src.utils.eda_functions import process_text, generate_word_clouds_by_category

# Directory that receives the exported CSV files.
OUTPUT_DIR = Path("outputs")

dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")

train = dataset["train"].to_pandas()
test = dataset["test"].to_pandas()

# Optional preprocessing step, currently disabled:
# train["processed_quote"] = train["quote"].apply(process_text)

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists before writing; otherwise a fresh checkout
# fails with OSError after the dataset has already been downloaded.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

train.to_csv(OUTPUT_DIR / "train_v1.csv", sep=";", index=False)
test.to_csv(OUTPUT_DIR / "test.csv", sep=";", index=False)

# Generate word clouds (currently disabled):
# generate_word_clouds_by_category(train)