sumesh4C's picture
Upload 2 files
60d8d9d verified
raw
history blame
499 Bytes
from pathlib import Path

from datasets import load_dataset

from src.utils.eda_functions import process_text, generate_word_clouds_by_category
# Load the "quotaclimat/frugalaichallenge-text-train" dataset and convert its
# "train" and "test" splits to pandas DataFrames.
dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")
train = dataset["train"].to_pandas()
test = dataset["test"].to_pandas()

# Optional preprocessing step, currently disabled: would add a
# "processed_quote" column by applying process_text to each "quote" value.
# train["processed_quote"] = train["quote"].apply(process_text)

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first; exist_ok=True keeps re-runs from failing.
output_dir = Path("outputs")
output_dir.mkdir(parents=True, exist_ok=True)

# Export both splits as semicolon-separated CSV files without the index column.
# NOTE(review): ";" is presumably chosen because the quote text contains
# commas -- confirm with whatever reads these files.
train.to_csv(output_dir / "train_v1.csv", sep=";", index=False)
test.to_csv(output_dir / "test.csv", sep=";", index=False)

# Optional EDA step, currently disabled: generate word clouds from the
# training split.
# generate_word_clouds_by_category(train)