# Sigrid De los Santos
# Remove remaining binary file for Hugging Face
# 9df4cc0
import datasets
from pathlib import Path
import argparse
# Datasets to fetch. Each entry is ``(source, destination)``:
#   * ``source`` is a tuple of positional arguments that ``download`` unpacks
#     into ``datasets.load_dataset`` (here: a dataset path and a second
#     argument that is ``None`` for most entries).
#   * ``destination`` is the directory name, relative to this file's folder,
#     that the dataset is saved under.
# Destinations must be unique; a repeated entry would be fetched and saved
# twice whenever the on-disk check is bypassed.
DATASETS = [
    # source, destination
    (('pauri32/fiqa-2018', None), 'fiqa-2018'),
    (('FinGPT/fingpt-finred', None), 'fingpt-finred'),
    (('zeroshot/twitter-financial-news-sentiment', None), 'twitter-financial-news-sentiment'),
    (('oliverwang15/news_with_gpt_instructions', None), 'news_with_gpt_instructions'),
    (('financial_phrasebank', 'sentences_50agree'), 'financial_phrasebank-sentences_50agree'),
    (('FinGPT/fingpt-fiqa_qa', None), 'fingpt-fiqa_qa'),
    (('FinGPT/fingpt-headline-cls', None), 'fingpt-headline-cls'),
    (('FinGPT/fingpt-convfinqa', None), 'fingpt-convfinqa'),
    (('FinGPT/fingpt-finred-cls', None), 'fingpt-finred-cls'),
    (('FinGPT/fingpt-ner', None), 'fingpt-ner'),
    (('FinGPT/fingpt-headline', None), 'fingpt-headline-instruct'),
    (('FinGPT/fingpt-finred-re', None), 'fingpt-finred-re'),
    (('FinGPT/fingpt-ner-cls', None), 'fingpt-ner-cls'),
    (('FinGPT/fingpt-fineval', None), 'fingpt-fineval'),
    (('FinGPT/fingpt-sentiment-cls', None), 'fingpt-sentiment-cls'),
]
def download(no_cache: bool = False):
    """Fetch every entry of ``DATASETS`` and save it beside this file.

    Each dataset is loaded with ``datasets.load_dataset`` and written with
    ``save_to_disk`` into a sub-directory of the folder containing this
    module.

    Args:
        no_cache: When truthy, a dataset is loaded and saved again even if
            its destination directory already exists. When falsy, existing
            destination directories are reported and skipped.
    """
    root = Path(__file__).parent
    for source_args, folder in DATASETS:
        target = root / folder
        # Only the presence of the directory is checked, not its contents.
        needs_fetch = not Path(target).is_dir() or no_cache
        if needs_fetch:
            fetched = datasets.load_dataset(*source_args)
            fetched.save_to_disk(target)
        else:
            print(f"Dataset found at {target}, skipping")
def parse_bool_flag(value: str) -> bool:
    """Convert a command-line token such as ``"True"`` or ``"false"`` to a bool.

    Args:
        value: Raw text supplied on the command line.

    Returns:
        ``True`` for true/t/yes/y/1 and ``False`` for false/f/no/n/0
        (case-insensitive, surrounding whitespace ignored).

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    token = value.strip().lower()
    if token in {"true", "t", "yes", "y", "1"}:
        return True
    if token in {"false", "f", "no", "n", "0"}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # With type=str every supplied value, including "False", was a non-empty
    # string and therefore truthy. Parse it into a real bool instead, and let
    # a bare ``--no_cache`` (no value) mean True.
    parser.add_argument(
        "--no_cache",
        nargs="?",
        const=True,
        default=False,
        required=False,
        type=parse_bool_flag,
        help="Redownloads all datasets if set to True",
    )
    args = parser.parse_args()
    download(no_cache=args.no_cache)