Spaces:
Running
Running
import datasets | |
from pathlib import Path | |
import argparse | |
# Datasets to fetch, as (source, destination) pairs:
#   source      -- positional arguments unpacked into ``datasets.load_dataset``
#                  (first element is the dataset path; the second is None or a
#                  configuration name such as 'sentences_50agree').
#   destination -- directory name, relative to this file's folder, that the
#                  dataset is saved under.
# Each destination must be unique; a repeated entry would be fetched and saved
# twice whenever the cache check is bypassed.
DATASETS = [
    # source, destination
    (('pauri32/fiqa-2018', None), 'fiqa-2018'),
    (('FinGPT/fingpt-finred', None), 'fingpt-finred'),
    (('zeroshot/twitter-financial-news-sentiment', None), 'twitter-financial-news-sentiment'),
    (('oliverwang15/news_with_gpt_instructions', None), 'news_with_gpt_instructions'),
    (('financial_phrasebank', 'sentences_50agree'), 'financial_phrasebank-sentences_50agree'),
    (('FinGPT/fingpt-fiqa_qa', None), 'fingpt-fiqa_qa'),
    (('FinGPT/fingpt-headline-cls', None), 'fingpt-headline-cls'),
    (('FinGPT/fingpt-convfinqa', None), 'fingpt-convfinqa'),
    (('FinGPT/fingpt-finred-cls', None), 'fingpt-finred-cls'),
    (('FinGPT/fingpt-ner', None), 'fingpt-ner'),
    (('FinGPT/fingpt-headline', None), 'fingpt-headline-instruct'),
    (('FinGPT/fingpt-finred-re', None), 'fingpt-finred-re'),
    (('FinGPT/fingpt-ner-cls', None), 'fingpt-ner-cls'),
    (('FinGPT/fingpt-fineval', None), 'fingpt-fineval'),
    (('FinGPT/fingpt-sentiment-cls', None), 'fingpt-sentiment-cls'),
]
def download(no_cache: bool = False):
    """Fetch every entry of ``DATASETS`` into the directory holding this file.

    Each dataset is loaded with ``datasets.load_dataset`` and written with
    ``save_to_disk`` to ``<this file's folder>/<destination>``.

    Args:
        no_cache: When falsy, a dataset whose destination directory already
            exists is skipped (a message is printed). When truthy, every
            dataset is loaded and saved again regardless.
    """
    root = Path(__file__).parent
    for source, name in DATASETS:
        target = root / name
        if no_cache or not target.is_dir():
            # ``source`` holds the positional arguments for load_dataset.
            loaded = datasets.load_dataset(*source)
            loaded.save_to_disk(target)
        else:
            print(f"Dataset found at {target}, skipping")
def str_to_bool(value):
    """Convert a command-line token into a real boolean.

    argparse passes option values as text, and any non-empty string
    (including "False") is truthy, so the text is interpreted explicitly
    instead of being forwarded as-is.

    Args:
        value: A bool (returned unchanged) or text such as "true"/"false",
            "yes"/"no", "1"/"0" (case-insensitive, surrounding spaces ignored).

    Returns:
        The boolean the token stands for.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in {"true", "t", "yes", "y", "1"}:
        return True
    if token in {"false", "f", "no", "n", "0"}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--no_cache",
        nargs="?",       # allow a bare ``--no_cache`` as well as ``--no_cache True``
        const=True,      # value used when the flag is given without an argument
        default=False,
        required=False,
        type=str_to_bool,  # previously ``str``, which made "--no_cache False" truthy
        help="Redownloads all datasets if set to True",
    )
    args = parser.parse_args()
    download(no_cache=args.no_cache)