Spaces:
Running
Running
import os | |
import json | |
from tqdm import tqdm | |
import argparse | |
from indices import * | |
from data import prepare_data_for_symbol, query_gpt4, create_dataset | |
from prompt import get_all_prompts | |
from data_infererence_fetch import get_curday, fetch_all_data, get_all_prompts_online | |
def main(args):
    """Run the data pipeline for one index and save the resulting dataset.

    Steps, in order: prepare per-symbol data, generate prompts and query
    GPT-4, convert the results into a training dataset, and save it to disk.

    Args:
        args: Mapping with the keys ``index_name`` (``"dow"``, ``"euro"`` or
            ``"crypto"``), ``start_date``, ``end_date``, ``min_past_weeks``,
            ``max_past_weeks`` and ``train_ratio``.

    Raises:
        ValueError: If ``index_name`` is not one of the supported names.
    """
    (
        index_name,
        start_date,
        end_date,
        min_past_weeks,
        max_past_weeks,
        train_ratio,
    ) = (
        args[key]
        for key in (
            "index_name",
            "start_date",
            "end_date",
            "min_past_weeks",
            "max_past_weeks",
            "train_ratio",
        )
    )

    # Resolve the short CLI name to (display label, symbol list, basics flag).
    # Only the crypto index runs with with_basics disabled.
    if index_name == "dow":
        label, symbols, with_basics = "DOW-30", DOW_30, True
    elif index_name == "euro":
        label, symbols, with_basics = "EURO-STOXX-50", EURO_STOXX_50, True
    elif index_name == "crypto":
        label, symbols, with_basics = "CRYPTO", CRYPTO, False
    else:
        raise ValueError("Invalid index name")

    # Working directory for this index/date-range combination.
    data_dir = f"./data/{label}_{start_date}_{end_date}"
    os.makedirs(data_dir, exist_ok=True)

    # Acquire data
    print("Acquiring data")
    for ticker in tqdm(symbols):
        print(f"Processing {ticker}")
        prepare_data_for_symbol(
            ticker, data_dir, start_date, end_date, with_basics=with_basics
        )

    # Generate prompt and query GPT-4
    print("Generating prompts and querying GPT-4")
    query_gpt4(
        symbols,
        data_dir,
        start_date,
        end_date,
        min_past_weeks,
        max_past_weeks,
        with_basics=with_basics,
    )

    # Transform into training format
    print("Transforming into training format")
    dataset = create_dataset(
        symbols, data_dir, start_date, end_date, train_ratio, with_basics=with_basics
    )

    # Save dataset; the directory name encodes every run parameter, with the
    # separators stripped from the dates and the ratio.
    index_tag = label.lower()
    start_tag = start_date.replace('-', '')
    end_tag = end_date.replace('-', '')
    ratio_tag = str(train_ratio).replace('.', '')
    output_path = (
        f"./data/fingpt-forecaster-{index_tag}-{start_tag}-{end_tag}"
        f"-{min_past_weeks}-{max_past_weeks}-{ratio_tag}"
    )
    dataset.save_to_disk(output_path)
def build_arg_parser():
    """Create the command-line parser for this script.

    The numeric options declare an explicit ``type`` so that values given on
    the command line are converted to ``int``/``float``. Without it argparse
    passes them through as strings, while the defaults stay numeric.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--index_name``,
        ``--start_date``, ``--end_date``, ``--min_past_weeks``,
        ``--max_past_weeks`` and ``--train_ratio``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--index_name",
        default="crypto",
        choices=["dow", "euro", "crypto"],
        help="index name",
    )
    parser.add_argument("--start_date", default="2022-12-31", help="start date")
    parser.add_argument("--end_date", default="2023-12-31", help="end date")
    parser.add_argument("--min_past_weeks", type=int, default=1, help="min past weeks")
    parser.add_argument("--max_past_weeks", type=int, default=4, help="max past weeks")
    parser.add_argument("--train_ratio", type=float, default=0.6, help="train ratio")
    return parser


if __name__ == "__main__":
    # main() expects a plain dict, so convert the argparse Namespace.
    args = vars(build_arg_parser().parse_args())
    main(args)