File size: 2,442 Bytes
9df4cc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os 
import json
from tqdm import tqdm
import argparse

from indices import *
from data import  prepare_data_for_symbol, query_gpt4, create_dataset
from prompt import get_all_prompts
from data_infererence_fetch import get_curday, fetch_all_data, get_all_prompts_online


def main(args):
    """Run the dataset-building pipeline for one index.

    The steps are: acquire per-symbol data, generate prompts and query
    GPT-4, transform the results into a training dataset, and save that
    dataset under ``./data``.

    Args:
        args: Mapping with the keys ``index_name``, ``start_date``,
            ``end_date``, ``min_past_weeks``, ``max_past_weeks`` and
            ``train_ratio``. ``index_name`` must be one of ``"dow"``,
            ``"euro"`` or ``"crypto"``.

    Raises:
        ValueError: If ``index_name`` is not one of the supported indices.
    """
    (
        index_name,
        start_date,
        end_date,
        min_past_weeks,
        max_past_weeks,
        train_ratio,
    ) = (
        args[key]
        for key in (
            'index_name',
            'start_date',
            'end_date',
            'min_past_weeks',
            'max_past_weeks',
            'train_ratio',
        )
    )

    # Reject unknown indices up front, before anything touches the disk.
    if index_name not in ("dow", "euro", "crypto"):
        raise ValueError("Invalid index name")

    # Only the crypto index is processed with with_basics disabled.
    with_basics = index_name != "crypto"

    # Map the short CLI name to its display label and constituent list.
    if index_name == "dow":
        index_name, index = "DOW-30", DOW_30
    elif index_name == "euro":
        index_name, index = "EURO-STOXX-50", EURO_STOXX_50
    else:
        index_name, index = "CRYPTO", CRYPTO

    data_dir = f"./data/{index_name}_{start_date}_{end_date}"
    os.makedirs(data_dir, exist_ok=True)

    # Step 1: acquire data, one symbol at a time.
    print("Acquiring data")
    for ticker in tqdm(index):
        print(f"Processing {ticker}")
        prepare_data_for_symbol(
            ticker, data_dir, start_date, end_date, with_basics=with_basics
        )

    # Step 2: generate prompts and query GPT-4.
    print("Generating prompts and querying GPT-4")
    query_gpt4(
        index,
        data_dir,
        start_date,
        end_date,
        min_past_weeks,
        max_past_weeks,
        with_basics=with_basics,
    )

    # Step 3: transform into training format.
    print("Transforming into training format")
    dataset = create_dataset(
        index, data_dir, start_date, end_date, train_ratio, with_basics=with_basics
    )

    # Step 4: save the dataset; the directory name encodes the run parameters
    # with dashes and dots stripped from the dates and the ratio.
    index_tag = index_name.lower()
    start_tag = start_date.replace('-', '')
    end_tag = end_date.replace('-', '')
    ratio_tag = str(train_ratio).replace('.', '')
    output_path = f"./data/fingpt-forecaster-{index_tag}-{start_tag}-{end_tag}-{min_past_weeks}-{max_past_weeks}-{ratio_tag}"
    dataset.save_to_disk(output_path)


def build_arg_parser():
    """Build the command-line parser for the dataset-building script.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--index_name``,
        ``--start_date``, ``--end_date``, ``--min_past_weeks``,
        ``--max_past_weeks`` and ``--train_ratio``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--index_name", default="crypto", choices=["dow", "euro", "crypto"], help="index name")
    ap.add_argument("--start_date", default="2022-12-31", help="start date")
    ap.add_argument("--end_date", default="2023-12-31", help="end date")
    # Without an explicit `type`, argparse hands back command-line values as
    # strings while the defaults stay numeric; convert so both paths agree.
    ap.add_argument("--min_past_weeks", default=1, type=int, help="min past weeks")
    ap.add_argument("--max_past_weeks", default=4, type=int, help="max past weeks")
    ap.add_argument("--train_ratio", default=0.6, type=float, help="train ratio")
    return ap


if __name__ == "__main__":

    # main() expects a plain dict, so convert the parsed Namespace.
    args = vars(build_arg_parser().parse_args())

    main(args)