pmthangk09 committed
Commit a6f70d0 · 2 Parent(s): ee56cf8 f7ed643

Merge branch 'main' of https://huggingface.co/spaces/ATB/AI-trade-bot-demo

Files changed (4):
  1. rl_agent/env.py +28 -29
  2. rl_agent/policy.py +6 -6
  3. rl_agent/test_env.py +127 -0
  4. rl_agent/utils.py +35 -0
rl_agent/env.py CHANGED
@@ -1,53 +1,47 @@
import numpy as np
import pandas as pd
+import torch

class Environment:

-    def __init__(self, data, history_t=90):
+    def __init__(self, data, history_t=8, history=[0.1, 0.2, -0.1, -0.2, 0., 0.5, 0.9], state_size=9):
        self.data = data
+        self.history = history
        self.history_t = history_t
+        self.state_size = state_size
+        self.cost_rate = 0.0001
        self.reset()

    def reset(self):
        self.t = 0
        self.done = False
        self.profits = 0
-        self.positions = []
-        self.position_value = 0
-        self.history = [0 for _ in range(self.history_t)]
+        self.position_value = 0.
+        self.history = self.history[:self.state_size - 1]
        return [self.position_value] + self.history # obs

    def step(self, act):
        reward = 0

-        # act = 0: stay, 1: buy, -1: sell
-        if act == 1:
-            self.positions.append(self.data.iloc[self.t, :]['Close'])
-        elif act == 2: # sell
-            if len(self.positions) == 0:
-                reward = -1
-            else:
-                profits = 0
-                for p in self.positions:
-                    profits += (self.data.iloc[self.t, :]['Close'] - p)
-                reward += profits
-                self.profits += profits
-                self.positions = []
+        # act = 0: stay, act > 0: buy, act < 0: sell
+        # Additive profits
+        cost_amount = np.abs(act - self.position_value)

+        Zt = self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']
+        reward = (self.position_value * Zt) - (self.cost_rate * cost_amount)
+        self.profit = self.position_value * Zt
+        self.profits += self.profit
+
        # set next time
        self.t += 1
-        self.position_value = 0
-        for p in self.positions:
-            self.position_value += (self.data.iloc[self.t, :]['Close'] - p)
+        self.position_value = act
+
        self.history.pop(0)
-        self.history.append(self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close'])

-        # clipping reward
-        if reward > 0:
-            reward = 1
-        elif reward < 0:
-            reward = -1
+        self.history.append(self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']) # the price being traded

+        self.position_value = self.position_value.item()
+
        return [self.position_value] + self.history, reward, self.done # obs, reward, done

@@ -64,9 +58,14 @@ if __name__ == "__main__":
    test = data[date_split:]
    print(train.head(10))

-    env = Environment(train)
+    history = []
+    for i in range(1, 9):
+        c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
+        history.append(c)
+
+    env = Environment(train, history=history)
    print(env.reset())
-    for _ in range(3):
+    for _ in range(9, 12):
        pact = np.random.randint(3)
-        print(env.step(pact))
+        print(env.step(pact)[1])
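
Note: the new step() drops the discrete buy/sell bookkeeping and pays an additive-profit style reward: the previously held position times the one-step close-to-close change Zt, minus a cost proportional to how much the position moves. A minimal standalone sketch of that reward (function and variable names here are illustrative, not part of the diff; cost_rate mirrors Environment.cost_rate):

import numpy as np

def additive_profit_reward(prev_position, new_position, close_t, close_t_minus_1, cost_rate=0.0001):
    # Reward as computed in the new Environment.step(): profit on the position
    # held over the bar, minus a turnover cost on the change of position.
    z_t = close_t - close_t_minus_1                          # one-step price change Zt
    cost = cost_rate * np.abs(new_position - prev_position)  # turnover penalty
    return prev_position * z_t - cost

# e.g. holding a 0.5 position through a 0.0008 move, then shifting to 0.8:
# 0.5 * 0.0008 - 0.0001 * 0.3 ≈ 0.00037
print(additive_profit_reward(0.5, 0.8, 1.0720, 1.0712))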
rl_agent/policy.py CHANGED
@@ -8,19 +8,19 @@ class Policy(nn.Module):

        super(Policy, self).__init__()

-        self.layer1 = nn.Linear(input_channels, 2 * input_channels)
+        self.layer1 = nn.Linear(input_channels, 1)
        self.tanh1 = nn.Tanh()
-        self.layer2 = nn.linear(2 * input_channels, 1)
-        self.tanh2 = nn.Tanh()
+        # self.layer2 = nn.Linear(2 * input_channels, 1)
+        # self.tanh2 = nn.Tanh()

    def forward(self, state):

        hidden = self.layer1(state)
        hidden = self.tanh1(hidden)
-        hidden = self.layer2(hidden)
-        action = self.tanh2(hidden)
+        # hidden = self.layer2(hidden)
+        # action = self.tanh2(hidden)

-        return action
+        return hidden
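
Note: with the second layer commented out, the policy reduces to a single nn.Linear(input_channels, 1) followed by Tanh, i.e. a linear trader whose scalar output in (-1, 1) is used directly as the position. A minimal sketch of the reduced network (the class name and the 15-dimensional state below are illustrative; the real constructor is Policy(input_channels=...) as in the diff):

import torch
import torch.nn as nn

class LinearTrader(nn.Module):                      # sketch of the reduced Policy
    def __init__(self, input_channels):
        super().__init__()
        self.layer1 = nn.Linear(input_channels, 1)  # one linear layer
        self.tanh1 = nn.Tanh()                      # squash output to (-1, 1)

    def forward(self, state):
        return self.tanh1(self.layer1(state))       # scalar position signal

state = torch.zeros(15)                 # [position_value] + 14 recent price changes
action = LinearTrader(15)(state)
print(action.shape, action.item())      # torch.Size([1]), value in (-1, 1)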
rl_agent/test_env.py ADDED
@@ -0,0 +1,127 @@
+from env import Environment
+from policy import Policy
+from utils import myOptimizer
+
+import pandas as pd
+import numpy as np
+import torch
+from collections import OrderedDict
+
+import matplotlib.pyplot as plt
+
+from tqdm import tqdm
+from torch.utils.tensorboard import SummaryWriter
+
+if __name__ == "__main__":
+    writer = SummaryWriter('runs/new_data_ex_7')
+
+    # data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
+    data = pd.read_csv('./data/EURUSD_Candlestick_30_M_BID_01.01.2021-04.02.2023.csv')
+    # data['Local time'] = pd.to_datetime(data['Local time'])
+    data = data.set_index('Local time')
+    print(data.index.min(), data.index.max())
+
+    # date_split = '19.09.2022 17:55:00.000 GMT-0500'
+    # date_split = '25.08.2022 04:30:00.000 GMT-0500' # 30 min
+
+    date_split = '03.02.2023 15:30:00.000 GMT-0600' # 30 min
+
+    train = data[:date_split]
+    test = data[date_split:]
+
+    learning_rate = 0.001
+    first_momentum = 0.0
+    second_momentum = 0.0001
+    transaction_cost = 0.0001
+    adaptation_rate = 0.01
+    state_size = 15
+    equity = 1.0
+
+    agent = Policy(input_channels=state_size)
+    optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
+
+    history = []
+    for i in range(1, state_size):
+        c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
+        history.append(c)
+
+    env = Environment(train, history=history, state_size=state_size)
+    observation = env.reset()
+
+    model_gradients_history = dict()
+    checkpoint = OrderedDict()
+
+    for name, param in agent.named_parameters():
+        model_gradients_history.update({name: torch.zeros_like(param)})
+
+    for i in tqdm(range(state_size, len(train))):
+        observation = torch.as_tensor(observation).float()
+        action = agent(observation)
+        observation, reward, _ = env.step(action.data.to("cpu").numpy())
+
+        action.backward()
+
+        for name, param in agent.named_parameters():
+            grad_n = param.grad
+            param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
+            checkpoint[name] = param
+            model_gradients_history.update({name: grad_n})
+
+        if i > 10000:
+            equity += env.profit
+            writer.add_scalar('equity', equity, i)
+        else:
+            writer.add_scalar('equity', 1.0, i)
+
+        optimizer.after_step(reward)
+        agent.load_state_dict(checkpoint)
+
+    ###########
+    ###########
+
+    # history = []
+    # for i in range(1, state_size):
+    #     c = test.iloc[i, :]['Close'] - test.iloc[i-1, :]['Close']
+    #     history.append(c)
+
+    # env = Environment(test, history=history, state_size=state_size)
+    # observation = env.reset()
+
+    # model_gradients_history = dict()
+    # checkpoint = OrderedDict()
+
+    # for name, param in agent.named_parameters():
+    #     model_gradients_history.update({name: torch.zeros_like(param)})
+
+    # for _ in tqdm(range(state_size, len(test))):
+    #     observation = torch.as_tensor(observation).float()
+    #     action = agent(observation)
+    #     observation, reward, _ = env.step(action.data.numpy())
+
+    #     action.backward()
+
+    #     for name, param in agent.named_parameters():
+    #         grad_n = param.grad
+    #         param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
+    #         checkpoint[name] = param
+    #         model_gradients_history.update({name: grad_n})
+
+    #     optimizer.after_step(reward)
+    #     agent.load_state_dict(checkpoint)
+
+    print(env.profits)
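
Note: this loop never instantiates a torch.optim optimizer. action.backward() fills each param.grad with the gradient of the position with respect to the weights, myOptimizer.step() converts that (together with the reward and the gradient from the previous step) into a weight delta, and the nudged tensors are written back with load_state_dict. A compressed, hedged restatement of one update (helper name and argument order are illustrative, not a drop-in replacement for the loop above):

import torch
from collections import OrderedDict

def direct_rl_update(agent, optimizer, action, reward, last_price_change, grad_history):
    # One parameter update in the style of the training loop above.
    action.backward()                                    # d(position)/d(theta) into .grad
    new_state = OrderedDict()
    for name, param in agent.named_parameters():
        grad_n = param.grad
        delta = optimizer.step(grad_n, reward, last_price_change, grad_history[name])
        new_state[name] = param.detach() + delta         # manual weight nudge
        grad_history[name] = grad_n                      # remember gradient for next step
    optimizer.after_step(reward)                         # update running reward moments
    agent.load_state_dict(new_state)                     # write weights back into the model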
rl_agent/utils.py ADDED
@@ -0,0 +1,35 @@
+import numpy as np
+import torch
+
+class myOptimizer():
+
+    def __init__(self, lr, mu, mu_square, adaptation_rate, transaction_cost):
+        self.lr = lr
+        self.mu = mu
+        self.mu_square = mu_square
+        self.adaptation_rate = adaptation_rate
+        self.transaction_cost = transaction_cost
+
+    def step(self, grad_n, reward, last_observation, last_gradient):
+
+        numerator = self.mu_square - (self.mu * reward)
+        denominator = np.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
+
+        gradient = numerator / denominator
+
+        current_grad = (-1.0 * self.transaction_cost * grad_n)
+
+        previous_grad = (last_observation + self.transaction_cost) * last_gradient
+
+        gradient = torch.as_tensor(gradient) * (current_grad + previous_grad)
+
+        return torch.as_tensor(self.lr * gradient)
+
+    def after_step(self, reward):
+
+        self.mu = self.mu + self.adaptation_rate * (reward - self.mu)
+        self.mu_square = self.mu_square + self.adaptation_rate * ((reward ** 2) - self.mu_square)
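
Note: myOptimizer looks like a hand-rolled differential Sharpe ratio update of the kind used in direct-reinforcement trading (Moody and Saffell style): mu and mu_square are exponential moving estimates of the first and second moments of the reward, and step() multiplies the Sharpe-ratio derivative by a chain-rule term for a reward of the form R_t = F_{t-1} Z_t - δ|F_t - F_{t-1}| (the code drops the sign of the position change in the cost term). The correspondence below is an interpretation of the code, not something stated in the commit, with A ≙ mu, B ≙ mu_square, η ≙ adaptation_rate, δ ≙ transaction_cost, Z_t ≙ last_observation:

\frac{dS_t}{dR_t} \approx \frac{B_{t-1} - A_{t-1} R_t}{\left(B_{t-1} - A_{t-1}^{2}\right)^{3/2}},
\qquad
\frac{dR_t}{d\theta} \approx -\delta\,\frac{dF_t}{d\theta} + (Z_t + \delta)\,\frac{dF_{t-1}}{d\theta},
\qquad
\Delta\theta = \mathrm{lr}\,\frac{dS_t}{dR_t}\,\frac{dR_t}{d\theta}

after_step() then advances the running moments:

A_t = A_{t-1} + \eta\,(R_t - A_{t-1}),
\qquad
B_t = B_{t-1} + \eta\,(R_t^{2} - B_{t-1})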