#---------------------------#
# DEFAULT BOT CONFIGURATION #
#---------------------------#
import os
symbol = "BTCUSDT"
interval = "1h"
datalimit = 1000  # Binance caps the klines "limit" parameter at 1000 per request
logs = "logs"
models = "models"
tablePath = f'{symbol}_{interval}.csv'
modelPath = f'{symbol}_{interval}.zip'
os.makedirs(logs, exist_ok=True)
os.makedirs(models, exist_ok=True)
#-------------------------------#
# FETCH DATA FROM API (Binance) #
#-------------------------------#
import requests
import pandas as pd
url = f'https://api.binance.com/api/v3/klines?symbol={symbol}&interval={interval}&limit={datalimit}'
data = requests.get(url).json()
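# Optional hardening (a minimal sketch, not part of the original flow): fail loudly if the
# request returns an HTTP error instead of a kline list, and don't hang on a dead connection.
# resp = requests.get(url, timeout=10)
# resp.raise_for_status()
# data = resp.json()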
df = pd.DataFrame(data, columns=[
    # Used columns
    'timestamp', 'open', 'high', 'low', 'close', 'volume',
    # Ignored columns (order follows the Binance kline payload)
    'close_time',
    'quote_asset_volume',
    'number_of_trades',
    'taker_buy_base_asset_volume',
    'taker_buy_quote_asset_volume',
    'ignore'
])
data = df[['open', 'high', 'low', 'close', 'volume']].astype(float) # Original data (current)
norm_data = (data - data.min()) / (data.max() - data.min()) # Normalized data (optional)
### DEBUG ONLY (save data to csv) (optional)
data.to_csv(f"{models}/original_{tablePath}", index=False)
norm_data.to_csv(f"{models}/{tablePath}", index=False)
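# Note: the environment below is built on the raw prices in "data"; "norm_data" is only the
# optional min-max scaled copy saved above, and can be swapped in (e.g. BuySell(norm_data))
# if a scaled observation space is preferred.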
#----------------------------#
# CREATE TRADING ENVIRONMENT #
#----------------------------#
import gym
import numpy as np
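# Note: this environment uses the classic Gym API (reset() returns obs, step() returns a
# 4-tuple), which matches older stable_baselines3 releases; recent SB3 versions are built
# around gymnasium, so the import and API may need adjusting depending on the installed version.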
class BuySell(gym.Env):
    def __init__(self, data):
        self.data = data
        # Define boundaries
        actions = 3
        minValue = -np.inf
        maxValue = np.inf
        columns = len(self.data.columns)
        # Define action and observation spaces
        self.action_space = gym.spaces.Discrete(actions)  # HOLD, BUY, SELL
        self.observation_space = gym.spaces.Box(low=minValue, high=maxValue, shape=(columns,))
        self.reset()
    def reset(self):
        # Default values
        self.t = 0
        self.bought = False
        self.cost = 0
        return self.data.iloc[self.t].values
    def step(self, action):
        # Required values
        current = self.data.iloc[self.t]
        reward = 0
        # Display values
        order = "HOLD"
        dd = 0
        # Apply market strategy
        if action == 0:  # HOLD
            pass
        elif action == 1:  # BUY
            if not self.bought:
                order = "BUY"
                self.cost = current['close']
                self.bought = True
        elif action == 2:  # SELL
            if self.bought and current['close'] - self.cost > 0:
                order = "SELL"
                reward = current['close'] - self.cost
                self.bought = False
        # Calculate drawdown (unrealised loss while a position is open)
        if current['close'] < self.cost and self.bought:
            dd = self.cost - current['close']
        # Advance time and return the *next* observation, so the agent decides on fresh data
        self.t += 1
        done = (self.t == len(self.data) - 1)
        obs = self.data.iloc[self.t].values
        # Return observation
        return obs, reward, done, {
            "action": order,
            "price": float(current['close']),
            "cost": float(self.cost),
            "profit": float(reward),
            "drawdown": float(dd),
        }
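# Optional sanity check (a minimal sketch, assuming stable_baselines3 is installed and its
# env checker accepts this classic Gym environment):
# from stable_baselines3.common.env_checker import check_env
# check_env(BuySell(data))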
#-----------------------#
# CREATE TRAINING MODEL #
#-----------------------#
# Test out different models from stable_baselines3 (current: DQN)
from stable_baselines3 import PPO, DQN, A2C
# Create Environment
strategy = BuySell(data)
iterations = 1000000
model = "DQN"
path = f"{models}/{model}_{modelPath}"
# Create or load model
if not os.path.exists(path):
    # Create
    model = DQN('MlpPolicy', strategy, verbose=1, tensorboard_log=logs)
    # Train
    model.learn(total_timesteps=iterations, reset_num_timesteps=False)
    # Save
    model.save(path)
else:
    # Load
    model = DQN.load(path)
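# Training metrics are written to the "logs" directory via tensorboard_log; they can be
# inspected with something like:  tensorboard --logdir logs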
#-------------------#
# MARKET PREDICTION #
#-------------------#
obs = strategy.reset()
profit = 0
drawdown = []
done = False
# Make Prediction
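# Note: model.predict() keeps the algorithm's default (possibly stochastic) action selection;
# for a purely greedy evaluation, deterministic=True is commonly passed here.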
while True:
    action, _states = model.predict(obs)
    obs, reward, done, info = strategy.step(action)
    profit += info['profit']
    drawdown.append(info['drawdown'])
    print(info)
    if done: break
print()
print("Ticker:", symbol)
print("Total Profit:", profit)
print("Max Drawdown:", max(drawdown))
print()
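# Note: "Max Drawdown" above is the largest per-candle unrealised loss versus the entry price
# of an open position, not a classic peak-to-trough equity drawdown.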