main.py
# -*- coding: utf-8 -*-
"""
Created on Fri May 29 23:23:52 2020
@author: ChefLiutao
"""
import pandas as pd
import numpy as np
from DataPreprocessing import normalization
from DataPreprocessing import build_s_a
from pytorch_DDPG import DDPG
import matplotlib.pyplot as plt
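# The two helpers imported from DataPreprocessing are defined in another file.
# Below is a minimal sketch of what they are assumed to do, based purely on how
# they are used in this script (the real implementations may differ):
#
#   def build_s_a(series, n_features, horizon=1):
#       # Slide a window of n_features past values over the series: each window
#       # is a state, and the value `horizon` steps ahead is the target "action".
#       values = series.values
#       states = np.array([values[i:i + n_features]
#                          for i in range(len(values) - n_features - horizon + 1)])
#       actions = values[n_features + horizon - 1:]
#       return states, actions
#
#   def normalization(train, test):
#       # Min-max scale both sets using statistics from the training set only.
#       lo, hi = train.min(axis=0), train.max(axis=0)
#       return (train - lo) / (hi - lo), (test - lo) / (hi - lo)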
##################### Hyperparameters ######################
N_FEATURES = 6          # state dimension (length of the sliding window)
A_LOW = 0               # lower bound of the action space
A_HIGH = 1              # upper bound of the action space
LR_A = 0.001            # actor learning rate
LR_C = 0.003            # critic learning rate
N_ACTOR_HIDDEN = 30     # hidden units in the actor network
N_CRITIC_HIDDEN = 30    # hidden units in the critic network
MAX_EPISODES = 300
MAX_STEPS = 1000        # maximum steps per episode
GAMMA = 0.9             # discount factor
TAU = 0.1               # soft-update factor
MEMORY_CAPACITY = 1000  # replay buffer size
BATCH_SIZE = 128        # mini-batch size for gradient descent
#############################################################
# Load data
data_path = 'V6.csv'  # univariate time series, GBK-encoded CSV
data = pd.read_csv(data_path, encoding='gbk')
data = data.iloc[:, 0]  # use the first column as the target series
# Build the state matrix and the target action (the one-step-ahead value)
state, action = build_s_a(data, N_FEATURES, 1)
# Data split
SPLIT_RATE = 0.75
split_index = round(len(state) * SPLIT_RATE)
train_s, train_a = state[:split_index], action[:split_index]
test_s, test_a = state[split_index:], action[split_index:]
# Normalization
train_s_scaled, test_s_scaled = normalization(train_s, test_s)
# Min-max scale the target actions with training-set statistics (A = max, B = min)
A, B = train_a.max(), train_a.min()
train_a_scaled, test_a_scaled = (train_a - B) / (A - B), (test_a - B) / (A - B)
REPLACEMENT = [
    dict(name='soft', tau=TAU),
    dict(name='hard', rep_iter=200)
][0]  # you can try different target-replacement strategies
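# Background on the choice above: with the 'soft' strategy the target networks
# track the online networks a little at every learning step,
#     theta_target <- TAU * theta + (1 - TAU) * theta_target,
# while the 'hard' strategy copies the online weights into the target networks
# wholesale every rep_iter learning steps.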
# Training
ddpg = DDPG(N_FEATURES, N_ACTOR_HIDDEN, N_CRITIC_HIDDEN, A_HIGH, REPLACEMENT,
            memory_capacity=MEMORY_CAPACITY, lr_a=LR_A, lr_c=LR_C)
for episode in range(MAX_EPISODES):
    # Start the episode at a random position in the training set (the last state
    # is excluded because it has no successor)
    index = np.random.choice(len(train_s_scaled) - 1)
    s = train_s_scaled[index]
    ep_reward = 0
    for step in range(MAX_STEPS):
        a = ddpg.choose_action(s)
        # Reward: negative absolute error between the action (predicted value)
        # and the scaled one-step-ahead target
        r = -abs(a - train_a_scaled[index])
        ep_reward += r
        index += 1
        s_ = train_s_scaled[index]
        ddpg.store_transition(s, a, r, s_)
        if len(ddpg.memory) >= ddpg.memory_capacity:
            ddpg.learn()
        if (index == len(train_s_scaled) - 1) or (step == MAX_STEPS - 1):
            print('Episode %d : %.2f' % (episode, ep_reward))
            break
        s = s_
# Testing
pred = []
for i in range(len(test_s_scaled)):
    s_test = test_s_scaled[i]
    a_pred = ddpg.choose_action(s_test)
    pred.append(a_pred)
# choose_action returns an array, so pull the scalar out of each prediction
pred = [pred[i][0] for i in range(len(test_s_scaled))]
pred = pd.Series(pred)
# Map the scaled predictions back to the original value range
pred = pred * (A - B) + B
actual = pd.Series(test_a)
plt.scatter(pred, actual, marker='.')
plt.xlabel('Predicted Value')
plt.ylabel('Actual Value')
plt.show()
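# Optional: a quick numerical summary of forecast accuracy alongside the scatter
# plot. This is an illustrative addition, not part of the original script.
mae = np.mean(np.abs(pred.values - actual.values))
rmse = np.sqrt(np.mean((pred.values - actual.values) ** 2))
print('Test MAE: %.4f, RMSE: %.4f' % (mae, rmse))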