-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpltActionValue.m
204 lines (171 loc) · 6.83 KB
/
pltActionValue.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
function [h1,h2] = pltActionValue(M,HQ,pxlCellWidth,doSurf,sFile)
% pltActionValue  Color the maze by the normalized final action-values
% -------------------------------------------------------------------------
%
% Function :
%   [h1,h2] = pltActionValue(M,HQ,pxlCellWidth,doSurf,sFile)
%
% Inputs :
%   M            - Multi layer maze array. Layer 6 marks the start (==1)
%                  and finish (==2) cell, layer 7 holds the teleport ids
%                  (1..nTP), as read by this function.
%   HQ           - Action-value record; only the last page HQ(:,:,end) is
%                  visualized, indexed per maze cell (row,col).
%   pxlCellWidth - Cell width, forwarded unchanged to pltDrawMaze
%   doSurf       - (optional) 1 to additionally create a surface plot
%   sFile        - (optional) base file name; the figures are saved as
%                  ../Report/figures/<sFile>.png and, when the surface
%                  plot is made, ../Report/figures/<sFile>_surf.png
%
% Outputs :
%   h1 - First output of pltDrawMaze (used here as the maze figure handle)
%   h2 - Figure handle of the surface plot, [] if no surface plot is made
%
% -------------------------------------------------------------------------
% Author  : P.C. Luteijn
% email   : [email protected]
% Date    : July 2017
% Comment : Every maze cell is colored according to its value in the last
%           page of the Q-Matrix record, normalized to the colormap
%           resolution. Optionally an extra surface plot is generated
%           portraying the normalized Q-Matrix values.
% -------------------------------------------------------------------------

    % Optional arguments
    % ---------------------------------------------------------------------
    % nargin is used instead of exist(): it cannot be fooled by files on
    % the path and it lets callers pass [] to skip an optional argument.
    wantSurf = nargin >= 4 && isscalar(doSurf) && doSurf == 1;
    doSave   = nargin >= 5 && ~isempty(sFile);
    % Always define the 2nd output, also when no surface plot is requested
    h2 = [];

    % Maze properties
    % ---------------------------------------------------------------------
    % Get maze size
    [nr,nc,~] = size(M);
    % Number of teleport locations
    nTP = max(max(M(:,:,7)));
    % Start/End position
    [p0(1),p0(2)] = find(M(:,:,6)==1);
    [p1(1),p1(2)] = find(M(:,:,6)==2);

    % Colormap
    res  = 1000;      % Resolution
    cmap = jet(res);  % Normalized values colormap

    % Cell colors
    clrStartEnd = [ 0.8, 0.8, 0.8 ];  % Grey
    clrTeleport = [ 1.0, 0.0, 1.0 ];  % Magenta

    % Create figure
    [h1,objShape,idxR,tbTitle] = pltDrawMaze(M,pxlCellWidth,1);

    % Modify parameters
    h1.Name            = 'Action-Values';
    tbTitle.FontWeight = 'Normal';
    tbTitle.FontSize   = 10;
    tbTitle.String     = '';

    % Normalize
    % ---------------------------------------------------------------------
    % Normalized Q-Matrix record wrt the colormap resolution:
    % the maximum maps to index res, the minimum to index 1.
    H    = HQ(:,:,end);
    maxH = max(max(H));
    minH = min(min(H));
    if maxH == minH
        % Flat Q-Matrix: avoid 0/0, every cell equals the maximum
        normH = res*ones(size(H));
    else
        normH = res - round(res*(H-maxH)./(minH-maxH));
    end
    normH(normH==0) = 1;  % index 0 is not a valid colormap row

    % Visualize Q-Matrix
    % ---------------------------------------------------------------------
    % Update all cells wrt the corresponding Q-Matrix value
    for i = 1:nr
        for j = 1:nc
            % Color code
            clrCell = cmap(normH(i,j),:);
            % Update figure
            idxC = idxR(i,j);
            objShape(idxC).EdgeColor = clrCell;
            objShape(idxC).FaceColor = clrCell;
        end
    end

    % TextBox & Cell color finishing
    % ---------------------------------------------------------------------
    % Start/End location
    addCellLabel(p0(1),p0(2),nr,'S');
    addCellLabel(p1(1),p1(2),nr,'F');
    % Cell color : Start/Finish
    objShape(idxR(p0(1),p0(2))).FaceColor = clrStartEnd;
    objShape(idxR(p1(1),p1(2))).FaceColor = clrStartEnd;

    % Teleport Locations
    for i = 1:nTP
        % All cells carrying teleport id i (normally one pair)
        [tR,tC] = find(M(:,:,7)==i);
        % String
        strTP = sprintf('%i',i);
        for k = 1:numel(tR)
            % Add to figure
            addCellLabel(tR(k),tC(k),nr,strTP);
            % Cell color : Teleport Location
            objShape(idxR(tR(k),tC(k))).FaceColor = clrTeleport;
        end
    end

    % Add the colorbar
    % Tick positions run 0..1, the labels run -1.0..0.0 (top tick = 0.0)
    tickPos = (0:1:5)./5;
    nTick   = numel(tickPos);
    tickLbl = cell(1,nTick);
    for i = 1:nTick
        val = tickPos(nTick+1-i);
        if val == 0
            % Separate case so zero is not printed as '-0.0'
            tickLbl{i} = sprintf(' % 2.1f',val);
        else
            tickLbl{i} = sprintf(' % 2.1f',-val);
        end
    end
    colormap(cmap)
    colorbar('Ticks',tickPos,'TickLabels',tickLbl)

    % Save file
    if doSave
        strSave = [ '../Report/figures/' sFile '.png' ];
        saveas(gcf,strSave)
    end

    %% Extra: Surface plot
    % ---------------------------------------------------------------------
    % Check if surf-plot is requested
    if wantSurf
        % Normalized values, flipped upside down
        surfF = normH(nr:-1:1,1:nc)./res;

        % Extension by 1 row/col
        surfFext = zeros(nr+2,nc+2);            % Extended empty
        surfFext(2:nr+1,2:nc+1) = surfF;        % Copy maze
        surfFext(1,2:nc+1)      = surfF(1,:);   % Add top row
        surfFext(nr+2,2:nc+1)   = surfF(nr,:);  % Add bottom row
        surfFext(2:nr+1,1)      = surfF(:,1);   % Add left col
        surfFext(2:nr+1,nc+2)   = surfF(:,nc);  % Add right col
        % Correct corner points (mean of the two neighbouring border cells)
        surfFext(1,1)     = (surfFext(2,1)+surfFext(1,2))/2;
        surfFext(1,end)   = (surfFext(2,end)+surfFext(1,end-1))/2;
        surfFext(end,1)   = (surfFext(end,2)+surfFext(end-1,1))/2;
        surfFext(end,end) = (surfFext(end-1,end)+surfFext(end,end-1))/2;

        % Surface plot
        % NOTE(review): h1.Position is applied in normalized units; this
        % presumes pltDrawMaze also uses normalized units - confirm.
        h2 = figure('Name','Action-Value surface plot', ...
                    'Units','Normalized',...
                    'Position',h1.Position);
        surf(1:nc+2,1:nr+2,surfFext-1), grid on
        % Modify plot
        colormap(cmap);
        % Modify axes
        ax = gca;
        ax.XLim = [2,nc+2];
        ax.YLim = [2,nr+2];
        ax.XTickLabel = '';
        ax.YTickLabel = '';

        % Save file
        if doSave
            strSave = [ '../Report/figures/' sFile '_surf.png' ];
            saveas(h2,strSave)
        end
    end
end

function addCellLabel(row,col,nr,str)
% Writes a bold, centered label at the plot position used for maze cell
% (row,col): x = col - 0.5, y = nr + 0.5 - row.
    hold on
    text( col - 0.5, nr + 0.5 - row, str, ...
          'HorizontalAlignment', 'center', ...
          'VerticalAlignment',   'middle', ...
          'FontSize',            8, ...
          'FontWeight',          'Bold' );
    hold off
end