{ blitter.pas

  NOTE: the text that preceded the unit here was residue from a repository
  web page (navigation/notification text and a 1..372 line-number gutter).
  It was not part of the Pascal source and has been reduced to this comment. }
unit blitter;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, Platform, HeapManager;
procedure dma_blit(chn,from,x,y,too,x2,y2,len,lines,bpl1,bpl2:integer);
procedure blit8(from,x,y,too,x2,y2,length,lines,bpl1,bpl2:integer);
//procedure aligned_blit8(from,x,y,too,x2,y2,length,lines,bpl1,bpl2:integer);
procedure fill(start,len,color:integer);
procedure fill32(start,len,color:integer);
procedure fastmove(from,too,len:integer);
procedure fill2d(dest,x,y,length,lines,bpl,color:integer);
procedure fill2d32(dest,x,y,length,lines,bpl:integer;color:cardinal);
implementation
uses retromalina;
// Table of DMA control blocks: first index 0..15 is the channel number used
// by dma_blit, second index 0..7 is the word inside that channel's 8-word
// (32-byte) control block.
type TCtrlBlock=array[0..15,0..7] of cardinal;
PCtrlBlock=^TCtrlBlock;
const
// Channel used to place the dma_cs/dma_conblk variables below.
// (An older comment here said "dma #6"; the value actually is 9.)
blit_dma_chn=9; // default blitter DMA channel
_dma_enable= $3F007ff0; // DMA enable register
_dma_cs= $3F007000; // DMA control and status (channel 0; dma_blit adds $100 per channel)
_dma_conblk= $3F007004; // DMA ctrl block address (channel 0; dma_blit adds $100 per channel)
// NOTE(review): "base" is not declared in this file - presumably it comes
// from the retromalina unit; confirm.
_blitter_dmacb=base+$60100; // blitter dma control block
// NOTE(review): base+$60120 equals _blitter_dmacb+$20, i.e. the same address
// as row 1 of ctrl1 (each row is 32 bytes). Using DMA channel 1 through
// dma_blit would therefore overwrite the colour area - confirm intended.
_blitter_color=base+$60120; // blitter color area
nocache=$C0000000; // disable GPU cache (added to the control block address in dma_blit)
var
dma_enable:cardinal absolute _dma_enable; // DMA Enable register
// The next two map the registers of channel blit_dma_chn only; dma_blit
// computes its own pointers from its chn parameter and does not use them.
dma_cs:cardinal absolute _dma_cs+($100*blit_dma_chn); // DMA ctrl/status
dma_conblk:cardinal absolute _dma_conblk+($100*blit_dma_chn); // DMA ctrl block addr
ctrl1: TCtrlBlock absolute _blitter_dmacb; // control block table, one row per channel
// Three views (16 bytes / 8 words / 4 cardinals) of the same 16-byte colour area.
color8: array[0..15] of byte absolute _blitter_color;
color16: array[0..7] of word absolute _blitter_color;
color32: array[0..3] of cardinal absolute _blitter_color;
procedure blit8(from,x,y,too,x2,y2,length,lines,bpl1,bpl2:integer);
// Copy a rectangle of bytes (one byte per pixel) between two buffers.
//
// from, x, y, bpl1 - source buffer address, top-left corner of the rectangle
//                    inside it, and the source pitch in bytes per line
// too, x2, y2, bpl2 - the same for the destination buffer
// length, lines     - rectangle width in bytes and height in rows
//
// Nothing is copied when length or lines is <= 0. No clipping is done here.
// The copy runs byte by byte, left to right, top to bottom.
label p101;
begin
if (length<=0) or (lines<=0) then exit;
asm
  push {r0-r7}

  // r0 = from + y*bpl1 + x        (source cursor), r3 = bpl1
  ldr r3,bpl1
  ldr r2,y
  mul r4,r3,r2
  ldr r0,from
  add r0,r0,r4
  ldr r1,x
  add r0,r0,r1

  // r1 = too + y2*bpl2 + x2       (destination cursor), r5 = bpl2
  ldr r5,bpl2
  ldr r4,y2
  mul r6,r5,r4
  ldr r1,too
  add r1,r1,r6
  ldr r2,x2
  add r1,r1,r2

  ldr r4,length                 // r4 = bytes per row
  add r7,r1,r4                  // r7 = end of the current destination row
  ldr r2,lines                  // r2 = rows still to copy

p101:
  ldrb r6,[r0],#1               // copy one byte, advance both cursors
  strb r6,[r1],#1
  cmp r1,r7
  blt p101                      // until the destination row is complete

  add r0,r0,r3                  // source cursor -> start of its next row
  sub r0,r0,r4
  add r7,r7,r5                  // r7 -> end of the next destination row
  sub r1,r7,r4                  // destination cursor -> start of that row
  subs r2,r2,#1
  bgt p101                      // next row while any remain

  pop {r0-r7}
end;
end;
procedure dma_blit(chn,from,x,y,too,x2,y2,len,lines,bpl1,bpl2:integer);
// Copy a rectangle with a DMA channel and wait until the transfer finishes.
//
// chn               - DMA channel; also the row of ctrl1 that receives the
//                     control block. Must lie inside the bounds of ctrl1,
//                     otherwise the call does nothing.
// from, x, y, bpl1  - source buffer address, rectangle origin inside it and
//                     source pitch in bytes per line
// too, x2, y2, bpl2 - the same for the destination
// len, lines        - rectangle width in bytes and height in rows
//
// The rectangle is clipped: len is limited so x+len does not exceed bpl1,
// negative x2/y2 are moved to 0 (advancing x/y and shrinking len/lines by the
// same amount), and the right/bottom edges are limited with xres/yres.
// NOTE(review): xres/yres are not declared in this file - presumably the
// screen size exported by retromalina; confirm.
// If nothing is left after clipping the procedure returns without touching
// the hardware.
//
// The call blocks: it spins on the channel's control/status register until
// bit 0 reads back as 0. There is no timeout.
var transfer_info2:cardinal;
    cs:Pcardinal;       // control/status register of channel chn
    conblk:Pcardinal;   // control block address register of channel chn
begin
// chn indexes ctrl1 and selects the register block; an out-of-range value
// would write outside the control block table, so reject it up front.
if (chn<Low(ctrl1)) or (chn>High(ctrl1)) then exit;

if len<1 then exit;
if x+len>bpl1 then len:=bpl1-x;
if x2<0 then
  begin
  x:=x-x2;
  len:=len+x2;
  x2:=0;
  if len<1 then exit;
  end;
if y2<0 then
  begin
  y:=y-y2;
  lines:=lines+y2;
  if lines<1 then exit;
  y2:=0;
  end;
if (x2+len)>(xres-1) then len:=xres-x2;
if (y2+lines)>(yres-1) then lines:=yres-y2;
if len<1 then exit;
if lines<1 then exit;

transfer_info2:=$00009332;                       // burst=9, 2D
cs:=Pcardinal(_dma_cs+$100*chn);
conblk:=Pcardinal(_dma_conblk+$100*chn);

// Build the 8-word control block for this channel.
ctrl1[chn,0]:=transfer_info2;                    // transfer info
ctrl1[chn,1]:=from+x+bpl1*y+$80000000;           // source address
ctrl1[chn,2]:=too+x2+bpl2*y2;                    // destination address
// Low half: bytes per row; high half: row count minus one.
// NOTE(review): presumably the controller performs (value+1) rows in 2D
// mode - confirm against the DMA controller documentation.
ctrl1[chn,3]:=len+((lines-1) shl 16);            // transfer length
// Bytes to skip after each row: destination in the high half, source in the low half.
ctrl1[chn,4]:=((bpl2-len) shl 16)+((bpl1-len));  // 2D stride
ctrl1[chn,5]:=$0;                                // next ctrl block -> 0
ctrl1[chn,6]:=$0;                                // unused
ctrl1[chn,7]:=$0;                                // unused

// Push the control block and both pixel ranges out of the CPU data cache
// before the DMA engine reads/writes them.
CleanDataCacheRange(_blitter_dmacb+$20*chn,32);
cleandatacacherange(from+x+y*bpl1,(lines+1)*bpl1);
cleanDataCacheRange(too+x2+y2*bpl2,(lines+1)*bpl2);

// Start the hardware: enable the channel, point it at the control block
// (through the nocache alias) and set the start bits.
dma_enable:=dma_enable or (1 shl chn);           // enable dma channel chn
conblk^:=nocache+_blitter_dmacb+$20*chn;         // init DMA ctr block
cs^:=$00110003;                                  // start DMA
repeat until (cs^ and 1) =0 ;                    // busy-wait for completion

// Drop stale cache lines so the CPU sees what the DMA wrote.
InvalidateDataCacheRange(too+x2+y2*bpl2,(lines+1)*bpl2);
end;
procedure fill(start,len,color:integer);
// Fill len bytes of memory, beginning at address start, with the low byte
// of color.
//
// start - destination address, passed as an integer
// len   - number of bytes to fill; values <= 0 do nothing
// color - only the lowest byte is used; it is replicated into every byte
//
// Most of the range is written by an unrolled loop that stores 256 bytes per
// pass (8 STM instructions of 8 registers each). That loop can only write
// whole 256-byte bursts, so it is given the part of len that is a multiple of
// 256 and the remaining 0..255 bytes are written with FillChar.
// Earlier revisions ran the burst loop on len itself, which rounded every
// request up to the next multiple of 256 (and wrote 256 bytes even for
// len<=0), overrunning the requested range.
//
// NOTE(review): the STM stores presumably need a word-aligned start when
// len>=256 - confirm against callers.
label p101;
var burst_bytes,tail_bytes:integer;
begin
if len<=0 then exit;
tail_bytes:=len and 255;            // 0..255 bytes left over after the bursts
burst_bytes:=len-tail_bytes;        // multiple of 256
if burst_bytes>0 then
  begin
  asm
    push {r0-r12}
    ldr r12,burst_bytes
    ldr r10,start
    add r12,r10                     // r12 = first address after the burst area
    ldrb r0,color                   // low byte of color ...
    add r0,r0,r0,lsl #8             // ... replicated into all four bytes of r0
    add r0,r0,r0,lsl #16
    mov r1,r0
    mov r2,r0
    mov r3,r0
    mov r4,r0
    mov r5,r0
    mov r6,r0
    mov r7,r0
p101: stm r10!,{r0-r7}              // 8 x 32 bytes = 256 bytes per pass
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    cmps r10,r12
    blt p101
    pop {r0-r12}
  end;
  end;
if tail_bytes>0 then
  FillChar(PByte(start+burst_bytes)^,tail_bytes,byte(color and $FF));
end;
procedure fill2d(dest,x,y,length,lines,bpl,color:integer);
// Fill a rectangle in a one-byte-per-pixel buffer with a single byte value.
//
// dest          - buffer address, passed as an integer
// x, y          - top-left corner of the rectangle inside the buffer
// length, lines - rectangle width in bytes and height in rows
// bpl           - buffer pitch in bytes per line
// color         - only the lowest byte is used
//
// The rectangle is clipped before filling: negative x/y are moved to 0 with
// length/lines reduced accordingly, and the right/bottom edges are limited
// with xres/yres. If nothing is left the procedure returns without writing.
// NOTE(review): xres/yres are not declared in this file - presumably the
// screen size exported by retromalina; confirm.
//
// Compared with the earlier revision, two instructions left over from blit8
// (add r0,r3 / sub r0,r4) were removed: r0 and r3 are never loaded here, so
// they only operated on unrelated register contents.
label p101;
begin
if length<1 then exit;
if x<0 then
  begin
  length:=length+x;
  x:=0;
  if length<1 then exit;
  end;
if y<0 then
  begin
  lines:=lines+y;
  if lines<1 then exit;
  y:=0;
  end;
if (x+length)>(xres-1) then length:=xres-x;
if (y+lines)>(yres-1) then lines:=yres-y;
if length<1 then exit;
if lines<1 then exit;
asm
  push {r0-r7}
  ldr r1,dest
  ldr r2,x
  add r1,r2
  ldr r4,y
  ldr r5,bpl                        // r5 = bpl
  ldr r2,lines                      // r2 = rows still to fill
  mul r6,r5,r4
  add r1,r6                         // r1 = dest + x + y*bpl (write cursor)
  ldr r4,length                     // r4 = bytes per row
  ldrb r6,color                     // r6 = fill byte
  add r7,r1,r4                      // r7 = end of the current row
p101: strb r6,[r1],#1
  cmps r1,r7
  blt p101                          // fill one row
  add r1,r5                         // r1 = end of the next row
  mov r7,r1
  sub r1,r4                         // r1 = start of the next row
  subs r2,#1
  bgt p101                          // next row while any remain
  pop {r0-r7}
end;
end;
procedure fill2d32(dest,x,y,length,lines,bpl:integer;color:cardinal);
// Fill a rectangle in a 32-bit-per-pixel buffer with one 32-bit value.
//
// dest          - buffer address, passed as an integer
// x, y          - top-left pixel of the rectangle
// length, lines - rectangle width in pixels and height in rows
// bpl           - buffer pitch in BYTES per line
// color         - 32-bit value stored into every pixel
//
// Negative x/y are clipped to 0 (shrinking length/lines by the same amount).
// There is no clipping on the right or bottom edge. Nothing is written when
// the clipped rectangle is empty.
label p101;
begin
if length<1 then exit;
if x<0 then
  begin
  length:=x+length;
  x:=0;
  end;
if y<0 then
  begin
  lines:=y+lines;
  y:=0;
  end;
if (length<1) or (lines<1) then exit;
asm
  push {r0-r7}

  ldr r5,bpl                        // r5 = bytes per line
  ldr r4,y
  mul r6,r5,r4                      // r6 = y*bpl
  ldr r1,dest
  add r1,r1,r6
  ldr r2,x
  add r1,r1,r2,lsl #2               // r1 = dest + y*bpl + x*4 (write cursor)

  ldr r4,length                     // r4 = pixels per row
  add r7,r1,r4,lsl #2               // r7 = end of the current row
  ldr r6,color                      // r6 = fill value
  ldr r2,lines                      // r2 = rows still to fill

p101:
  str r6,[r1],#4
  cmp r1,r7
  blt p101                          // fill one row

  add r7,r7,r5                      // r7 = end of the next row
  sub r1,r7,r4,lsl #2               // r1 = start of the next row
  subs r2,r2,#1
  bgt p101                          // next row while any remain

  pop {r0-r7}
end;
end;
procedure fill32(start,len,color:integer);
// Fill len bytes of memory, beginning at address start, with the 32-bit
// value color repeated.
//
// start - destination address, passed as an integer
// len   - size of the area in BYTES; values <= 0 do nothing. Only whole
//         32-bit words are written, so a trailing 1..3 bytes are left alone.
// color - 32-bit value stored into every word
//
// Most of the range is written by an unrolled loop that stores 256 bytes per
// pass (8 STM instructions of 8 registers each). That loop can only write
// whole 256-byte bursts, so it is given the part of len that is a multiple of
// 256 and the remaining words are written with FillDWord.
// Earlier revisions ran the burst loop on len itself, which rounded every
// request up to the next multiple of 256 (and wrote 256 bytes even for
// len<=0), overrunning the requested range.
//
// NOTE(review): the word stores presumably need a word-aligned start -
// confirm against callers.
label p101;
var burst_bytes,tail_bytes:integer;
begin
if len<=0 then exit;
tail_bytes:=len and 255;            // 0..255 bytes left over after the bursts
burst_bytes:=len-tail_bytes;        // multiple of 256
if burst_bytes>0 then
  begin
  asm
    push {r0-r12}
    ldr r12,burst_bytes
    ldr r10,start
    add r12,r10                     // r12 = first address after the burst area
    ldr r0,color                    // same 32-bit value in r0..r7
    mov r1,r0
    mov r2,r0
    mov r3,r0
    mov r4,r0
    mov r5,r0
    mov r6,r0
    mov r7,r0
p101: stm r10!,{r0-r7}              // 8 x 32 bytes = 256 bytes per pass
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    stm r10!,{r0-r7}
    cmps r10,r12
    blt p101
    pop {r0-r12}
  end;
  end;
if tail_bytes>=4 then
  FillDWord(PCardinal(start+burst_bytes)^,tail_bytes shr 2,cardinal(color));
end;
procedure fastmove(from,too,len:integer);
// Copy len bytes from address from to address too.
//
// from - source address, passed as an integer
// too  - destination address, passed as an integer
// len  - number of bytes to copy; values <= 0 do nothing
//
// Most of the range is copied by an unrolled loop that moves 256 bytes per
// pass (8 LDM/STM pairs of 8 registers each), walking upwards in memory.
// That loop can only move whole 256-byte bursts, so it is given the part of
// len that is a multiple of 256 and the remaining 0..255 bytes are copied
// with Move.
// Earlier revisions ran the burst loop on len itself, which rounded every
// request up to the next multiple of 256 (and moved 256 bytes even for
// len<=0), reading and writing past the requested range.
//
// NOTE(review): the LDM/STM pairs presumably need word-aligned from/too when
// len>=256 - confirm against callers.
label p101;
var burst_bytes,tail_bytes:integer;
begin
if len<=0 then exit;
tail_bytes:=len and 255;            // 0..255 bytes left over after the bursts
burst_bytes:=len-tail_bytes;        // multiple of 256
if burst_bytes>0 then
  begin
  asm
    push {r0-r12}
    ldr r12,burst_bytes
    ldr r9,from
    add r12,r9                      // r12 = first source address after the burst area
    ldr r10,too
p101:
    ldm r9!, {r0-r7}                // 8 x 32 bytes = 256 bytes per pass
    stm r10!,{r0-r7}
    ldm r9!, {r0-r7}
    stm r10!,{r0-r7}
    ldm r9!, {r0-r7}
    stm r10!,{r0-r7}
    ldm r9!, {r0-r7}
    stm r10!,{r0-r7}
    ldm r9!, {r0-r7}
    stm r10!,{r0-r7}
    ldm r9!, {r0-r7}
    stm r10!,{r0-r7}
    ldm r9!, {r0-r7}
    stm r10!,{r0-r7}
    ldm r9!, {r0-r7}
    stm r10!,{r0-r7}
    cmps r9,r12
    blt p101
    pop {r0-r12}
  end;
  end;
if tail_bytes>0 then
  Move(PByte(from+burst_bytes)^,PByte(too+burst_bytes)^,tail_bytes);
end;
end.