forked from rh-hideout/pokeemerald-expansion
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathm4a_1.s
2664 lines (2492 loc) · 62.5 KB
/
m4a_1.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
.include "asm/macros.inc"
.include "constants/gba_constants.inc"
.include "constants/m4a_constants.inc"
.syntax unified
.text
@ umul3232H32: high word of an unsigned 32x32 -> 64 bit multiply.
@ In:       r0, r1 = the two 32-bit factors
@ Out:      r0     = bits 63..32 of r0 * r1
@ Clobbers: r2 (low product word), r3 (high product word)
@ The entry point is Thumb; the multiply itself is executed in ARM state
@ because UMULL has no 16-bit Thumb encoding on this instruction set.
	thumb_func_start umul3232H32
umul3232H32:
	adr	r2, __umul3232H32	@ r2 = address of the ARM body (bit 0 clear)
	bx	r2			@ bit 0 clear -> execution continues in ARM state
	.arm
__umul3232H32:
	umull	r2, r3, r0, r1		@ r3:r2 = r0 * r1 (unsigned)
	mov	r0, r3			@ hand back only the upper 32 bits
	bx	lr			@ return; bit 0 of lr selects the caller's state
	thumb_func_end umul3232H32
@ SoundMain (Thumb): front half of the sound engine update.
@ Validates the SoundInfo block, builds the stack frame that SoundMainRAM
@ keeps using, runs the music player callback (if any) and the CGB sound
@ callback, computes where in the PCM buffer this call will write, and then
@ jumps (it does not call) to SoundMainRAM.
@
@ Stack frame after the prologue (offsets from sp):
@   0x00-0x17  local slots (0x08 = PCM write pointer, 0x14 = scanline limit)
@   0x18       SoundInfo pointer (saved r0)
@   0x1C-0x28  caller's r8-r11
@   0x2C-0x3C  caller's r4-r7 and lr
@ The matching epilogue is not in this function.
thumb_func_start SoundMain
SoundMain:
@ r0 = *SOUND_INFO_PTR, the SoundInfo block
ldr r0, lt_SOUND_INFO_PTR
ldr r0, [r0]
@ Only proceed when the ident field holds exactly ID_NUMBER.
ldr r2, lt_ID_NUMBER
ldr r3, [r0, o_SoundInfo_ident]
cmp r2, r3
beq SoundMain_1
bx lr @ Exit the function if ident doesn't match ID_NUMBER.
SoundMain_1:
@ Store ident + 1, so a nested call would fail the compare above.
@ Where ident is set back to ID_NUMBER is not visible in this function.
adds r3, 1
str r3, [r0, o_SoundInfo_ident]
@ Save r4-r7/lr, then r8-r11 (high registers cannot be pushed directly in
@ Thumb, so they are copied to r1-r4 first). r0 is pushed with them, which
@ places the SoundInfo pointer at [sp, 0x18] after the local area is reserved.
push {r4-r7,lr}
mov r1, r8
mov r2, r9
mov r3, r10
mov r4, r11
push {r0-r4}
sub sp, 0x18
@ Scanline limit: 0 when maxLines is 0, otherwise maxLines plus the current
@ VCOUNT, where a VCOUNT below VCOUNT_VBLANK is first raised by
@ TOTAL_SCANLINES.
ldrb r1, [r0, o_SoundInfo_maxLines]
cmp r1, 0 @ if maxLines is 0, there is no maximum
beq SoundMain_3
ldr r2, lt_REG_VCOUNT
ldrb r2, [r2]
cmp r2, VCOUNT_VBLANK
bhs SoundMain_2
adds r2, TOTAL_SCANLINES
SoundMain_2:
adds r1, r2
SoundMain_3:
str r1, [sp, 0x14]
@ Optional music player callback: called with r0 = musicPlayerHead.
@ call_r3 is defined outside this function; presumably it branches to r3.
ldr r3, [r0, o_SoundInfo_MPlayMainHead]
cmp r3, 0
beq SoundMain_4
ldr r0, [r0, o_SoundInfo_musicPlayerHead]
bl call_r3
ldr r0, [sp, 0x18]
SoundMain_4:
@ CGB sound callback: called unconditionally (no null check) with
@ r0 = SoundInfo pointer.
ldr r3, [r0, o_SoundInfo_CgbSound]
bl call_r3
ldr r0, [sp, 0x18]
@ r8 = samples per VBlank, r5 = start of the PCM buffer inside SoundInfo.
@ The buffer offset comes from the literal pool rather than an immediate.
ldr r3, [r0, o_SoundInfo_pcmSamplesPerVBlank]
mov r8, r3
ldr r5, lt_o_SoundInfo_pcmBuffer
adds r5, r0
@ r4 = pcmDmaCounter. When it is 0 or 1 (bls after the subtract) writing
@ starts at the buffer base; otherwise the write pointer is advanced by
@ samplesPerVBlank * (pcmDmaPeriod - (pcmDmaCounter - 1)).
ldrb r4, [r0, o_SoundInfo_pcmDmaCounter]
subs r7, r4, 1
bls SoundMain_5
ldrb r1, [r0, o_SoundInfo_pcmDmaPeriod]
subs r1, r7
mov r2, r8
muls r2, r1
adds r5, r2
SoundMain_5:
@ Hand over to SoundMainRAM: [sp, 0x8] = PCM write pointer, r4 = pcmDmaCounter,
@ r6 = PCM_DMA_BUF_SIZE, r8 = samples per VBlank. The literal has bit 0 set,
@ so bx r3 stays in Thumb state.
str r5, [sp, 0x8]
ldr r6, lt_PCM_DMA_BUF_SIZE
ldr r3, lt_SoundMainRAM
bx r3
@ Literal pool for the pc-relative loads above.
.align 2, 0
lt_SOUND_INFO_PTR: .word SOUND_INFO_PTR
lt_ID_NUMBER: .word ID_NUMBER
lt_SoundMainRAM: .word SoundMainRAM + 1
lt_REG_VCOUNT: .word REG_VCOUNT
lt_o_SoundInfo_pcmBuffer: .word o_SoundInfo_pcmBuffer
lt_PCM_DMA_BUF_SIZE: .word PCM_DMA_BUF_SIZE
thumb_func_end SoundMain
/* HQ-Mixer rev 4.0 created by ipatix (c) 2021
* licensed under GPLv3, see LICENSE.txt for details */
.equ ENABLE_REVERB, 1 @ <-- set to '0' if you want faster code or don't like reverb, set to '1' otherwise
.equ ENABLE_DMA, 1 @ <-- Using DMA produces smaller code and has better performance. Set to '0' if your setup does not allow using DMA.
/*****************
* END OF CONFIG *
*****************/
/* NO USER SERVICEABLE CODE BELOW HERE! YOU HAVE BEEN WARNED */
/* globals */
.global SoundMainRAM
/* samples per frame; the numeric suffix is presumably the sample rate in Hz */
.equ FRAME_LENGTH_5734, 0x60
.equ FRAME_LENGTH_7884, 0x84 @ THIS MODE IS NOT SUPPORTED BY THIS ENGINE BECAUSE IT DOESN'T USE AN 8 ALIGNED BUFFER LENGTH
.equ FRAME_LENGTH_10512, 0xB0
.equ FRAME_LENGTH_13379, 0xE0 @ DEFAULT
.equ FRAME_LENGTH_15768, 0x108
.equ FRAME_LENGTH_18157, 0x130
.equ FRAME_LENGTH_21024, 0x160
.equ FRAME_LENGTH_26758, 0x1C0
.equ FRAME_LENGTH_31536, 0x210
.equ FRAME_LENGTH_36314, 0x260
.equ FRAME_LENGTH_40137, 0x2A0
.equ FRAME_LENGTH_42048, 0x2C0
/* stack variables: offsets from sp as left by SoundMain's prologue (0x18 bytes of locals, then the saved registers) */
.equ ARG_FRAME_LENGTH, 0x0 @ Number of samples per frame/buffer
.equ ARG_REMAIN_CHN, 0x4 @ temporary to count down the channels to process
.equ ARG_BUFFER_POS, 0x8 @ stores the current output buffer pointer
.equ ARG_LOOP_START_POS, 0xC @ stores wave loop start position in channel loop
.equ ARG_LOOP_LENGTH, 0x10 @ stores wave loop length (loop end - loop start) in channel loop, 0 when looping is off
.equ ARG_BUFFER_POS_INDEX_HINT, 0x14 @ r4 as passed by SoundMain (pcmDmaCounter); if this value is == 2, then this is the last buffer before wraparound
.equ ARG_PCM_STRUCT, 0x18 @ pointer to the engine's main work area (the SoundInfo pointer saved by SoundMain)
/* channel struct */
.equ CHN_SAMPLE_STOR, 0x3F @ [byte] contains the previously loaded sample from the linear interpolation
/* pulse wave synth configuration offset */
.equ SYNTH_TYPE, 0x1 @ [byte]
.equ SYNTH_BASE_WAVE_DUTY, 0x2 @ [byte]
.equ SYNTH_WIDTH_CHANGE_1, 0x3 @ [byte]
.equ SYNTH_MOD_AMOUNT, 0x4 @ [byte]
.equ SYNTH_WIDTH_CHANGE_2, 0x5 @ [byte]
.equ MODE_FLGSH_SIGN_REVERSE, 27 @ shift the channel type left by n bits to get the reverse flag into SIGN; the users of this shift test CARRY for the compressed flag
/* variables of the engine work area (offsets from the ARG_PCM_STRUCT pointer) */
.equ VAR_REVERB, 0x5 @ [byte] 0-127 = reverb level
.equ VAR_MAX_CHN, 0x6 @ [byte] maximum channels to process
.equ VAR_MASTER_VOL, 0x7 @ [byte] PCM master volume
.equ VAR_EXT_NOISE_SHAPE_LEFT, 0xE @ [byte] normally unused, used here for noise shaping
.equ VAR_EXT_NOISE_SHAPE_RIGHT, 0xF @ [byte] normally unused, used here for noise shaping
.equ VAR_DEF_PITCH_FAC, 0x18 @ [word] this value gets multiplied with the samplerate for the inter sample distance
.equ VAR_FIRST_CHN, 0x50 @ [CHN struct] relative offset to channel array
.equ VAR_PCM_BUFFER, 0x350 @ offset added to the work area pointer by the downsampler; presumably must equal o_SoundInfo_pcmBuffer - TODO confirm
/* just some more defines */
.equ ARM_OP_LEN, 0x4 @ size of one ARM instruction in bytes, used to step through the code templates
/* extensions: block-compressed (BDPCM) sample format */
.equ BDPCM_BLK_STRIDE, 0x21 @ compressed bytes per block
.equ BDPCM_BLK_SIZE, 0x40 @ decoded samples per block
.equ BDPCM_BLK_SIZE_MASK, 0x3F @ BDPCM_BLK_SIZE - 1
.equ BDPCM_BLK_SIZE_SHIFT, 0x6 @ log2(BDPCM_BLK_SIZE)
.thumb
.align 2
.syntax divided
.section .iwram.code
@ SoundMainRAM entry (Thumb): reached by a jump from SoundMain, running on
@ the stack frame described by the ARG_* offsets.
@ In: r4 = value to keep as ARG_BUFFER_POS_INDEX_HINT (SoundMain leaves
@          pcmDmaCounter in r4), r8 = number of samples to mix this frame.
@ This prologue only sets up the channel loop: r4 = first channel,
@ r12 = pitch factor, r0 = channel count.
thumb_func_start SoundMainRAM
SoundMainRAM:
/* save the incoming r4 as the buffer position index hint; it is read back after the channel loop when reverb is enabled */
str r4, [sp, #ARG_BUFFER_POS_INDEX_HINT]
/*
* okay, before the actual mixing starts
* the volume and envelope calculation takes place
*/
mov r4, r8 @ r4 = buffer length
/*
* this stores the buffer length to a backup location
*/
str r4, [sp, #ARG_FRAME_LENGTH]
/* init channel loop */
ldr r4, [sp, #ARG_PCM_STRUCT] @ r4 = main work area pointer
ldr r0, [r4, #VAR_DEF_PITCH_FAC] @ r0 = samplingrate pitch factor
mov r12, r0 @ r12 = pitch factor, multiplied with the channel frequency in C_mixing_setup
ldrb r0, [r4, #VAR_MAX_CHN] @ r0 = number of channels to process (loop counter)
add r4, #VAR_FIRST_CHN @ r4 = Base channel Offset (Channel #0)
C_channel_state_loop:
/* this is the main channel processing loop */
str r0, [sp, #ARG_REMAIN_CHN]
ldr r3, [r4, #o_SoundChannel_wav]
ldrb r6, [r4, #o_SoundChannel_statusFlags] @ r6 will hold the channel status
movs r0, #0xC7 @ check if any of the channel status flags is set
tst r0, r6 @ check if none of the flags is set
beq C_skip_channel
/* check channel flags */
lsl r0, r6, #25 @ shift over the SOUND_CHANNEL_SF_START to CARRY
bcc C_adsr_echo_check @ continue with normal channel procedure
/* check leftmost bit */
bmi C_stop_channel @ SOUND_CHANNEL_SF_START | SOUND_CHANNEL_SF_STOP -> stop directly
/* channel init procedure */
movs r6, #SOUND_CHANNEL_SF_ENV_ATTACK
/* enabled compression if sample flag is set */
movs r0, r3 @ r0 = o_SoundChannel_wav
add r0, #o_WaveData_data @ r0 = wave data offset
ldr r2, [r3, #o_WaveData_size]
cmp r2, #0
beq C_channel_init_synth
ldrb r5, [r3, #o_WaveData_type]
lsl r5, r5, #31
ldrb r5, [r4, #o_SoundChannel_type]
bmi C_channel_init_comp
lsl r5, r5, #27 @ shift TONEDATA_TYPE_REV flag to SIGN
bmi C_channel_init_noncomp_reverse
/* Pokemon games seem to init channels differently than other m4a games */
C_channel_init_noncomp_forward:
ldr r1, [r4, #o_SoundChannel_count]
add r0, r1
sub r2, r1
b C_channel_init_check_loop
C_channel_init_synth:
mov r5, #TONEDATA_TYPE_SPL
strb r5, [r4, #o_SoundChannel_type]
ldrb r1, [r3, #(o_WaveData_data + SYNTH_TYPE)]
cmp r1, #2
bne C_channel_init_check_loop
/* start triangular synth wave at 90 degree phase
* to avoid a pop sound at the start of the wave */
mov r5, #0x40
lsl r5, #24
str r5, [r4, #o_SoundChannel_fw]
mov r5, #0
b C_channel_init_check_loop_no_fine_pos
C_channel_init_noncomp_reverse:
add r0, r2
ldr r1, [r4, #o_SoundChannel_count]
sub r0, r1
sub r2, r1
b C_channel_init_check_loop
C_channel_init_comp:
mov r0, #TONEDATA_TYPE_CMP
orr r5, r0
strb r5, [r4, #o_SoundChannel_type]
lsl r5, r5, #27 @ shift TONEDATA_TYPE_REV flag to SIGN
bmi C_channel_init_comp_reverse
C_channel_init_comp_forward:
ldr r0, [r4, #o_SoundChannel_count]
sub r2, r0
b C_channel_init_check_loop
C_channel_init_comp_reverse:
ldr r1, [r4, #o_SoundChannel_count]
sub r2, r1
mov r0, r2
C_channel_init_check_loop:
movs r5, #0 @ initial envelope = #0
str r5, [r4, #o_SoundChannel_fw]
C_channel_init_check_loop_no_fine_pos:
str r0, [r4, #o_SoundChannel_currentPointer]
str r2, [r4, #o_SoundChannel_count]
strb r5, [r4, #o_SoundChannel_envelopeVolume]
mov r2, #CHN_SAMPLE_STOR @ offset is too large to be used in one instruction
strb r5, [r4, r2]
/* enabled loop if required */
ldrb r2, [r3, #o_WaveData_flags]
lsr r0, r2, #6
beq C_adsr_attack
/* loop enabled here */
add r6, #SOUND_CHANNEL_SF_LOOP
b C_adsr_attack
C_adsr_echo_check:
/* this is the normal ADSR procedure without init */
ldrb r5, [r4, #o_SoundChannel_envelopeVolume]
lsl r0, r6, #29 @ SOUND_CHANNEL_SF_IEC --> bit 31 (sign bit)
bpl C_adsr_release_check
/* pseudo echo handler */
ldrb r0, [r4, #o_SoundChannel_pseudoEchoLength]
sub r0, #1
strb r0, [r4, #o_SoundChannel_pseudoEchoLength]
bhi C_channel_vol_calc @ continue normal if channel is still on
C_stop_channel:
movs r0, #0
strb r0, [r4, #o_SoundChannel_statusFlags]
C_skip_channel:
/* go to end of the channel loop */
b C_end_channel_state_loop
C_adsr_release_check:
lsl r0, r6, #25 @ SOUND_CHANNEL_SF_STOP --> bit 31 (sign bit)
bpl C_adsr_decay_check
/* release handler */
ldrb r0, [r4, #o_SoundChannel_release]
mul r5, r5, r0
lsr r5, #8
ble C_adsr_released
/* pseudo echo init handler */
ldrb r0, [r4, #o_SoundChannel_pseudoEchoVolume]
cmp r5, r0
bhi C_channel_vol_calc
C_adsr_released:
/* if volume released to #0 */
ldrb r5, [r4, #o_SoundChannel_pseudoEchoVolume]
cmp r5, #0
beq C_stop_channel
/* pseudo echo volume handler */
movs r0, #SOUND_CHANNEL_SF_IEC
orr r6, r0 @ set the echo flag
b C_adsr_save_and_finalize
C_adsr_decay_check:
/* check if decay is active */
movs r2, #(SOUND_CHANNEL_SF_ENV_DECAY+SOUND_CHANNEL_SF_ENV_SUSTAIN)
and r2, r6
cmp r2, #SOUND_CHANNEL_SF_ENV_DECAY
bne C_adsr_attack_check @ decay not active yet
/* decay handler */
ldrb r0, [r4, #o_SoundChannel_decay]
mul r5, r5, r0
lsr r5, r5, #8
ldrb r0, [r4, #o_SoundChannel_sustain]
cmp r5, r0
bhi C_channel_vol_calc @ sample didn't decay yet
/* sustain handler */
movs r5, r0 @ current level = sustain level
beq C_adsr_released @ sustain level #0 --> branch
/* step to next phase otherweise */
b C_adsr_next_state
C_adsr_attack_check:
/* attack handler */
cmp r2, #SOUND_CHANNEL_SF_ENV_ATTACK
bne C_channel_vol_calc @ if it isn't in attack attack phase, it has to be in sustain (keep vol) --> branch
C_adsr_attack:
/* apply attack summand */
ldrb r0, [r4, #o_SoundChannel_attack]
add r5, r0
cmp r5, #0xFF
blo C_adsr_save_and_finalize
/* cap attack at 0xFF */
movs r5, #0xFF
C_adsr_next_state:
/* switch to next adsr phase */
sub r6, #1
C_adsr_save_and_finalize:
/* store channel status */
strb r6, [r4, #o_SoundChannel_statusFlags]
C_channel_vol_calc:
/* store the calculated ADSR level */
strb r5, [r4, #o_SoundChannel_envelopeVolume]
/* apply master volume */
ldr r0, [sp, #ARG_PCM_STRUCT]
ldrb r0, [r0, #VAR_MASTER_VOL]
add r0, #1
mul r5, r0
/* left side volume */
ldrb r0, [r4, #o_SoundChannel_leftVolume]
mul r0, r5
lsr r0, #13
mov r10, r0 @ r10 = left volume
/* right side volume */
ldrb r0, [r4, #o_SoundChannel_rightVolume]
mul r0, r5
lsr r0, #13
mov r11, r0 @ r11 = right volume
/*
* Now we get closer to actual mixing:
* For looped samples some additional operations are required
*/
movs r0, #SOUND_CHANNEL_SF_LOOP
and r0, r6
beq C_sample_loop_setup_skip
/* loop setup handler */
add r3, #o_WaveData_loopStart
ldmia r3!, {r0, r1} @ r0 = loop start, r1 = loop end
ldrb r2, [r4, #o_SoundChannel_type]
lsl r2, r2, #MODE_FLGSH_SIGN_REVERSE
bcs C_sample_loop_setup_comp
add r3, r0 @ r3 = loop start position (absolute)
b C_sample_loop_setup_finish
C_sample_loop_setup_comp:
mov r3, r0
C_sample_loop_setup_finish:
str r3, [sp, #ARG_LOOP_START_POS]
sub r0, r1, r0
C_sample_loop_setup_skip:
/* do the rest of the setup */
str r0, [sp, #ARG_LOOP_LENGTH] @ if loop is off --> r0 = 0x0
ldr r5, hq_buffer_literal
ldr r2, [r4, #o_SoundChannel_count]
ldr r3, [r4, #o_SoundChannel_currentPointer]
ldrb r0, [r4, #o_SoundChannel_type]
/* switch to arm */
adr r1, C_mixing_setup
bx r1
.align 2
hq_buffer_literal:
.word hq_buffer_ptr
.arm
.align 2
/* register usage:
* r0: scratch
* r1: scratch
* r2: sample countdown
* r3: sample pointer
* r4: sample step
* r5: mixing buffer
* r6: sampleval base
* r7: sample interpos
* r8: frame count
* r9: scratch
* r10: scratch
* r11: volume
* r12: sampval diff
* lr: scratch */
C_mixing_setup:
/* frequency and mixing loading routine */
ldrsb r6, [r4, #CHN_SAMPLE_STOR]
ldr r8, [sp, #ARG_FRAME_LENGTH]
orrs r11, r11, r10, lsl#16 @ r11 = 00LL00RR
beq C_mixing_epilogue @ volume #0 --> branch and skip channel processing
/* normal processing otherwise */
tst r0, #(TONEDATA_TYPE_CMP|TONEDATA_TYPE_REV)
bne C_mixing_setup_comp_rev
tst r0, #TONEDATA_TYPE_FIX
bne C_setup_fixed_freq_mixing
C_mixing_setup_comp_rev:
stmfd sp!, {r4, r9, r12}
add r4, r4, #o_SoundChannel_fw
ldmia r4, {r7, lr} @ r7 = Fine Position, lr = Frequency
mul r4, lr, r12 @ r4 = inter sample steps = output rate factor * samplerate
tst r0, #TONEDATA_TYPE_SPL
bne C_setup_synth
/*
* Mixing goes with volume ranges 0-127
* They come in 0-255 --> divide by 2 (rounding up)
*/
movs r11, r11, lsr#1
adc r11, r11, #0x8000
bic r11, r11, #0x8000
mov r1, r7 @ r1 = inter sample position
/*
* There is 2 different mixing codepaths for uncompressed data
* path 1: fast mixing, but doesn't supports loop or stop
* path 2: not so fast but supports sample loops / stop
* This checks if there is enough samples aviable for path 1.
* important: r0 is expected to be #0
*/
sub r10, sp, #0x8
tst r0, #TONEDATA_TYPE_FIX
movne r4, #0x800000
movs r0, r0, lsl#(MODE_FLGSH_SIGN_REVERSE)
umlal r1, r0, r4, r8
mov r1, r1, lsr#23
orr r0, r1, r0, lsl#9
bcs C_data_load_comp
bmi C_data_load_uncomp_rev
b C_data_load_uncomp_for
/* F_decode_compressed: decode one compressed block, i.e. BDPCM_BLK_SIZE
* samples read from BDPCM_BLK_STRIDE source bytes (2 bytes on the first pass,
* 1 byte on each of the remaining 31 passes).
* registers:
* r9: in/out, pointer to the next compressed source byte (post-incremented)
* r0: in/out, dst address (on stack), moved by the two patched store instructions
* r12: in, delta_lookup_table
* r1, r2: clobbered, hold the two most recently decoded samples
* r3: preserved through the stack
* lr: used as the sample countdown, the return address is kept on the stack */
F_decode_compressed:
stmfd sp!, {r3, lr}
mov lr, #BDPCM_BLK_SIZE
/* byte 0 of a block is the first sample as is */
ldrb r2, [r9], #1
/* byte 1 is used unmasked as the table index for the second sample, so its high nibble is presumably zero in valid data */
ldrb r3, [r9], #1
b C_bdpcm_decoder_loop_entry
C_bdpcm_decoder_loop:
/* every further byte holds two deltas: high nibble first, then low nibble */
ldrb r3, [r9], #1
ldrb r2, [r12, r3, lsr#4]
add r2, r1, r2
and r3, r3, #0xF
C_bdpcm_decoder_loop_entry:
ldrb r1, [r12, r3]
add r1, r1, r2
/* the two nops below are placeholders: C_data_load_comp overwrites them with
* one of the two store pairs further down before this routine is called */
bdpcm_instructions:
nop
nop
subs lr, #2
bgt C_bdpcm_decoder_loop
ldmfd sp!, {r3, pc}
/* store pair for forward playback: write r2 then r1 at ascending addresses */
bdpcm_instruction_resource_for:
strb r2, [r0], #1
strb r1, [r0], #1
/* store pair for reverse playback: write r2 then r1 at descending addresses */
bdpcm_instruction_resource_rev:
strb r2, [r0, #-1]!
strb r1, [r0, #-1]!
/* delta per 4-bit code; entries are read as unsigned bytes and the sums are
* stored with strb, so only the low 8 bits of each running sum matter */
delta_lookup_table:
.byte 0, 1, 4, 9, 16, 25, 36, 49, -64, -49, -36, -25, -16, -9, -4, -1
/* lowest address the sample staging area on the stack may reach: the loaders
* add the number of bytes they need to this value and give up on the channel
* for this frame when the sum is not below sp */
stack_boundary_literal:
.word 0x03007900
C_data_load_comp:
adrpl r9, bdpcm_instruction_resource_for
adrmi r9, bdpcm_instruction_resource_rev
ldmia r9, {r12, lr}
adr r9, bdpcm_instructions
stmia r9, {r12, lr}
adr r12, delta_lookup_table
bmi C_data_load_comp_rev
C_data_load_comp_for:
/* TODO having loop support for forward samples would be nice */
/* lr = end_of_last_block */
add lr, r3, r0
add lr, #(1+(BDPCM_BLK_SIZE-1)) @ -1 for alignment, +1 because we need an extra sample for interpolation
bic lr, #BDPCM_BLK_SIZE_MASK
/* r9 = start_of_first_block >> 6 */
mov r9, r3, lsr#BDPCM_BLK_SIZE_SHIFT
/* r8 = num_samples */
sub r8, lr, r9, lsl#BDPCM_BLK_SIZE_SHIFT
/* check if stack would overflow */
ldr r1, stack_boundary_literal
add r1, r8
cmp r1, sp
bhs C_end_mixing
/* --- */
add r1, r3, r0
subs r0, r2, r0
stmfd sp!, {r0, r1}
sub sp, r8
bgt C_data_load_comp_for_calc_pos
/* locate end of sample data block */
add r1, r3, r2
/* ugly workaround for unaligned samples */
add r1, r1, #BDPCM_BLK_SIZE_MASK
bic r1, r1, #BDPCM_BLK_SIZE_MASK
sub r1, lr, r1
sub r8, r1
add r0, sp, r8
bl F_clear_mem
C_data_load_comp_for_calc_pos:
and r3, r3, #BDPCM_BLK_SIZE_MASK
mov r0, sp
C_data_load_comp_decode:
ldr r2, [r10, #8] @ load chn_ptr from previous stmfd
@ zero flag should be only set when leaving from F_clear_mem (r1 = 0)
streqb r1, [r2, #o_SoundChannel_statusFlags]
ldr r2, [r2, #o_SoundChannel_wav]
add r2, #o_WaveData_data
mov r1, #BDPCM_BLK_STRIDE
mla r9, r1, r9, r2
C_data_load_comp_loop:
bl F_decode_compressed
subs r8, #BDPCM_BLK_SIZE
bgt C_data_load_comp_loop
b C_select_highspeed_codepath_vla_r3
C_data_load_comp_rev:
/* lr = end_of_last_block */
add lr, r3, #BDPCM_BLK_SIZE_MASK
bic lr, lr, #BDPCM_BLK_SIZE_MASK
/* r9 = start_of_first_block >> 6 */
sub r9, r3, r0
sub r9, #1 @ one extra sample for LERP
mov r9, r9, lsr#BDPCM_BLK_SIZE_SHIFT
/* r8 = num_samples */
sub r8, lr, r9, lsl#BDPCM_BLK_SIZE_SHIFT
/* check if stack would overflow */
ldr lr, stack_boundary_literal
add lr, r8
cmp lr, sp
bhs C_end_mixing
/* --- */
sub lr, r3, r0
subs r0, r2, r0
stmfd sp!, {r0, lr}
mov r0, sp
sub sp, r8
bgt C_data_load_comp_rev_calc_pos
sub r1, r3, r2
sub r1, r1, r9, lsl#BDPCM_BLK_SIZE_SHIFT
sub r8, r1
add r0, sp, r8
bl F_clear_mem
C_data_load_comp_rev_calc_pos:
rsb r3, r3, #0
and r3, r3, #BDPCM_BLK_SIZE_MASK
b C_data_load_comp_decode
C_data_load_uncomp_rev:
/* lr = end_of_last_block */
add lr, r3, #0x3
bic lr, #0x3
/* r9 = start_of_first_block */
sub r9, r3, r0
sub r9, #1
bic r9, #0x3
/* r8 = num_samples */
sub r8, lr, r9
/* check if stack would overflow */
ldr r1, stack_boundary_literal
add r1, r8
cmp r1, sp
bhs C_end_mixing
/* --- */
sub r1, r3, r0
subs r0, r2, r0
stmfd sp!, {r0, r1}
mov r0, sp
sub sp, r8
bgt C_data_load_uncomp_rev_loop
sub r1, r3, r2
sub r1, r9
sub r8, r1
add r0, sp, r8
bl F_clear_mem
ldr r2, [r10, #8] @ load chn_ptr from previous stmfd
@ r1 should be zero here
strb r1, [r2, #o_SoundChannel_statusFlags]
C_data_load_uncomp_rev_loop:
ldmia r9!, {r1}
@ Byteswap
eor r2, r1, r1, ROR#16
mov r2, r2, lsr#8
bic r2, r2, #0xFF00
eor r1, r2, r1, ROR#8
stmdb r0!, {r1}
subs r8, #4
bgt C_data_load_uncomp_rev_loop
rsb r3, r3, #0
b C_select_highspeed_codepath_vla_r3_and3
C_data_load_uncomp_for:
cmp r2, r0 @ actual comparison
ble C_unbuffered_mixing @ if not enough samples are available for path 1 --> branch
/*
* This is the mixer path 1.
* The interesting thing here is that the code will
* buffer enough samples on stack if enough space
* on stack is available (or goes over the limit of 0x400 bytes)
*/
sub r2, r2, r0
ldr r9, stack_boundary_literal
add r9, r0
cmp r9, sp
add r9, r3, r0
/*
* r2 = remaining samples after processing
* r9 = final sample position
* sp = original stack location
* These values will get reloaded after channel processing
* due to the lack of registers.
*/
stmfd sp!, {r2, r9}
cmplo r0, #0x400 @ > 0x400 bytes --> read directly from ROM rather than buffered
bhs C_select_highspeed_codepath
bic r1, r3, #3
add r0, r0, #7
.if ENABLE_DMA==1
/*
* The code below inits the DMA to read word aligned
* samples from ROM to stack
*/
mov r9, #0x04000000 @ REG_DMA3_SRC & 0xFF000000
add r9, #0x000000D4 @ REG_DMA3_SRC & 0x000000FF
mov r0, r0, lsr#2
sub sp, sp, r0, lsl#2
orr lr, r0, #0x84000000 @ DMA enable, 32-bit transfer type
stmia r9, {r1, sp, lr} @ actually starts the DMA
.else
/*
* This alternative path doesn't use DMA but copies with CPU instead
*/
bic r0, r0, #0x3
sub sp, sp, r0
mov lr, sp
stmfd sp!, {r3-r10}
ands r10, r0, #0xE0
rsb r10, r10, #0xF0
add pc, pc, r10, lsr#2
C_copy_loop:
.rept 8 @ duff's device 8 times
ldmia r1!, {r3-r10}
stmia lr!, {r3-r10}
.endr
subs r0, #0x100
bpl C_copy_loop
ands r0, r0, #0x1C
beq C_copy_end
C_copy_loop_rest:
ldmia r1!, {r3}
stmia lr!, {r3}
subs r0, #0x4
bgt C_copy_loop_rest
C_copy_end:
ldmfd sp!, {r3-r10}
.endif
C_select_highspeed_codepath_vla_r3_and3:
and r3, r3, #3
C_select_highspeed_codepath_vla_r3:
add r3, r3, sp
C_select_highspeed_codepath:
stmfd sp!, {r10} @ save original sp for VLA
/*
* This code decides which piece of code to load
* depending on playback-rate / default-rate ratio.
* Modes > 1.0 run with different volume levels.
* r4 = inter sample step
*/
adr r0, high_speed_code_resource @ loads the base pointer of the code
subs r4, r4, #0x800000
movpl r11, r11, lsl#1 @ if >= 1.0* 0-127 --> 0-254 volume level
addpl r0, r0, #(ARM_OP_LEN*6) @ 6 instructions further
subpls r4, r4, #0x800000 @ if >= 2.0*
addpl r0, r0, #(ARM_OP_LEN*6)
addpl r4, r4, #0x800000
ldr r2, previous_fast_code
cmp r0, r2 @ code doesn't need to be reloaded if it's already in place
beq C_skip_fast_mixing_creation
/* This loads the needed code to RAM */
str r0, previous_fast_code
ldmia r0, {r0-r2, r8-r10} @ load 6 opcodes
adr lr, fast_mixing_instructions+(ARM_OP_LEN*2) @ first NOP
C_fast_mixing_creation_loop:
/* paste code to destination, see below for patterns */
stmia lr, {r0, r1}
add lr, lr, #(ARM_OP_LEN*38)
stmia lr, {r0, r1}
sub lr, lr, #(ARM_OP_LEN*35)
stmia lr, {r2, r8-r10}
add lr, lr, #(ARM_OP_LEN*38)
stmia lr, {r2, r8-r10}
sub lr, lr, #(ARM_OP_LEN*32)
adds r5, r5, #0x40000000 @ do that for 4 blocks (unused pointer bits)
bcc C_fast_mixing_creation_loop
C_skip_fast_mixing_creation:
ldr r8, [sp] @ restore r8 with the frame length
ldr r8, [r8, #(ARG_FRAME_LENGTH + 0x8 + 0xC)]
movs r2, #0xFF000000 @ load the fine position overflow bitmask, set NE
ldrsb r12, [r3]
sub r12, r12, r6
C_fast_mixing_loop:
/* This is the actual processing and interpolation code loop; NOPs will be replaced by the code above */
fast_mixing_instructions:
/* mix the first 4 stereo samples, then the next 4. */
.rept 2
ldmia r5, {r0, r1, r10, lr} @ load the next 4 stereo samples
.irp reg, r0, r1, r10, lr
mulne r9, r7, r12
nop @ Block #1
nop
mlane \reg, r11, r9, \reg
nop
nop
nop
nop
bic r7, r7, r2, asr#1
.endr
stmia r5!, {r0, r1, r10, lr} @ write 4 stereo samples
.endr
subs r8, r8, #8
bgt C_fast_mixing_loop
/* restore previously saved values */
ldmfd sp, {sp} @ reload original stack pointer from VLA
C_skip_fast_mixing:
ldmfd sp!, {r2, r3}
b C_end_mixing
/* Various variables for the cached mixer */
.align 2
previous_fast_code:
.word 0x0 /* mark as invalid initially */
/* Those instructions below are used by the high speed loop self modifying code */
high_speed_code_resource:
/* Block for Mix Freq < 1.0 * Output Frequency */
mov r9, r9, asr#22
adds r9, r9, r6, lsl#1
adds r7, r7, r4
addpl r6, r12, r6
ldrplsb r12, [r3, #1]!
subpls r12, r12, r6
/* Block for Mix Freq > 1.0 and < 2.0 * Output Frequency */
adds r9, r6, r9, asr#23
add r6, r12, r6
adds r7, r7, r4
ldrplsb r6, [r3, #1]!
ldrsb r12, [r3, #1]!
subs r12, r12, r6
/* Block for Mix Freq > 2.0 * Output Frequency */
adds r9, r6, r9, asr#23
add r7, r7, r4
add r3, r3, r7, lsr#23
ldrsb r6, [r3]
ldrsb r12, [r3, #1]!
subs r12, r12, r6
/* incase a loop or end occurs during mixing, this code is used */
C_unbuffered_mixing:
ldrsb r12, [r3]
sub r12, r12, r6
add r5, r5, r8, lsl#2 @ r5 = End of HQ buffer
/* This below is the unbuffered mixing loop. r6 = base sample, r12 diff to next */
C_unbuffered_mixing_loop:
mul r9, r7, r12
mov r9, r9, asr#22
adds r9, r9, r6, lsl#1
ldrne r0, [r5, -r8, lsl#2]
mlane r0, r11, r9, r0
strne r0, [r5, -r8, lsl#2]
add r7, r7, r4
movs r9, r7, lsr#23
beq C_unbuffered_mixing_skip_load @ skip the mixing load if it isn't required
subs r2, r2, r9
ble C_unbuffered_mixing_loop_or_end
C_unbuffered_mixing_loop_continue:
subs r9, r9, #1
addeq r6, r12, r6
ldrnesb r6, [r3, r9]!
ldrsb r12, [r3, #1]!
sub r12, r12, r6
bic r7, r7, #0x3F800000
C_unbuffered_mixing_skip_load:
subs r8, r8, #1 @ reduce the sample count for the buffer by #1
bgt C_unbuffered_mixing_loop
C_end_mixing:
ldmfd sp!, {r4, r9, r12}
str r7, [r4, #o_SoundChannel_fw]
strb r6, [r4, #CHN_SAMPLE_STOR]
b C_mixing_end_store
C_unbuffered_mixing_loop_or_end:
/* Reached from the unbuffered mixer when the sample countdown in r2 is used up. */
/* Load the loop length; +0xC skips the {r4, r9, r12} pushed in C_mixing_setup_comp_rev */
ldr r0, [sp, #(ARG_LOOP_LENGTH+0xC)]
cmp r0, #0 @ loop length == 0 means looping is disabled for this channel
/* looping: step the sample pointer back by one loop length and refill the countdown */
/* NOTE(review): r2 is not re-checked after the refill, so a step larger than the loop length leaves it <= 0 for this sample - confirm this is acceptable */
subne r3, r3, r0
addne r2, r2, r0
bne C_unbuffered_mixing_loop_continue
/* not looping: drop the saved registers and stop the channel */
ldmfd sp!, {r4, r9, r12}
b C_mixing_end_and_stop_channel @ r0 == 0 (if this branches)
C_fixed_mixing_loop_or_end:
/* Same decision for the fixed frequency mixer; +0x8 skips the {r4, r9} pushed in C_setup_fixed_freq_mixing */
ldr r2, [sp, #ARG_LOOP_LENGTH+0x8]
movs r0, r2 @ copy the loop length to r0 and set Z when looping is disabled
/* looping: restart at the loop start position with r2 = loop length as the new countdown */
ldrne r3, [sp, #ARG_LOOP_START_POS+0x8]
bne C_fixed_mixing_loop_continue
ldmfd sp!, {r4, r9}
C_mixing_end_and_stop_channel:
/* r0 is 0 on both paths into here; offset 0 of the channel is presumably o_SoundChannel_statusFlags */
strb r0, [r4] @ update channel flag with chn halt
b C_mixing_epilogue
/* These are used for the fixed freq mixer */
fixed_mixing_code_resource:
movs r6, r10, lsl#24
movs r6, r6, asr#24
movs r6, r10, lsl#16
movs r6, r6, asr#24
movs r6, r10, lsl#8
movs r6, r6, asr#24
movs r6, r10, asr#24
ldmia r3!, {r10} @ load chunk of samples
movs r6, r10, lsl#24
movs r6, r6, asr#24
movs r6, r10, lsl#16
movs r6, r6, asr#24
movs r6, r10, lsl#8
movs r6, r6, asr#24
C_setup_fixed_freq_mixing:
stmfd sp!, {r4, r9}
C_fixed_mixing_length_check:
cmp r2, r8 @ min(buffer_size, sample_countdown) - 1
subgt lr, r8, #1
suble lr, r2, #1
movs lr, lr, lsr#2
beq C_fixed_mixing_process_rest @ <= 3 samples to process
sub r8, r8, lr, lsl#2 @ subtract the amount of samples we need to process from the buffer length
sub r2, r2, lr, lsl#2 @ subtract the amount of samples we need to process from the remaining samples
adr r1, fixed_mixing_instructions
adr r0, fixed_mixing_code_resource
mov r9, r3, lsl#30
add r0, r0, r9, lsr#27 @ alignment * 8 + resource offset = new resource offset
ldmia r0!, {r6, r7, r9, r10} @ load and write instructions
stmia r1, {r6, r7}
add r1, r1, #0xC
stmia r1, {r9, r10}
add r1, r1, #0xC
ldmia r0, {r6, r7, r9, r10}
stmia r1, {r6, r7}
add r1, r1, #0xC
stmia r1, {r9, r10}
ldmia r3!, {r10} @ load 4 samples from ROM
C_fixed_mixing_loop:
ldmia r5, {r0, r1, r7, r9} @ load 4 samples from hq buffer
fixed_mixing_instructions:
.irp reg, r0, r1, r7, r9
nop
nop
mlane \reg, r11, r6, \reg @ add new sample if neccessary
.endr
stmia r5!, {r0, r1, r7, r9} @ write samples to the mixing buffer
subs lr, lr, #1
bne C_fixed_mixing_loop
sub r3, r3, #4 @ we'll need to load this block again, so rewind a bit
C_fixed_mixing_process_rest:
mov r1, #4 @ repeat the loop #4 times to completely get rid of alignment errors
C_fixed_mixing_unaligned_loop:
ldr r0, [r5]
ldrsb r6, [r3], #1
mla r0, r11, r6, r0
str r0, [r5], #4
subs r2, r2, #1
beq C_fixed_mixing_loop_or_end
C_fixed_mixing_loop_continue:
subs r1, r1, #1
bgt C_fixed_mixing_unaligned_loop
subs r8, r8, #4
bgt C_fixed_mixing_length_check @ repeat the mixing procedure until the buffer is filled
ldmfd sp!, {r4, r9}
C_mixing_end_store:
str r2, [r4, #o_SoundChannel_count]
str r3, [r4, #o_SoundChannel_currentPointer]
C_mixing_epilogue:
/* switch to thumb */
adr r0, (C_end_channel_state_loop+1)
bx r0
.thumb
.thumb_func
C_end_channel_state_loop:
/* Tail of the per-channel loop: count the processed channel and move on. */
ldr r0, [sp, #ARG_REMAIN_CHN] @ r0 = channels left, including the one just handled
sub r0, #1 @ Thumb sub with immediate sets the flags tested by the ble below
ble C_main_mixer_return @ nothing left --> leave the channel loop
add r4, #0x40 @ advance r4 to the next channel; 0x40 is presumably the size of one channel struct
b C_channel_state_loop @ the loop head stores r0 back to ARG_REMAIN_CHN
C_main_mixer_return:
ldr r3, [sp, #ARG_PCM_STRUCT]
ldrb r4, [r3, #VAR_EXT_NOISE_SHAPE_LEFT]
lsl r4, r4, #16
ldrb r5, [r3, #VAR_EXT_NOISE_SHAPE_RIGHT]
lsl r5, r5, #16
.if ENABLE_REVERB==1
ldrb r2, [r3, #VAR_REVERB]
lsr r2, r2, #2
ldr r1, [sp, #ARG_BUFFER_POS_INDEX_HINT]
cmp r1, #2
.else
mov r2, #0
mov r3, #0
.endif
/* switch to arm */
adr r0, C_downsampler
bx r0
.arm
.align 2
C_downsampler:
ldr r8, [sp, #ARG_FRAME_LENGTH]
ldr r9, [sp, #ARG_BUFFER_POS]
.if ENABLE_REVERB==1
orr r2, r2, r2, lsl#16
movne r3, r8
addeq r3, r3, #VAR_PCM_BUFFER
subeq r3, r3, r9
.endif
ldr r10, hq_buffer_literal
mov r11, #0xFF00
mov lr, #0xC0000000
C_downsampler_loop:
ldmia r10, {r0, r1}
add r12, r4, r0 @ left sample #1
adds r4, r12, r12