-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathf24sqrt.z80
201 lines (179 loc) · 2.92 KB
/
f24sqrt.z80
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#ifndef included_f24sqrt
#define included_f24sqrt
#include "sqrt16.z80"
f24sqrt:
;sqrt(AHL) ==> AHL
;return NaN if the input is negative, except:
;"IEEE 754 defines sqrt(-0.) as -0."
; - https://stackoverflow.com/a/19232238/3303651
add a,a
jr nc,+_
rra
ret z
ld a,$7F
ld h,a
ret
_:
ret z
;so 0 and negative numbers are taken care of
;check if input is NaN or inf
;sqrt(NaN) ==> NaN
;sqrt(inf) ==> inf
rra
cp $7F
ret z
;Now adjust the significand for our square-root routine
;scf
ld c,0
rr h
rr l
rr c
;if the exponent is odd, need to shift right again
;also need to compute the new exponent as (A-1)>>1 + 32
dec a
rra
jr c,+_
rr h
rr l
rr c
_:
add a,32
;save the exponent
push af
call f24sqrt_sqrt
;need to generate one more bit and a rounding bit
;this can be done by doing (AHL/2)/DE for two iterations
;Note that A is either 0 or 1
or a
ld bc,0
sbc hl,de
sbc a,b
jr nc,$+5
add hl,de
adc a,b
inc c
add hl,hl
adc a,a
sbc hl,de
sbc a,b
;bottom bit in C is the inverse of what needs to be shifted in to DE
;meanwhile, carry flag is going to be used for rounding
ld a,c
rra
ccf
ex de,hl
adc hl,hl
add a,a ;now A is 0
ld c,a
ccf
adc hl,bc
pop de
adc a,d
ret
f24sqrt_sqrt:
;returns HL as the 9.7 fixed-point square-root of the upper 18 bits of HLC
call sqrtHL ;expects returns A as sqrt, HL as remainder, D = 0
add a,a
ld e,a
rl d
ld a,c
sll e \ rl d
add a,a \ adc hl,hl
add a,a \ adc hl,hl
;A is now 0
sbc hl,de
jr nc,+_
add hl,de
dec e
.db $FE ;start of `cp *`
_:
inc e
sll e \ rl d
add hl,hl
add hl,hl
sbc hl,de
jr nc,+_
add hl,de
dec e
.db $FE ;start of `cp *`
_:
inc e
sll e \ rl d
add hl,hl
add hl,hl
sbc hl,de
jr nc,+_
add hl,de
dec e
.db $FE ;start of `cp *`
_:
inc e
sll e \ rl d
add hl,hl
add hl,hl
sbc hl,de
jr nc,+_
add hl,de
dec e
.db $FE ;start of `cp *`
_:
inc e
;Now we have four more iterations
;The first two are no problem
sll e \ rl d
add hl,hl
add hl,hl
sbc hl,de
jr nc,+_
add hl,de
dec e
.db $FE ;start of `cp *`
_:
inc e
sll e \ rl d
add hl,hl
add hl,hl
sbc hl,de
jr nc,+_
add hl,de
dec e
.db $FE ;start of `cp *`
_:
inc e
sqrt32_iter15:
;On the next iteration, HL might temporarily overflow by 1 bit
sll e \ rl d ;sla e \ rl d \ inc e
add hl,hl
add hl,hl ;This might overflow!
jr c,sqrt32_iter15_br0
;
sbc hl,de
jr nc,+_
add hl,de
dec e
jr sqrt32_iter16
sqrt32_iter15_br0:
or a
sbc hl,de
_:
inc e
;On the next iteration, HL is allowed to overflow, DE could overflow with our current routine, but it needs to be shifted right at the end, anyways
sqrt32_iter16:
ld c,a ;0x00
add hl,hl
rla
add hl,hl
rla
;AHL - (DE+DE+1)
sbc hl,de \ sbc a,c
inc e
or a
sbc hl,de \ sbc a,c
ret p
add hl,de
adc a,c
dec e
add hl,de
adc a,c
ret
#endif