-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathraster.cpp
237 lines (209 loc) · 9.46 KB
/
raster.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
// Copyright 2013, Andrew Ross
// Distributable under the GNU LGPL v2.1 , see COPYING for details
#include <vector>
#include "vecgen.hpp"
#include "raster.hpp"
using namespace std;
// Some theory first. Each attribute is defined to be linear in 3D
// (X/Y/W -- not Z, which is the scaled fragment depth and acts here
// as an attribute; W is the linear eye space depth coordinate), that
// is:
//
// att = A*x + B*y + C*w + D for some A/B/C/D
//
// But D can be zero by construction. Why? The interpolation over
// the triangle is constrained only in the 2D plane of the triangle --
// the family of linear functions 3D functions that match it can be
// sheared outside the plane in any direction we choose. So we simply
// choose the shear to draw a line-of-constant-attribute from the
// origin to the point on the plane where att is zero. This works as
// long as the origin is not on the plane (but that's OK because such
// a case is degenerate: the triangle is edge-on). Thus:
//
// att = A*x + B*y * C*w
//
// Recognize that we're interpolating in post-perspective-divide
// screen space using "sx" proportional to x/w and "sy" that goes as
// y/w. Rescaling A/B/C we can then write:
//
// att = A*sx*w + B*sy*w + C*w
// = w * (A*sx + B*sy + C)
// att/w = A*sx + B*sy + C
//
// That is, "att/w" is linear in screen space. We don't even need to
// solve for A/B/C: For each attribute-over-w "Q", the 2D attribute steps
// can be gotten via a 2x2 matrix inverse (which is dependent only on
// coordinates and can be computed once, not per-attribute):
//
// (bq-aq) = (bx-ax)*dqdx + (by-ay)*dqdy
// (cq-aq) = (cx-ax)*dqdx + (cy-ay)*dqdy
//
// |bx-ax by-ay| |dqdx| |bq-aq|
// |cx-ax cy-ay| * |dqdy| = |cq-aq|
//
// |dqdx| |bx-ax by-ay|-1 |bq-aq|
// |dqdy| == |cx-ax cy-ay| * |cq-aq|
static void swapif(const vr &test, vr &a, vr &b)
{
vr a2=a;
a = mux(test, b, a);
b = mux(test, a2, b);
}
void rasterize_tri(vecgen *vg, int viewx, int viewy, int vieww, int viewh,
vr *a, vr *b, vr *c, int natt_in, vr *aatt_in, vr *batt_in, vr *catt_in,
frag_fn fragment, void *frag_arg)
{
// Might be immediates
for(int i=0; i<4; i++) { a[i].setvg(vg); b[i].setvg(vg); c[i].setvg(vg); }
for(int i=0; i<natt_in; i++) { aatt_in[i].setvg(vg); batt_in[i].setvg(vg); catt_in[i].setvg(vg); }
// Renaming
vr ax=a[0], ay=a[1], az=a[2], aw=a[3];
vr bx=b[0], by=b[1], bz=b[2], bw=b[3];
vr cx=c[0], cy=c[1], cz=c[2], cw=c[3];
// flip Y sign to convert cartesian to raster
ay ^= imm_i(0x80000000);
by ^= imm_i(0x80000000);
cy ^= imm_i(0x80000000);
// Perspective division. Note we use a coordinate "m" to
// represent 1/w (get it?!).
vr am = recip2(aw); ax *= am; ay *= am; az *= am;
vr bm = recip2(bw); bx *= bm; by *= bm; bz *= bm;
vr cm = recip2(cw); cx *= cm; cy *= cm; cz *= cm;
// Convert [-1:1] to [viewx/y,viewx/y+vieww/h] screen coordinates,
// placing the origin at the center (!) of the bottom/left pixel.
ax = ax * (vieww/2) + ((vieww/2.) + viewx - 0.5);
ay = ay * (viewh/2) + ((viewh/2.) + viewy - 0.5);
bx = bx * (vieww/2) + ((vieww/2.) + viewx - 0.5);
by = by * (viewh/2) + ((viewh/2.) + viewy - 0.5);
cx = cx * (vieww/2) + ((vieww/2.) + viewx - 0.5);
cy = cy * (viewh/2) + ((viewh/2.) + viewy - 0.5);
// Assemble a set of attributes (stored as att/w): include Z and
// M, even though they interpolate somewhat differently, just to
// have a place to put them.
int natt = natt_in + 2, zi = natt-2, mi = zi+1;
vector<vr> aatt, batt, catt;
for(int i=0; i<natt_in; i++) {
aatt.push_back(aatt_in[i] * am);
batt.push_back(batt_in[i] * bm);
catt.push_back(catt_in[i] * cm);
}
aatt.push_back(az); aatt.push_back(am);
batt.push_back(bz); batt.push_back(bm);
catt.push_back(cz); catt.push_back(cm);
// Compute the matrix inverse for the 2D att/w derivatives
vr m00 = cy-ay, m01 = ay-by;
vr m10 = ax-cx, m11 = bx-ax;
vr idet = recip2(m00*m11 - m01*m10);
m00 *= idet; m01 *= idet; m10 *= idet; m11 *= idet;
// Build the 2D derivatives
vector<vr> ddx(natt), ddy(natt);
for(int i=0; i<zi; i++) {
vr ba = batt[i] - aatt[i], ca = catt[i] - aatt[i];
ddx[i] = ba*m00 + ca*m01;
ddy[i] = ba*m10 + ca*m11;
}
ddx[zi] = (bz-az)*m00 + (cz-az)*m01;
ddy[zi] = (bz-az)*m10 + (cz-az)*m11;
ddx[mi] = (bm-am)*m00 + (cm-am)*m01;
ddy[mi] = (bm-am)*m10 + (cm-am)*m11;
// Now we can interpolate anywhere on the plane given only the 2D
// screen coordinates, so figure out the pixels to rasterize.
// First sort the 2D points by Y into top/mid/bottom coordinates:
vr topx=ax, midx=bx, botx=cx, topy=ay, midy=by, boty=cy;
vr sw0 = topy > midy; swapif(sw0, topx, midx); swapif(sw0, topy, midy);
vr sw1 = topy > boty; swapif(sw1, topx, botx); swapif(sw1, topy, boty);
vr sw2 = midy > boty; swapif(sw2, midx, botx); swapif(sw2, midy, boty);
// And a left/right edge line (expressed as x=q*y+o) for each
// half-triangle. The left edge is the one with a lower q slope.
// NOTE: the slope NaNs out (recip2 doesn't do Inf properly) when
// faced with a horizontal upper or lower edge, that will still
// work as long as the code never "uses" the zero-size half
// triangle (i.e. get the < vs. <= tests right). FIXME: MUST TEST
// THAT CONDITION.
vr botlq = (midx-botx) * recip2(midy-boty);
vr botlo = botx - botlq * boty;
vr botrq = (topx-botx) * recip2(topy-boty);
vr botro = botx - botrq * boty;
vr toplq = (topx-midx) * recip2(topy-midy);
vr toplo = topx - toplq * topy;
vr toprq = botrq;
vr topro = botro;
// Sort left/right:
vr swapedge = toprq < toplq;
swapif(swapedge, botro, botlo); swapif(swapedge, botrq, botlq);
swapif(swapedge, topro, toplo); swapif(swapedge, toprq, toplq);
// Now loop over spans from the top down, we're doing 2x2 fragment
// blocks at a time over {x,y}{0,1}, where 0,0 is the bottom left
vr y0 = max(ceil(topy), viewy);
vr maxy = min(boty, viewy+viewh);
VRLOOP(*vg) {
vr y1 = y0 + 1;
// Figure out the q/o values for the left/right sides of each
// span and compute span boundaries.
vr ql0 = mux(y0 < midy, toplq, botlq), ol0 = mux(y0 < midy, toplo, botlo);
vr qr0 = mux(y0 < midy, toprq, botrq), or0 = mux(y0 < midy, topro, botro);
vr ql1 = mux(y1 < midy, toplq, botlq), ol1 = mux(y1 < midy, toplo, botlo);
vr qr1 = mux(y1 < midy, toprq, botrq), or1 = mux(y1 < midy, topro, botro);
vr xmin0 = ql0 * y0 + ol0, xmax0 = qr0 * y0 + or0;
vr xmin1 = ql1 * y1 + ol1, xmax1 = qr1 * y1 + or1;
// clamp to viewport.
xmin0 = max(xmin0, viewx-.5); xmax0 = min(xmax0, viewx+vieww-.5);
xmin1 = max(xmin1, viewx-.5); xmax1 = min(xmax1, viewx+vieww-.5);
vr x0 = floor(min(xmin0, xmin1)), xmax = max(xmax0, xmax1);
// Compute attributes for the bottom left corner
vector<vr> att00(natt);
for(int i=0; i<natt; i++)
att00[i] = aatt[i] + (x0-ax)*ddx[i] + (y0-ay)*ddy[i];
VRLOOP(*vg) {
vr x1 = x0 + 1;
// Figure the other three fragments' attributes:
vector<vr> att01(natt), att10(natt), att11(natt);
for(int i=0; i<natt; i++) {
att01[i] = att00[i] + ddx[i]; // bottom right
att10[i] = att00[i] + ddy[i]; // top left
att11[i] = att01[i] + ddy[i]; // top right
}
// Compute W for each fragment and multiply by att/w
// (except for Z of course) to get the true attribute
// value. Save off att01 pre-munging to use for computing
// the next iteration.
vector<vr> att01_copy = att01;
vector<vr> *atts[] = { &att00, &att01, &att10, &att11 };
for(int i=0; i<4; i++) {
vr w = recip((*atts[i])[mi]); // per-pixel recip is the fast/approximate version
for(int j=0; j<zi; j++)
(*atts[i])[j] *= w;
}
// Four fragments:
for(int i=0; i<4; i++) {
vector<vr> rec, *att = atts[i];
int top = !(i & 2), right = !!(i & 1);
// Build the attribute/derivatives array (3 entries
// each: value/ddx/ddy)
for(int j=0; j<mi; j++) {
vr jddx = (*atts[ 1 + (top<<1) ])[j] - (*atts[ 0 + (top<<1) ])[j];
vr jddy = (*atts[ 2 + right ])[j] - (*atts[ 0 + right ])[j];
rec.push_back((*att)[j]);
rec.push_back(jddx);
rec.push_back(jddy);
}
// Fragment inclusion check vs. viewport and Zfar.
vr x = right ? x1 : x0, y = top ? y0 : y1, z = (*att)[zi];
vr span0 = top ? xmin0 : xmin1, span1 = top ? xmax0 : xmax1;
vr dofrag = ((x >= span0 && x <= span1) &&
(top ? z <= 1 : (z <= 1 && y < maxy)));
VRIF(*vg, dofrag)
fragment(x, y, &rec[0], rec.size(), frag_arg);
}
// Update bottom left "seed" for next fragment quad
for(int i=0; i<natt; i++)
att00[i] = att01_copy[i] + ddx[i];
x0 += 2;
VRIF(*vg, x0 >= xmax)
vg->break_loop();
}
y0 += 2;
VRIF(*vg, y0 > maxy)
vg->break_loop();
}
}