1 /**
2 * Implement the elliptic gradient fill style. dplug:canvas internals.
3 *
4 * Copyright: Copyright Chris Jones 2020.
5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module dplug.canvas.ellipticalblit;
8
9 import dplug.canvas.rasterizer;
10 import dplug.canvas.gradient;
11 import dplug.canvas.misc;
12
/**
 * Scanline blitter that paints an elliptical gradient.
 *
 * For every pixel two coordinates are evaluated, `t0` along the axis given
 * to `init` and `t1` perpendicular to it. The gradient lookup table is
 * indexed with `sqrt(t0*t0 + t1*t1)` (truncated, then clamped with
 * `_mm_clamp_0_to_N_epi32` against `lutLength - 1`).
 *
 * `init` must be called before the blitter is used. The actual blit is
 * reached through the module-level `doBlit_EllipticalBlit_*` functions.
 */
struct EllipticalBlit
{
nothrow:
@nogc:

    /**
     * Set up the gradient and the per-pixel step values.
     *
     * Params:
     *   g  = gradient providing the lookup table; must be non-null and its
     *        `lutLength` must be a power of two (both asserted).
     *   x0 = x of the ellipse centre (stored in `xctr`).
     *   y0 = y of the ellipse centre (stored in `yctr`).
     *   x1 = x of the point defining the first axis; `t0` reaches
     *        `lutLength` there (unless the squared axis length was clamped).
     *   y1 = y of the point defining the first axis.
     *   r2 = extent along the perpendicular axis; `t1` reaches `lutLength`
     *        at that perpendicular distance from the centre.
     *
     * NOTE(review): `r2 == 0` is not guarded and makes `xstep1`/`ystep1`
     * infinite or NaN - confirm callers never pass it.
     */
    void init(Gradient g, float x0, float y0, float x1, float y1, float r2)
    {
        assert(g !is null);
        assert(isPow2(g.lutLength));
        this.gradient = g;
        int lutsize = g.lutLength;

        xctr = x0;
        yctr = y0;
        float w = x1-x0;
        float h = y1-y0;

        // Squared length of the first axis, clamped so the divisions below
        // stay well behaved for (nearly) coincident points.
        float hyp = w*w + h*h;
        if (hyp < 1.0) hyp = 1.0;

        // t0: projection onto (w,h), scaled so the axis end maps to lutsize.
        xstep0 = lutsize * w / hyp;
        ystep0 = lutsize * h / hyp;

        // t1: projection onto the perpendicular (h,-w), normalised by the
        // axis length and scaled so that a distance of r2 maps to lutsize.
        hyp = sqrt(hyp);
        xstep1 = lutsize * h / (r2*hyp);
        ystep1 = lutsize * -w / (r2*hyp);
    }

private:

    /**
     * Blend one scanline of the gradient into `dest`.
     *
     * Pixels are processed in groups of four. Groups whose coverage does
     * not change are handled as spans; the remaining groups have their
     * `delta` values integrated into a running winding value, which is
     * converted to coverage according to `wr`. Integrated `delta` entries
     * are reset to zero.
     *
     * Params:
     *   wr    = winding rule used to turn the winding value into coverage.
     *   dest  = destination pixels of the scanline (32 bits per pixel).
     *   delta = per-pixel winding deltas; read and cleared.
     *   mask  = passed to `nextSetBit` to locate the next group of four
     *           pixels that needs integration (presumably one bit per
     *           group - verify against the rasterizer).
     *   x0    = first pixel, must be >= 0 and a multiple of 4.
     *   x1    = end pixel (exclusive), must be a multiple of 4.
     *   y     = scanline index, must be >= 0.
     */
    void color_blit(WindingRule wr)(uint* dest, int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert(x0 >= 0);
        assert(y >= 0);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // main blit variables (positions are in units of 4 pixels)

        int bpos = x0 / 4;
        int endbit = x1 / 4;

        __m128i xmWinding = 0;
        uint* lut = gradient.getLookup.ptr;
        short lutMax = cast(short)(gradient.lutLength - 1);
        bool isopaque = false;//gradient.isOpaque

        // XMM constants

        immutable __m128i XMZERO = 0;
        immutable __m128i XMMSK16 = [0xFFFF,0xFFFF,0xFFFF,0xFFFF];

        // paint variables: xmT0/xmT1 hold t0/t1 for the four pixels of the
        // current group, xmStep0/xmStep1 advance them by one group.

        float t0 = (bpos*4-xctr)*xstep0 + (y-yctr)*ystep0;
        __m128 xmT0 = _mm_mul_ps(_mm_set1_ps(xstep0), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
        xmT0 = _mm_add_ps(xmT0, _mm_set1_ps(t0));
        __m128 xmStep0 = _mm_set1_ps(xstep0*4);

        float t1 = (bpos*4-xctr)*xstep1 + (y-yctr)*ystep1;
        __m128 xmT1 = _mm_mul_ps(_mm_set1_ps(xstep1), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
        xmT1 = _mm_add_ps(xmT1, _mm_set1_ps(t1));
        __m128 xmStep1 = _mm_set1_ps(xstep1*4);

        // main loop

        while (bpos < endbit)
        {
            int nsb = nextSetBit(mask, bpos, endbit);

            // do we have a span of unchanging coverage?

            if (bpos < nsb)
            {
                // Calc coverage of first pixel

                static if (wr == WindingRule.NonZero)
                {
                    int cover = xmWinding[3]+delta[bpos*4];
                    cover = abs(cover)*2;
                    if (cover > 0xFFFF) cover = 0xFFFF;
                }
                else
                {
                    int cover = xmWinding[3]+delta[bpos*4];
                    short tsc = cast(short) cover;
                    cover = (tsc ^ (tsc >> 15)) * 2;
                }

                // We can skip the span

                if (cover == 0)
                {
                    // Nothing to draw: only advance the gradient position.
                    __m128 tsl = _mm_set1_ps(nsb-bpos);
                    xmT0 = _mm_add_ps(xmT0, _mm_mul_ps(tsl,xmStep0));
                    xmT1 = _mm_add_ps(xmT1, _mm_mul_ps(tsl,xmStep1));
                    bpos = nsb;
                }

                // Or fill span with solid color
                // (currently unreachable: isopaque is hard-wired to false)

                else if (isopaque && (cover > 0xFF00))
                {
                    uint* ptr = &dest[bpos*4];
                    uint* end = ptr + ((nsb-bpos)*4);

                    while (ptr < end)
                    {
                        __m128 rad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                        rad = _mm_sqrt_ps(rad);
                        xmT0 = xmT0 + xmStep0;
                        xmT1 = xmT1 + xmStep1;
                        __m128i ipos = _mm_cvttps_epi32 (rad);
                        ipos = _mm_clamp_0_to_N_epi32(ipos, lutMax);

                        ptr[0] = lut[ ipos.array[0] ];
                        ptr[1] = lut[ ipos.array[1] ];
                        ptr[2] = lut[ ipos.array[2] ];
                        ptr[3] = lut[ ipos.array[3] ];

                        ptr+=4;
                    }

                    bpos = nsb;
                }

                // Or fill span with transparent color

                else
                {
                    // Same coverage for every channel of every pixel in the span.
                    __m128i tqcvr = _mm_set1_epi16 (cast(ushort) cover);

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        __m128 rad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                        xmT0 = xmT0 + xmStep0;
                        xmT1 = xmT1 + xmStep1;
                        rad = _mm_sqrt_ps(rad);

                        // Load 4 destination pixels, widened to 16 bits per channel

                        __m128i d0 = _mm_loadu_si64 (ptr);
                        d0 = _mm_unpacklo_epi8 (d0, XMZERO);
                        __m128i d1 = _mm_loadu_si64 (ptr+2);
                        d1 = _mm_unpacklo_epi8 (d1, XMZERO);

                        __m128i ipos = _mm_cvttps_epi32 (rad);
                        ipos = _mm_clamp_0_to_N_epi32(ipos, lutMax);

                        // Gather gradient colors for pixels 0,1 and scale
                        // their alpha by the coverage

                        __m128i c0 = _mm_loadu_si32 (&lut[ ipos.array[0] ]);
                        __m128i tnc = _mm_loadu_si32 (&lut[ ipos.array[1] ]);
                        c0 = _mm_unpacklo_epi32 (c0, tnc);
                        c0 = _mm_unpacklo_epi8 (c0, XMZERO);
                        __m128i a0 = _mm_broadcast_alpha(c0);
                        a0 = _mm_mulhi_epu16(a0, tqcvr);

                        // Same for pixels 2,3

                        __m128i c1 = _mm_loadu_si32 (&lut[ ipos.array[2] ]);
                        tnc = _mm_loadu_si32 (&lut[ ipos.array[3] ]);
                        c1 = _mm_unpacklo_epi32 (c1, tnc);
                        c1 = _mm_unpacklo_epi8 (c1, XMZERO);
                        __m128i a1 = _mm_broadcast_alpha(c1);
                        a1 = _mm_mulhi_epu16(a1, tqcvr);

                        // alpha*source + dest - alpha*dest

                        c0 = _mm_mulhi_epu16 (c0,a0);
                        c1 = _mm_mulhi_epu16 (c1,a1);
                        c0 = _mm_adds_epi16 (c0,d0);
                        c1 = _mm_adds_epi16 (c1,d1);
                        d0 = _mm_mulhi_epu16 (d0,a0);
                        d1 = _mm_mulhi_epu16 (d1,a1);
                        c0 = _mm_subs_epi16 (c0, d0);
                        c1 = _mm_subs_epi16 (c1, d1);

                        // Back to 8 bits per channel and store

                        d0 = _mm_packus_epi16 (c0,c1);

                        _mm_storeu_si128 (cast(__m128i*)ptr,d0);

                        ptr+=4;
                    }

                    bpos = nsb;
                }
            }

            // At this point we need to integrate scandelta

            uint* ptr = &dest[bpos*4];
            int* dlptr = &delta[bpos*4];

            while (bpos < endbit)
            {
                __m128 rad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                rad = _mm_sqrt_ps(rad);

                // Integrate delta values: running sum of the four deltas,
                // plus the winding carried over from the previous group.
                // Lane 3 becomes the new carry and the deltas are cleared.

                __m128i tqw = _mm_loadu_si128(cast(__m128i*)dlptr);
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!4(tqw));
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!8(tqw));
                tqw = _mm_add_epi32(tqw, xmWinding);
                xmWinding = _mm_shuffle_epi32!255(tqw);
                _mm_storeu_si128(cast(__m128i*)dlptr,XMZERO);

                // convert grad pos to integer

                __m128i ipos = _mm_cvttps_epi32(rad);
                ipos = _mm_clamp_0_to_N_epi32(ipos, lutMax);
                xmT0 = xmT0 + xmStep0;
                xmT1 = xmT1 + xmStep1;

                // Process coverage values taking account of winding rule

                static if (wr == WindingRule.NonZero)
                {
                    __m128i tcvr = _mm_srai_epi32(tqw,31);
                    tqw = _mm_add_epi32(tcvr,tqw);
                    tqw = _mm_xor_si128(tqw,tcvr);        // abs
                    tcvr = _mm_packs_epi32(tqw,XMZERO);   // saturate/pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);       // << to uint16
                }
                else
                {
                    __m128i tcvr = _mm_and_si128(tqw,XMMSK16);
                    tqw = _mm_srai_epi16(tcvr,15);        // mask
                    tcvr = _mm_xor_si128(tcvr,tqw);       // fold in half
                    tcvr = _mm_packs_epi32(tcvr,XMZERO);  // pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);       // << to uint16
                }

                // Load destination pixels

                __m128i d0 = _mm_loadu_si64 (ptr);
                d0 = _mm_unpacklo_epi8 (d0, XMZERO);
                __m128i d1 = _mm_loadu_si64 (ptr+2);
                d1 = _mm_unpacklo_epi8 (d1, XMZERO);

                // load grad colors
                // First replicate each pixel's coverage over its four
                // 16-bit channels: tcvr = pixels 0,1 and tcvr2 = pixels 2,3.

                tcvr = _mm_unpacklo_epi16 (tcvr, tcvr);
                __m128i tcvr2 = _mm_unpackhi_epi32 (tcvr, tcvr);
                tcvr = _mm_unpacklo_epi32 (tcvr, tcvr);

                __m128i c0 = _mm_loadu_si32 (&lut[ ipos.array[0] ]);
                __m128i tnc = _mm_loadu_si32 (&lut[ ipos.array[1] ]);
                c0 = _mm_unpacklo_epi32 (c0, tnc);
                c0 = _mm_unpacklo_epi8 (c0, XMZERO);
                __m128i a0 = _mm_broadcast_alpha(c0);
                a0 = _mm_mulhi_epu16(a0, tcvr);

                __m128i c1 = _mm_loadu_si32 (&lut[ ipos.array[2] ]);
                tnc = _mm_loadu_si32 (&lut[ ipos.array[3] ]);
                c1 = _mm_unpacklo_epi32 (c1, tnc);
                c1 = _mm_unpacklo_epi8 (c1, XMZERO);
                __m128i a1 = _mm_broadcast_alpha(c1);
                a1 = _mm_mulhi_epu16(a1, tcvr2);

                // alpha*source + dest - alpha*dest

                c0 = _mm_mulhi_epu16 (c0,a0);
                c1 = _mm_mulhi_epu16 (c1,a1);
                c0 = _mm_adds_epi16 (c0,d0);
                c1 = _mm_adds_epi16 (c1,d1);
                d0 = _mm_mulhi_epu16 (d0,a0);
                d1 = _mm_mulhi_epu16 (d1,a1);
                c0 = _mm_subs_epi16 (c0, d0);
                c1 = _mm_subs_epi16 (c1, d1);

                d0 = _mm_packus_epi16 (c0,c1);

                _mm_storeu_si128 (cast(__m128i*)ptr,d0);

                bpos++;
                ptr+=4;
                dlptr+=4;

                // End of the scanline section: leave before peeking at
                // delta entries that lie beyond x1.
                if (bpos >= endbit) break;

                // If the next four deltas are all zero the coverage stays
                // constant, so go back to span processing.
                if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0) break;
            }
        }
    }

    // Member variables

    Gradient gradient;       // source of the color lookup table
    float xctr,yctr;         // ellipse centre
    float xstep0,ystep0;     // change of t0 per pixel in x / per scanline in y
    float xstep1,ystep1;     // change of t1 per pixel in x / per scanline in y
}
301
/// Type-erased blit entry point for the non-zero winding rule.
/// `userData` must point to the `EllipticalBlit` that should paint the
/// scanline; all other arguments are forwarded unchanged to its `color_blit`.
void doBlit_EllipticalBlit_NonZero(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y) nothrow @nogc
{
    auto blitter = cast(EllipticalBlit*) userData;
    blitter.color_blit!(WindingRule.NonZero)(dest, delta, mask, x0, x1, y);
}
307
/// Type-erased blit entry point for the even-odd winding rule.
/// `userData` must point to the `EllipticalBlit` that should paint the
/// scanline; all other arguments are forwarded unchanged to its `color_blit`.
void doBlit_EllipticalBlit_EvenOdd(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y) nothrow @nogc
{
    auto blitter = cast(EllipticalBlit*) userData;
    blitter.color_blit!(WindingRule.EvenOdd)(dest, delta, mask, x0, x1, y);
}
313