/**
* Implement the elliptic gradient fill style. dplug:canvas internals.
*
* Copyright: Copyright Chris Jones 2020.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module dplug.canvas.ellipticalblit;

import dplug.canvas.rasterizer;
import dplug.canvas.gradient;
import dplug.canvas.misc;

/**
* Scanline blitter that paints an elliptical gradient.
*
* For every pixel two linear coordinates t0 and t1 are evaluated (see `init`
* for how their per-pixel steps are derived); the gradient lookup-table index
* is then `sqrt(t0*t0 + t1*t1)`, truncated to an integer and clamped with
* `_mm_clamp_0_to_N_epi32(…, lutLength - 1)`.
*
* The actual work is done by `color_blit`, which is reached through the two
* module-level `doBlit_EllipticalBlit_*` functions.
*/
struct EllipticalBlit
{
nothrow:
@nogc:

    /**
    * Set up the gradient and the per-pixel stepping values.
    *
    * With w = x1-x0 and h = y1-y0:
    *  - (xstep0, ystep0) = lutLength * (w, h) / (w*w + h*h), so t0 is the
    *    position along the (x0,y0)->(x1,y1) axis, reaching lutLength at
    *    (x1,y1) (when the squared length is not clamped, see below).
    *  - (xstep1, ystep1) = lutLength * (h, -w) / (r2 * sqrt(w*w + h*h)), so t1
    *    is the position along the perpendicular direction, reaching lutLength
    *    at a perpendicular distance of r2.
    *
    * Params:
    *     g  = gradient to paint with; must be non-null and `g.lutLength` must
    *          be a power of two (both are asserted).
    *     x0 = x of the gradient centre.
    *     y0 = y of the gradient centre.
    *     x1 = x of the end point of the first axis.
    *     y1 = y of the end point of the first axis.
    *     r2 = extent along the perpendicular axis.
    *          NOTE(review): r2 is used as a divisor and is not checked here;
    *          presumably callers never pass 0 - confirm.
    */
    void init(Gradient g, float x0, float y0, float x1, float y1, float r2)
    {
        assert(g !is null);
        assert(isPow2(g.lutLength));
        this.gradient = g;
        int lutsize = g.lutLength;

        xctr = x0;
        yctr = y0;
        float w = x1-x0;
        float h = y1-y0;
        float hyp = w*w + h*h;          // squared length of the first axis
        if (hyp < 1.0) hyp = 1.0;       // clamp so the divisions below stay bounded
        xstep0 = lutsize * w / hyp;
        ystep0 = lutsize * h / hyp;
        hyp = sqrt(hyp);                // from here on: (clamped) length of the first axis
        xstep1 = lutsize * h / (r2*hyp);
        ystep1 = lutsize * -w / (r2*hyp);
    }

private:

    /**
    * Blend the gradient into one scanline, for x in [x0, x1).
    *
    * Pixels are processed in groups of four; `bpos` and `endbit` below are
    * positions counted in such groups. The function alternates between two
    * phases until `endbit` is reached:
    *  1. a span [bpos, nsb) handled with one constant coverage value, where
    *     `nsb` is the result of `nextSetBit(mask, bpos, endbit)`;
    *  2. a run of groups where `delta` is integrated (running sum) into
    *     per-pixel coverage, blended, and cleared back to zero.
    *
    * Params:
    *     wr    = winding rule used to turn the integrated winding value into
    *             a 16-bit coverage value.
    *     dest  = destination pixels of this scanline, one `uint` per pixel,
    *             indexed by x.
    *     delta = per-pixel winding deltas of this scanline, indexed by x. The
    *             groups processed in phase 2 are overwritten with zero.
    *     mask  = passed to `nextSetBit`; presumably one bit per 4-pixel group
    *             that has non-zero delta entries - TODO confirm against the
    *             rasterizer.
    *     x0    = first pixel, must be >= 0 and a multiple of 4 (asserted).
    *     x1    = end pixel (exclusive), must be a multiple of 4 (asserted).
    *     y     = y coordinate of the scanline, must be >= 0 (asserted); used
    *             to evaluate the gradient position.
    */
    void color_blit(WindingRule wr)(uint* dest, int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert(x0 >= 0);
        assert(y >= 0);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // main blit variables

        int bpos = x0 / 4;      // current position, in groups of 4 pixels
        int endbit = x1 / 4;    // end position (exclusive), in groups of 4 pixels

        __m128i xmWinding = 0;  // running winding value carried from group to group
        uint* lut = gradient.getLookup.ptr;
        short lutMax = cast(short)(gradient.lutLength - 1);
        // Hard-coded to false, so the "solid color" span branch below is never taken.
        bool isopaque = false;//gradient.isOpaque

        // XMM constants

        immutable __m128i XMZERO = 0;
        // NOTE(review): XMFFFF is not referenced anywhere in this function.
        immutable __m128i XMFFFF = [0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF];
        immutable __m128i XMMSK16 = [0xFFFF,0xFFFF,0xFFFF,0xFFFF];

        // paint variables

        // t0/t1 are the two gradient coordinates at pixel (bpos*4, y).
        // xmT0/xmT1 hold them for four consecutive pixels, and xmStep0/xmStep1
        // advance them by four pixels at a time.

        float t0 = (bpos*4-xctr)*xstep0 + (y-yctr)*ystep0;
        __m128 xmT0 = _mm_mul_ps(_mm_set1_ps(xstep0), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
        xmT0 = _mm_add_ps(xmT0, _mm_set1_ps(t0));
        __m128 xmStep0 = _mm_set1_ps(xstep0*4);

        float t1 = (bpos*4-xctr)*xstep1 + (y-yctr)*ystep1;
        __m128 xmT1 = _mm_mul_ps(_mm_set1_ps(xstep1), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
        xmT1 = _mm_add_ps(xmT1, _mm_set1_ps(t1));
        __m128 xmStep1 = _mm_set1_ps(xstep1*4);

        // main loop

        while (bpos < endbit)
        {
            int nsb = nextSetBit(mask, bpos, endbit);

            // do we have a span of unchanging coverage?

            if (bpos < nsb)
            {
                // Calc coverage of first pixel

                static if (wr == WindingRule.NonZero)
                {
                    // |winding| * 2, saturated to 0xFFFF
                    int cover = xmWinding[3]+delta[bpos*4];
                    cover = abs(cover)*2;
                    if (cover > 0xFFFF) cover = 0xFFFF;
                }
                else
                {
                    // Keep the low 16 bits as a signed value, bitwise-invert it
                    // when negative (xor with its sign mask), then double it.
                    int cover = xmWinding[3]+delta[bpos*4];
                    short tsc = cast(short) cover;
                    cover = (tsc ^ (tsc >> 15)) * 2;
                }

                // We can skip the span

                if (cover == 0)
                {
                    // Nothing to draw: only advance the gradient coordinates
                    // by the number of skipped groups.
                    __m128 tsl = _mm_set1_ps(nsb-bpos);
                    xmT0 = _mm_add_ps(xmT0, _mm_mul_ps(tsl,xmStep0));
                    xmT1 = _mm_add_ps(xmT1, _mm_mul_ps(tsl,xmStep1));
                    bpos = nsb;
                }

                // Or fill span with solid color

                else if (isopaque && (cover > 0xFF00))
                {
                    // Writes the lookup-table colors straight to dest, no blending.
                    uint* ptr = &dest[bpos*4];
                    uint* end = ptr + ((nsb-bpos)*4);

                    while (ptr < end)
                    {
                        __m128 rad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                        rad = _mm_sqrt_ps(rad);
                        xmT0 = xmT0 + xmStep0;
                        xmT1 = xmT1 + xmStep1;
                        __m128i ipos = _mm_cvttps_epi32 (rad);
                        ipos = _mm_clamp_0_to_N_epi32(ipos, lutMax);

                        ptr[0] = lut[ ipos.array[0] ];
                        ptr[1] = lut[ ipos.array[1] ];
                        ptr[2] = lut[ ipos.array[2] ];
                        ptr[3] = lut[ ipos.array[3] ];

                        ptr+=4;
                    }

                    bpos = nsb;
                }

                // Or fill span with transparent color

                else
                {
                    // Same coverage for every pixel of the span, replicated
                    // into all eight 16-bit lanes.
                    __m128i tqcvr = _mm_set1_epi16 (cast(ushort) cover);

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        // lookup index = sqrt(t0^2 + t1^2) for 4 pixels
                        __m128 rad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                        xmT0 = xmT0 + xmStep0;
                        xmT1 = xmT1 + xmStep1;
                        rad = _mm_sqrt_ps(rad);

                        // Load 4 destination pixels, widened to 16 bits per
                        // channel (d0 = pixels 0-1, d1 = pixels 2-3)
                        __m128i d0 = _mm_loadu_si64 (ptr);
                        d0 = _mm_unpacklo_epi8 (d0, XMZERO);
                        __m128i d1 = _mm_loadu_si64 (ptr+2);
                        d1 = _mm_unpacklo_epi8 (d1, XMZERO);

                        __m128i ipos = _mm_cvttps_epi32 (rad);
                        ipos = _mm_clamp_0_to_N_epi32(ipos, lutMax);

                        // Gather the gradient colors for pixels 0-1, widen them
                        // to 16 bits per channel, and scale their alpha
                        // (obtained through _mm_broadcast_alpha, a helper
                        // defined elsewhere) by the coverage.
                        __m128i c0 = _mm_loadu_si32 (&lut[ ipos.array[0] ]);
                        __m128i tnc = _mm_loadu_si32 (&lut[ ipos.array[1] ]);
                        c0 = _mm_unpacklo_epi32 (c0, tnc);
                        c0 = _mm_unpacklo_epi8 (c0, XMZERO);
                        __m128i a0 = _mm_broadcast_alpha(c0);
                        a0 = _mm_mulhi_epu16(a0, tqcvr);

                        // Same for pixels 2-3
                        __m128i c1 = _mm_loadu_si32 (&lut[ ipos.array[2] ]);
                        tnc = _mm_loadu_si32 (&lut[ ipos.array[3] ]);
                        c1 = _mm_unpacklo_epi32 (c1, tnc);
                        c1 = _mm_unpacklo_epi8 (c1, XMZERO);
                        __m128i a1 = _mm_broadcast_alpha(c1);
                        a1 = _mm_mulhi_epu16(a1, tqcvr);

                        // alpha*source + dest - alpha*dest

                        c0 = _mm_mulhi_epu16 (c0,a0);
                        c1 = _mm_mulhi_epu16 (c1,a1);
                        c0 = _mm_adds_epi16 (c0,d0);
                        c1 = _mm_adds_epi16 (c1,d1);
                        d0 = _mm_mulhi_epu16 (d0,a0);
                        d1 = _mm_mulhi_epu16 (d1,a1);
                        c0 = _mm_subs_epi16 (c0, d0);
                        c1 = _mm_subs_epi16 (c1, d1);

                        // Narrow back to 8 bits per channel (unsigned saturation)
                        d0 = _mm_packus_epi16 (c0,c1);

                        _mm_storeu_si128 (cast(__m128i*)ptr,d0);

                        ptr+=4;
                    }

                    bpos = nsb;
                }
            }

            // At this point we need to integrate scandelta

            uint* ptr = &dest[bpos*4];
            // NOTE(review): `end` is not referenced in the loop below, which
            // is bounded by bpos/endbit instead.
            uint* end = &dest[endbit*4];
            int* dlptr = &delta[bpos*4];

            while (bpos < endbit)
            {
                // lookup index = sqrt(t0^2 + t1^2) for 4 pixels
                __m128 rad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                rad = _mm_sqrt_ps(rad);

                // Integrate delta values

                // Inclusive prefix sum of the 4 deltas (shift by one lane and
                // add, then shift by two lanes and add), plus the winding
                // carried in from the previous group. Lane 3 of the result is
                // broadcast as the carry for the next group, and the consumed
                // deltas are cleared to zero.
                __m128i tqw = _mm_loadu_si128(cast(__m128i*)dlptr);
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!4(tqw));
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!8(tqw));
                tqw = _mm_add_epi32(tqw, xmWinding);
                xmWinding = _mm_shuffle_epi32!255(tqw);
                _mm_storeu_si128(cast(__m128i*)dlptr,XMZERO);

                // convert grad pos to integer

                __m128i ipos = _mm_cvttps_epi32(rad);
                ipos = _mm_clamp_0_to_N_epi32(ipos, lutMax);
                xmT0 = xmT0 + xmStep0;
                xmT1 = xmT1 + xmStep1;

                // Process coverage values taking account of winding rule

                static if (wr == WindingRule.NonZero)
                {
                    __m128i tcvr = _mm_srai_epi32(tqw,31);
                    tqw = _mm_add_epi32(tcvr,tqw);
                    tqw = _mm_xor_si128(tqw,tcvr);         // abs
                    tcvr = _mm_packs_epi32(tqw,XMZERO);    // saturate/pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);        // << to uint16
                }
                else
                {
                    __m128i tcvr = _mm_and_si128(tqw,XMMSK16);
                    tqw = _mm_srai_epi16(tcvr,15);         // mask
                    tcvr = _mm_xor_si128(tcvr,tqw);        // fold in half
                    tcvr = _mm_packs_epi32(tcvr,XMZERO);   // pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);        // << to uint16
                }

                // Load destination pixels

                __m128i d0 = _mm_loadu_si64 (ptr);
                d0 = _mm_unpacklo_epi8 (d0, XMZERO);
                __m128i d1 = _mm_loadu_si64 (ptr+2);
                d1 = _mm_unpacklo_epi8 (d1, XMZERO);

                // load grad colors

                // Replicate each pixel's 16-bit coverage four times, once per
                // channel lane: tcvr covers pixels 0-1, tcvr2 covers pixels 2-3.
                tcvr = _mm_unpacklo_epi16 (tcvr, tcvr);
                __m128i tcvr2 = _mm_unpackhi_epi32 (tcvr, tcvr);
                tcvr = _mm_unpacklo_epi32 (tcvr, tcvr);

                __m128i c0 = _mm_loadu_si32 (&lut[ ipos.array[0] ]);
                __m128i tnc = _mm_loadu_si32 (&lut[ ipos.array[1] ]);
                c0 = _mm_unpacklo_epi32 (c0, tnc);
                c0 = _mm_unpacklo_epi8 (c0, XMZERO);
                __m128i a0 = _mm_broadcast_alpha(c0);
                a0 = _mm_mulhi_epu16(a0, tcvr);

                __m128i c1 = _mm_loadu_si32 (&lut[ ipos.array[2] ]);
                tnc = _mm_loadu_si32 (&lut[ ipos.array[3] ]);
                c1 = _mm_unpacklo_epi32 (c1, tnc);
                c1 = _mm_unpacklo_epi8 (c1, XMZERO);
                __m128i a1 = _mm_broadcast_alpha(c1);
                a1 = _mm_mulhi_epu16(a1, tcvr2);

                // alpha*source + dest - alpha*dest

                c0 = _mm_mulhi_epu16 (c0,a0);
                c1 = _mm_mulhi_epu16 (c1,a1);
                c0 = _mm_adds_epi16 (c0,d0);
                c1 = _mm_adds_epi16 (c1,d1);
                d0 = _mm_mulhi_epu16 (d0,a0);
                d1 = _mm_mulhi_epu16 (d1,a1);
                c0 = _mm_subs_epi16 (c0, d0);
                c1 = _mm_subs_epi16 (c1, d1);

                d0 = _mm_packus_epi16 (c0,c1);

                _mm_storeu_si128 (cast(__m128i*)ptr,d0);

                bpos++;
                ptr+=4;
                dlptr+=4;

                // Leave the integration loop as soon as the next four deltas
                // are all zero; the outer loop then looks for a constant span.
                // NOTE(review): this reads delta[bpos*4 .. bpos*4+3] even when
                // bpos has just reached endbit; presumably the delta buffer is
                // padded past x1 - confirm against the rasterizer.
                if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0) break;
            }
        }
    }

    // Member variables

    Gradient gradient;      // gradient whose lookup table supplies the colors
    float xctr,yctr;        // gradient centre, set from (x0, y0) in init
    float xstep0,ystep0;    // change of t0 per pixel in x / per scanline in y
    float xstep1,ystep1;    // change of t1 per pixel in x / per scanline in y
}

/**
* Scanline blit entry point using the non-zero winding rule.
*
* `userData` is cast to `EllipticalBlit*` without any check, so it must point
* to an EllipticalBlit. The remaining arguments are forwarded unchanged to
* `EllipticalBlit.color_blit`.
*/
void doBlit_EllipticalBlit_NonZero(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y) nothrow @nogc
{
    EllipticalBlit* cb = cast(EllipticalBlit*)userData;
    return cb.color_blit!(WindingRule.NonZero)(dest, delta, mask, x0, x1, y);
}

/**
* Scanline blit entry point using the even-odd winding rule.
*
* `userData` is cast to `EllipticalBlit*` without any check, so it must point
* to an EllipticalBlit. The remaining arguments are forwarded unchanged to
* `EllipticalBlit.color_blit`.
*/
void doBlit_EllipticalBlit_EvenOdd(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y) nothrow @nogc
{
    EllipticalBlit* cb = cast(EllipticalBlit*)userData;
    return cb.color_blit!(WindingRule.EvenOdd)(dest, delta, mask, x0, x1, y);
}