/**
* Implement the plain color fill style. dplug:canvas internals.
*
* Copyright: Copyright Chris Jones 2020.
* License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module dplug.canvas.colorblit;

import dplug.canvas.rasterizer;
import dplug.canvas.misc;

/*
  ColorBlit
*/

nothrow:
@nogc:

/**
* Blitter that fills rasterized coverage with a single 32-bit color.
*
* The top byte of the color (`color >> 24`) is treated as the alpha channel.
* `init` must be called before any blit so that the cached SIMD constants
* match `color`.
*/
struct ColorBlit
{
nothrow:
@nogc:

    /**
    * Set the fill color and precompute the SIMD constants used by the blit.
    *
    * Params:
    *   color = 32-bit color; its top byte is used as alpha.
    */
    void init(uint color)
    {
        this.color = color;

        // Widen the four color bytes to 16-bit lanes and duplicate them so the
        // register holds the same color for two pixels.
        __m128i xmColor = _mm_loadu_si32 (&color);
        xmColor = _mm_unpacklo_epi8 (xmColor, _mm_setzero_si128());
        xmColor = _mm_unpacklo_epi64 (xmColor, xmColor);

        // Alpha moved into the high byte of every 16-bit lane, so that
        // _mm_mulhi_epu16 against a 16-bit coverage yields alpha*coverage.
        __m128i xmAlpha = _mm_set1_epi16 (cast(ushort) ((color >> 24) << 8));

        _mm_storeu_si128(cast(__m128i*)_xmColor.ptr, xmColor);
        _mm_storeu_si128(cast(__m128i*)_xmAlpha.ptr, xmAlpha);
    }

private:

    /**
    * Blit one scanline span of the fill color into `dest`.
    *
    * Pixels are processed in groups of four. Runs of groups whose mask bit is
    * clear have constant coverage and are skipped, filled solid or blended
    * with a single precomputed alpha. Other groups have their `delta` values
    * integrated into a running winding number, which is converted to
    * coverage according to `wr`.
    *
    * Params:
    *   wr    = winding rule used to turn the winding number into coverage.
    *   dest  = destination pixels for this scanline, indexed by x.
    *   delta = per-pixel winding deltas, indexed by x. Each group that is
    *           integrated is reset to zero.
    *   mask  = bit mask handed to `nextSetBit`, indexed in units of four
    *           pixels. NOTE(review): `nextSetBit` is presumably returning the
    *           first set bit in [bpos, endbit) or `endbit` when there is
    *           none -- confirm against dplug.canvas.misc.
    *   x0    = first pixel, must be >= 0 and a multiple of 4.
    *   x1    = end pixel (exclusive), must be a multiple of 4.
    *   y     = scanline index; only checked to be non-negative here.
    */
    void color_blit(WindingRule wr)(uint* dest, int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert(x0 >= 0);
        assert(y >= 0);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // main blit variables

        int bpos = x0 / 4;      // current position, in groups of 4 pixels
        int endbit = x1 / 4;    // end position, in groups of 4 pixels
        __m128i xmWinding = 0;  // running winding number, broadcast to all lanes
        bool isopaque = (color >> 24) == 0xFF;

        // XMM constants

        immutable __m128i XMZERO = 0;
        immutable __m128i XMMSK16 = [0xFFFF,0xFFFF,0xFFFF,0xFFFF];

        // paint variables
        __m128i xmColor = _mm_loadu_si128(cast(__m128i*)_xmColor.ptr);
        __m128i xmAlpha = _mm_loadu_si128(cast(__m128i*)_xmAlpha.ptr);

        // main loop

        while (bpos < endbit)
        {
            int nsb = nextSetBit(mask, bpos, endbit);

            // do we have a span of unchanging coverage?

            if (bpos < nsb)
            {
                // Calc coverage of first pixel

                static if (wr == WindingRule.NonZero)
                {
                    int cover = xmWinding[3]+delta[bpos*4];
                    cover = abs(cover)*2;
                    if (cover > 0xFFFF) cover = 0xFFFF;
                }
                else
                {
                    int cover = xmWinding[3]+delta[bpos*4];
                    short tsc = cast(short) cover;
                    cover = (tsc ^ (tsc >> 15)) << 1;
                }

                // We can skip the span

                if (cover < 0x100)
                {
                    bpos = nsb;
                }

                // Or fill span with solid color

                else if (isopaque && (cover > 0xFF00))
                {
                    __m128i tqc = _mm_set1_epi32(color);

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        _mm_storeu_si128(cast(__m128i*)ptr, tqc);
                        ptr+=4;
                    }

                    bpos = nsb;
                }

                // Or fill the span with transparent color

                else
                {
                    // Alpha and premultiplied color are constant over the
                    // whole span, so compute them once.
                    __m128i tpma = _mm_set1_epi16(cast(ushort) cover);
                    tpma = _mm_mulhi_epu16(xmAlpha,tpma);
                    __m128i tpmc = _mm_mulhi_epu16(xmColor,tpma);
                    tpmc = _mm_packus_epi16(tpmc,tpmc);
                    tpma = _mm_not_si128(tpma); // 1-alpha

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        // dest = dest*(1-alpha) + color*alpha
                        __m128i d0 = _mm_loadu_si128(cast(__m128i*)ptr);
                        __m128i d1 = _mm_unpackhi_epi8(d0,XMZERO);
                        d0 = _mm_unpacklo_epi8(d0,XMZERO);
                        d0 = _mm_mulhi_epu16(d0,tpma);
                        d1 = _mm_mulhi_epu16(d1,tpma);
                        d0 = _mm_packus_epi16(d0,d1);
                        d0 = _mm_adds_epu8(d0,tpmc);
                        _mm_storeu_si128(cast(__m128i*)ptr,d0);
                        ptr+=4;
                    }

                    bpos = nsb;
                }
            }

            // At this point we need to integrate scandelta

            uint* ptr = &dest[bpos*4];
            uint* end = &dest[endbit*4];
            int* dlptr = &delta[bpos*4];

            while (ptr < end)
            {
                // Integrate delta values (prefix sum over the four lanes,
                // then add the winding carried over from the previous group)

                __m128i tqw = _mm_loadu_si128(cast(__m128i*)dlptr);
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!4(tqw));
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!8(tqw));
                tqw = _mm_add_epi32(tqw, xmWinding);
                xmWinding = _mm_shuffle_epi32!255(tqw);
                _mm_storeu_si128(cast(__m128i*)dlptr,XMZERO);

                // Process coverage values taking account of winding rule

                static if (wr == WindingRule.NonZero)
                {
                    __m128i tcvr = _mm_srai_epi32(tqw,31);
                    tqw = _mm_add_epi32(tcvr,tqw);
                    tqw = _mm_xor_si128(tqw,tcvr);        // abs
                    tcvr = _mm_packs_epi32(tqw,XMZERO);   // saturate/pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);       // << to uint16
                }
                else
                {
                    __m128i tcvr = _mm_and_si128(tqw,XMMSK16);
                    tqw = _mm_srai_epi16(tcvr,15);        // mask
                    tcvr = _mm_xor_si128(tcvr,tqw);       // fold in half
                    tcvr = _mm_packs_epi32(tcvr,XMZERO);  // pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);       // << to uint16
                }

                // Load destination pixels
                __m128i d01 = _mm_loadu_si128(cast(__m128i*) ptr);
                __m128i d0 = _mm_unpacklo_epi8 (d01, XMZERO);
                __m128i d1 = _mm_unpackhi_epi8 (d01, XMZERO);

                // multiply source alpha & coverage, then replicate each
                // pixel's alpha across its four channel lanes

                __m128i a0 = _mm_mulhi_epu16(tcvr,xmAlpha);
                a0 = _mm_unpacklo_epi16(a0,a0);
                __m128i a1 = _mm_unpackhi_epi32(a0,a0);
                a0 = _mm_unpacklo_epi32(a0,a0);

                // r = alpha*color + dest - alpha*dest

                __m128i r0 = _mm_mulhi_epu16(xmColor,a0);
                __m128i tmp = _mm_mulhi_epu16(d0,a0);
                r0 = _mm_add_epi16(r0, d0);
                r0 = _mm_sub_epi16(r0, tmp);

                __m128i r1 = _mm_mulhi_epu16(xmColor,a1);
                tmp = _mm_mulhi_epu16(d1,a1);
                r1 = _mm_add_epi16(r1, d1);
                r1 = _mm_sub_epi16(r1, tmp);

                __m128i r01 = _mm_packus_epi16(r0,r1);

                _mm_storeu_si128(cast(__m128i*)ptr,r01);

                bpos++;
                ptr+=4;
                dlptr+=4;

                // Check the bound before peeking at the next group, so the
                // delta buffer is never read past x1.
                if (ptr >= end) break;

                // Next four deltas are all zero: coverage is constant from
                // here, so return to the outer loop and use the span path.
                if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0) break;
            }
        }
    }

    uint color;             // fill color as passed to init()
    ubyte[16] _xmColor;     // color widened to 16-bit lanes, two pixels' worth
    ubyte[16] _xmAlpha;     // alpha << 8 in each of the eight 16-bit lanes
}

/**
* Type-erased entry point: blit with the non-zero winding rule.
* `userData` must point to the `ColorBlit` providing the color; the remaining
* arguments are forwarded unchanged to `ColorBlit.color_blit`.
*/
void doBlit_ColorBlit_NonZero(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y)
{
    ColorBlit* cb = cast(ColorBlit*)userData;
    return cb.color_blit!(WindingRule.NonZero)(dest, delta, mask, x0, x1, y);
}

/**
* Type-erased entry point: blit with the even-odd winding rule.
* `userData` must point to the `ColorBlit` providing the color; the remaining
* arguments are forwarded unchanged to `ColorBlit.color_blit`.
*/
void doBlit_ColorBlit_EvenOdd(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y)
{
    auto blitter = cast(ColorBlit*) userData;
    blitter.color_blit!(WindingRule.EvenOdd)(dest, delta, mask, x0, x1, y);
}