/**
 * Implement the plain color fill style. dplug:canvas internals.
 *
 * Copyright: Copyright Chris Jones 2020.
 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 */
module dplug.canvas.colorblit;

import dplug.canvas.rasterizer;
import dplug.canvas.misc;

/*
  ColorBlit
*/

nothrow:
@nogc:

/// Blitter that composites a single 32-bit color over rows of 32-bit pixels,
/// four pixels at a time, driven by per-pixel winding deltas.
struct ColorBlit
{
nothrow:
@nogc:

    /// Stores the destination description and the fill color, and precomputes
    /// the two 128-bit constants used by `color_blit`.
    ///
    /// Params:
    ///   pixels      = first byte of the destination buffer.
    ///   strideBytes = distance in bytes between the starts of two rows.
    ///   height      = number of rows; must be > 0.
    ///   color       = 32-bit color; its top byte (`color >> 24`) is used as alpha.
    void init(ubyte* pixels, size_t strideBytes, int height, uint color)
    {
        assert(height > 0);

        this.pixels = pixels;
        this.strideBytes = strideBytes;
        this.height = height;
        this.color = color;

        // _xmColor: the four color bytes widened to 16-bit lanes, with the
        // same four lanes repeated in both 64-bit halves (two pixels' worth).
        __m128i xmColor = _mm_loadu_si32 (&color);
        xmColor = _mm_unpacklo_epi8 (xmColor, _mm_setzero_si128());
        xmColor = _mm_unpacklo_epi64 (xmColor, xmColor);

        // _xmAlpha: the alpha byte moved to the high byte of a 16-bit word
        // and broadcast to all eight 16-bit lanes.
        __m128i xmAlpha = _mm_set1_epi16 (cast(ushort) ((color >> 24) << 8));

        _mm_storeu_si128(cast(__m128i*)_xmColor.ptr, xmColor);
        _mm_storeu_si128(cast(__m128i*)_xmAlpha.ptr, xmAlpha);
    }

private:

    /// Blits row `y` between pixel columns `x0` and `x1`.
    ///
    /// Work is done in groups of four pixels, so `x0` and `x1` must both be
    /// multiples of 4. The running winding value is obtained by summing
    /// `delta`; every group of `delta` that is integrated is reset to zero.
    ///
    /// Params:
    ///   wr    = winding rule used to turn winding values into coverage.
    ///   delta = per-pixel winding deltas for this row (consumed and zeroed).
    ///   mask  = passed to `nextSetBit` to find the next group that needs
    ///           integration.
    ///           NOTE(review): `nextSetBit` is defined elsewhere; presumably
    ///           it returns the index of the next flagged 4-pixel group, or
    ///           `endbit` when there is none -- confirm against its definition.
    ///   x0    = first pixel column, >= 0 and a multiple of 4.
    ///   x1    = end pixel column, a multiple of 4, with `x1 * 4 <= strideBytes`.
    ///   y     = row index, `0 <= y < height`.
    void color_blit(WindingRule wr)(int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert(x0 >= 0);
        ulong sbytes = strideBytes;
        assert(x1 * 4 <= sbytes);
        assert(y >= 0);
        assert(y < height);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // main blit variables

        int bpos = x0 / 4;      // current 4-pixel group
        int endbit = x1 / 4;    // one past the last 4-pixel group
        uint* dest = cast(uint*)(&pixels[y*strideBytes]);
        __m128i xmWinding = 0;  // running winding total, same value in all lanes
        bool isopaque = (color >> 24) == 0xFF;

        // XMM constants

        immutable __m128i XMZERO = 0;
        immutable __m128i XMMSK16 = [0xFFFF,0xFFFF,0xFFFF,0xFFFF];

        // paint variables (precomputed by init)

        __m128i xmColor = _mm_loadu_si128(cast(__m128i*)_xmColor.ptr);
        __m128i xmAlpha = _mm_loadu_si128(cast(__m128i*)_xmAlpha.ptr);

        // main loop

        while (bpos < endbit)
        {
            int nsb = nextSetBit(mask, bpos, endbit);

            // do we have a span of unchanging coverage?

            if (bpos < nsb)
            {
                // Calc coverage of first pixel

                static if (wr == WindingRule.NonZero)
                {
                    // |winding| * 2, clamped to 16 bits
                    int cover = xmWinding[3]+delta[bpos*4];
                    cover = abs(cover)*2;
                    if (cover > 0xFFFF) cover = 0xFFFF;
                }
                else
                {
                    // Keep the low 16 bits and fold negative values onto
                    // positive ones (x ^ sign mask), then scale by 2.
                    int cover = xmWinding[3]+delta[bpos*4];
                    short tsc = cast(short) cover;
                    cover = (tsc ^ (tsc >> 15)) << 1;
                }

                // We can skip the span

                if (cover < 0x100)
                {
                    bpos = nsb;
                }

                // Or fill span with solid color

                else if (isopaque && (cover > 0xFF00))
                {
                    __m128i tqc = _mm_set1_epi32(color);

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        _mm_storeu_si128(cast(__m128i*)ptr, tqc);
                        ptr+=4;
                    }

                    bpos = nsb;
                }

                // Or fill the span with transparent color

                else
                {
                    // tpma = alpha * coverage, tpmc = color * tpma packed to
                    // bytes; both are constant over the whole span.
                    __m128i tpma = _mm_set1_epi16(cast(ushort) cover);
                    tpma = _mm_mulhi_epu16(xmAlpha,tpma);
                    __m128i tpmc = _mm_mulhi_epu16(xmColor,tpma);
                    tpmc = _mm_packus_epi16(tpmc,tpmc);
                    tpma = _mm_not_si128(tpma); // 1-alpha

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        // dest = dest * (1-alpha) + premultiplied color
                        __m128i d0 = _mm_loadu_si128(cast(__m128i*)ptr);
                        __m128i d1 = _mm_unpackhi_epi8(d0,XMZERO);
                        d0 = _mm_unpacklo_epi8(d0,XMZERO);
                        d0 = _mm_mulhi_epu16(d0,tpma);
                        d1 = _mm_mulhi_epu16(d1,tpma);
                        d0 = _mm_packus_epi16(d0,d1);
                        d0 = _mm_adds_epu8(d0,tpmc);
                        _mm_storeu_si128(cast(__m128i*)ptr,d0);
                        ptr+=4;
                    }

                    bpos = nsb;
                }
            }

            // At this point we need to integrate scandelta

            uint* ptr = &dest[bpos*4];
            uint* end = &dest[endbit*4];
            int* dlptr = &delta[bpos*4];

            while (ptr < end)
            {
                // Integrate delta values: inclusive prefix sum of the four
                // deltas plus the running total, then broadcast lane 3 as the
                // new running total and clear the consumed deltas.

                __m128i tqw = _mm_loadu_si128(cast(__m128i*)dlptr);
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!4(tqw));
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!8(tqw));
                tqw = _mm_add_epi32(tqw, xmWinding);
                xmWinding = _mm_shuffle_epi32!255(tqw);
                _mm_storeu_si128(cast(__m128i*)dlptr,XMZERO);

                // Process coverage values taking account of winding rule

                static if (wr == WindingRule.NonZero)
                {
                    __m128i tcvr = _mm_srai_epi32(tqw,31);
                    tqw = _mm_add_epi32(tcvr,tqw);
                    tqw = _mm_xor_si128(tqw,tcvr);        // abs
                    tcvr = _mm_packs_epi32(tqw,XMZERO);   // saturate/pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);       // << to uint16
                }
                else
                {
                    __m128i tcvr = _mm_and_si128(tqw,XMMSK16);
                    tqw = _mm_srai_epi16(tcvr,15);        // mask
                    tcvr = _mm_xor_si128(tcvr,tqw);       // fold in half
                    tcvr = _mm_packs_epi32(tcvr,XMZERO);  // pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);       // << to uint16
                }

                // Load destination pixels and widen each byte to 16 bits
                // (d0 = pixels 0-1, d1 = pixels 2-3)

                __m128i d01 = _mm_loadu_si128(cast(__m128i*) ptr);
                __m128i d0 = _mm_unpacklo_epi8 (d01, XMZERO);
                __m128i d1 = _mm_unpackhi_epi8 (d01, XMZERO);

                // multiply source alpha & coverage, then replicate each
                // pixel's value across its four channel lanes
                // (a0 = pixels 0-1, a1 = pixels 2-3)

                __m128i a0 = _mm_mulhi_epu16(tcvr,xmAlpha);
                a0 = _mm_unpacklo_epi16(a0,a0);
                __m128i a1 = _mm_unpackhi_epi32(a0,a0);
                a0 = _mm_unpacklo_epi32(a0,a0);

                // r = alpha*color + dest - alpha*dest

                __m128i r0 = _mm_mulhi_epu16(xmColor,a0);
                __m128i tmp = _mm_mulhi_epu16(d0,a0);
                r0 = _mm_add_epi16(r0, d0);
                r0 = _mm_sub_epi16(r0, tmp);

                __m128i r1 = _mm_mulhi_epu16(xmColor,a1);
                tmp = _mm_mulhi_epu16(d1,a1);
                r1 = _mm_add_epi16(r1, d1);
                r1 = _mm_sub_epi16(r1, tmp);

                __m128i r01 = _mm_packus_epi16(r0,r1);

                _mm_storeu_si128(cast(__m128i*)ptr,r01);

                bpos++;
                ptr+=4;
                dlptr+=4;

                // Stop at the end of the row segment before peeking at the
                // next group: delta[endbit*4 ..] lies outside [x0, x1) and
                // nothing visible here guarantees it is readable.
                if (ptr >= end) break;

                // If the next four deltas are all zero, coverage is constant
                // again: leave and let the outer loop handle it as a span.
                if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0) break;
            }
        }
    }

    ubyte* pixels;        // destination buffer, as given to init
    size_t strideBytes;   // bytes between the starts of two rows
    int height;           // number of rows
    uint color;           // fill color; top byte is alpha
    ubyte[16] _xmColor;   // precomputed by init: color bytes as 16-bit lanes, twice
    ubyte[16] _xmAlpha;   // precomputed by init: alpha << 8 in every 16-bit lane
}

/// Blit callback: `userData` must point to an initialised `ColorBlit`.
/// Forwards to `ColorBlit.color_blit` using the non-zero winding rule.
void doBlit_ColorBlit(void* userData, int* delta, DMWord* mask, int x0, int x1, int y)
{
    ColorBlit* cb = cast(ColorBlit*)userData;
    cb.color_blit!(WindingRule.NonZero)(delta, mask, x0, x1, y);
}