1 /**
2 * Implement the plain color fill style. dplug:canvas internals.
3 *
4 * Copyright: Copyright Chris Jones 2020.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module dplug.canvas.colorblit;
8 
9 import dplug.canvas.rasterizer;
10 import dplug.canvas.misc;
11 
12 /*
13   ColorBlit
14 */
15 
16 nothrow:
17 @nogc:
18 
/**
  Plain color fill for 32-bit pixels.

  `init` records the destination surface and the fill color; `color_blit`
  then paints one scanline from the coverage deltas it is given.
*/
struct ColorBlit
{
nothrow:
@nogc:

    /**
      Set the destination surface and the fill color.

      Params:
        pixels      = start of the destination pixel memory
        strideBytes = byte offset from one scanline to the next
        height      = number of scanlines, must be positive
        color       = 32-bit fill color, its top 8 bits are used as alpha
    */
    void init(ubyte* pixels, size_t strideBytes, int height, uint color)
    {
        assert(height > 0);

        this.pixels = pixels;
        this.strideBytes = strideBytes;
        this.height = height;
        this.color = color;

        // Cache the color with every channel widened to 16 bits, the four
        // channels being repeated in both 64-bit halves of the vector.
        __m128i wideColor = _mm_unpacklo_epi8(_mm_loadu_si32(&color), _mm_setzero_si128());
        wideColor = _mm_unpacklo_epi64(wideColor, wideColor);
        _mm_storeu_si128(cast(__m128i*) _xmColor.ptr, wideColor);

        // Cache the alpha, moved to the high byte of each 16-bit lane.
        __m128i wideAlpha = _mm_set1_epi16(cast(ushort) ((color >> 24) << 8));
        _mm_storeu_si128(cast(__m128i*) _xmAlpha.ptr, wideAlpha);
    }

private:

    /**
      Paint the pixels [x0, x1) of scanline `y` with the fill color.

      Pixels are handled in blocks of four: `x0` and `x1` must be multiples
      of 4, and `delta` holds one 32-bit value per pixel. The deltas are
      summed along the line to get the winding, and each consumed block of
      `delta` is reset to zero.

      Params:
        wr    = winding rule used to turn the winding into coverage
        delta = per-pixel winding deltas for this scanline, cleared as read
        mask  = passed to `nextSetBit` to find the next block to integrate
                (presumably one bit per 4-pixel block, verify in rasterizer)
        x0    = first pixel, >= 0 and a multiple of 4
        x1    = one past the last pixel, a multiple of 4, within the stride
        y     = scanline index, in [0, height)
    */
    void color_blit(WindingRule wr)(int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert(x0 >= 0);
        ulong rowBytes = strideBytes;
        assert(x1 * 4 <= rowBytes);
        assert(y >= 0);
        assert(y < height);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // Positions are counted in blocks of 4 pixels.

        int blk = x0 / 4;
        int blkEnd = x1 / 4;
        uint* row = cast(uint*)(pixels + y*strideBytes);
        __m128i winding = 0; // running winding, kept in all four lanes
        bool opaque = (color >> 24) == 0xFF;

        // Vector constants

        immutable __m128i ZERO = 0;
        immutable __m128i LOW16 = [0xFFFF,0xFFFF,0xFFFF,0xFFFF];

        // Fill color and alpha as prepared by init()

        __m128i srcColor = _mm_loadu_si128(cast(__m128i*) _xmColor.ptr);
        __m128i srcAlpha = _mm_loadu_si128(cast(__m128i*) _xmAlpha.ptr);

        while (blk < blkEnd)
        {
            int next = nextSetBit(mask, blk, blkEnd);

            // Blocks before `next` all share the same coverage.

            if (blk < next)
            {
                // Coverage of the first pixel of the span, as a 16-bit value

                int cover = winding[3] + delta[blk*4];

                static if (wr == WindingRule.NonZero)
                {
                    cover = abs(cover)*2;
                    if (cover > 0xFFFF) cover = 0xFFFF;
                }
                else
                {
                    short folded = cast(short) cover;
                    cover = (folded ^ (folded >> 15)) << 1;
                }

                // Below 0x100 the span is left untouched.

                if (cover >= 0x100)
                {
                    uint* px = row + blk*4;
                    uint* pxEnd = row + next*4;

                    if (opaque && (cover > 0xFF00))
                    {
                        // Opaque color at (almost) full coverage: plain store.

                        __m128i solid = _mm_set1_epi32(color);

                        for (; px < pxEnd; px += 4)
                            _mm_storeu_si128(cast(__m128i*) px, solid);
                    }
                    else
                    {
                        // Constant blend: dest*(1-alpha) + color*alpha,
                        // with alpha = source alpha * coverage.

                        __m128i alpha = _mm_mulhi_epu16(srcAlpha, _mm_set1_epi16(cast(ushort) cover));
                        __m128i addend = _mm_mulhi_epu16(srcColor, alpha);
                        addend = _mm_packus_epi16(addend, addend);
                        __m128i keep = _mm_not_si128(alpha); // 1-alpha

                        for (; px < pxEnd; px += 4)
                        {
                            __m128i pix = _mm_loadu_si128(cast(__m128i*) px);
                            __m128i lo = _mm_unpacklo_epi8(pix, ZERO);
                            __m128i hi = _mm_unpackhi_epi8(pix, ZERO);
                            lo = _mm_mulhi_epu16(lo, keep);
                            hi = _mm_mulhi_epu16(hi, keep);
                            pix = _mm_adds_epu8(_mm_packus_epi16(lo, hi), addend);
                            _mm_storeu_si128(cast(__m128i*) px, pix);
                        }
                    }
                }

                blk = next;
            }

            // From here the deltas have to be integrated block by block.

            uint* dst = row + blk*4;
            uint* dstEnd = row + blkEnd*4;
            int* dl = delta + blk*4;

            while (dst < dstEnd)
            {
                // Prefix sum of the four deltas, on top of the running winding

                __m128i sum = _mm_loadu_si128(cast(__m128i*) dl);
                sum = _mm_add_epi32(sum, _mm_slli_si128!4(sum));
                sum = _mm_add_epi32(sum, _mm_slli_si128!8(sum));
                sum = _mm_add_epi32(sum, winding);
                winding = _mm_shuffle_epi32!255(sum); // broadcast last lane
                _mm_storeu_si128(cast(__m128i*) dl, ZERO); // consume the deltas

                // Winding to 16-bit coverage, according to the winding rule

                static if (wr == WindingRule.NonZero)
                {
                    __m128i sign = _mm_srai_epi32(sum, 31);
                    __m128i mag = _mm_xor_si128(_mm_add_epi32(sign, sum), sign); // abs
                    __m128i cvr = _mm_packs_epi32(mag, ZERO);  // saturate/pack to int16
                    cvr = _mm_slli_epi16(cvr, 1);              // << to uint16
                }
                else
                {
                    __m128i cvr = _mm_and_si128(sum, LOW16);
                    __m128i sign = _mm_srai_epi16(cvr, 15);    // mask
                    cvr = _mm_xor_si128(cvr, sign);            // fold in half
                    cvr = _mm_packs_epi32(cvr, ZERO);          // pack to int16
                    cvr = _mm_slli_epi16(cvr, 1);              // << to uint16
                }

                // Destination pixels, channels widened to 16 bits

                __m128i pix = _mm_loadu_si128(cast(__m128i*) dst);
                __m128i d0 = _mm_unpacklo_epi8(pix, ZERO);
                __m128i d1 = _mm_unpackhi_epi8(pix, ZERO);

                // Multiply source alpha by coverage, then spread each
                // pixel's alpha over its four channels.

                __m128i a0 = _mm_mulhi_epu16(cvr, srcAlpha);
                a0 = _mm_unpacklo_epi16(a0, a0);
                __m128i a1 = _mm_unpackhi_epi32(a0, a0);
                a0 = _mm_unpacklo_epi32(a0, a0);

                // r = alpha*color + dest - alpha*dest

                __m128i r0 = _mm_add_epi16(_mm_mulhi_epu16(srcColor, a0), d0);
                r0 = _mm_sub_epi16(r0, _mm_mulhi_epu16(d0, a0));

                __m128i r1 = _mm_add_epi16(_mm_mulhi_epu16(srcColor, a1), d1);
                r1 = _mm_sub_epi16(r1, _mm_mulhi_epu16(d1, a1));

                _mm_storeu_si128(cast(__m128i*) dst, _mm_packus_epi16(r0, r1));

                blk++;
                dst += 4;
                dl += 4;

                // When the next four deltas are all zero, go back to the
                // outer loop, which handles spans of constant coverage.

                ulong* peek = cast(ulong*) dl;
                if ((peek[0] | peek[1]) == 0) break;
            }
        }
    }

    ubyte* pixels;       // destination surface
    size_t strideBytes;  // bytes between scanlines
    int height;          // number of scanlines
    uint color;          // fill color as given to init()
    ubyte[16] _xmColor;  // fill color, channels widened to 16 bits
    ubyte[16] _xmAlpha;  // fill alpha in every 16-bit lane
}
230 
231  void doBlit_ColorBlit(void* userData, int* delta, DMWord* mask, int x0, int x1, int y)
232  {
233      ColorBlit* cb = cast(ColorBlit*)userData;
234      return cb.color_blit!(WindingRule.NonZero)(delta, mask, x0, x1, y);
235  }