1 /**
2 * Implement the plain color fill style. dplug:canvas internals.
3 *
4 * Copyright: Copyright Chris Jones 2020.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module dplug.canvas.colorblit;
8 
9 import dplug.canvas.rasterizer;
10 import dplug.canvas.misc;
11 
12 /*
13   ColorBlit
14 */
15 
16 nothrow:
17 @nogc:
18 
/**
  Plain (single color) paint for the canvas rasterizer.

  `init` selects the color; `color_blit` then composites that color into one
  scanline, using the coverage deltas accumulated for that line.
*/
struct ColorBlit
{
nothrow:
@nogc:

    /**
      Set the fill color and precompute the SIMD constants used while blitting.

      Params:
          color = 32-bit pixel value. Bits 24..31 are used as the alpha;
                  the remaining three bytes are blended channel by channel
                  into the destination in the same byte positions.
    */
    void init(uint color)
    {
        this.color = color;

        // Widen the four 8-bit channels to 16-bit lanes, then duplicate the
        // low 64 bits so the register holds the color for two pixels.
        __m128i xmColor = _mm_loadu_si32 (&color);
        xmColor = _mm_unpacklo_epi8 (xmColor, _mm_setzero_si128());
        xmColor = _mm_unpacklo_epi64 (xmColor, xmColor);

        // Alpha byte moved to the high byte of a 16-bit lane (alpha * 256),
        // broadcast to all eight lanes.
        __m128i xmAlpha = _mm_set1_epi16 (cast(ushort) ((color >> 24) << 8));

        _mm_storeu_si128(cast(__m128i*)_xmColor.ptr, xmColor);
        _mm_storeu_si128(cast(__m128i*)_xmAlpha.ptr, xmAlpha);
    }

private:

    /**
      Composite the fill color into one scanline.

      The line is processed in groups of 4 pixels. Runs of groups without
      pending deltas have constant coverage and are skipped, filled solid, or
      blended with a single precomputed alpha. Groups with deltas are
      integrated (prefix sum) and blended with per-pixel coverage; consumed
      deltas are reset to zero.

      Params:
          wr    = winding rule applied to the integrated winding value.
          dest  = destination pixels of the scanline, indexed by x.
          delta = signed coverage deltas of the scanline, indexed by x.
                  Entries that get integrated are cleared.
          mask  = forwarded to `nextSetBit` to find the next 4-pixel group
                  with pending deltas (presumably one bit per group - confirm
                  against the rasterizer).
          x0    = first pixel to process, must be >= 0 and a multiple of 4.
          x1    = end pixel (exclusive), must be a multiple of 4.
          y     = scanline index; only checked to be >= 0.
    */
    void color_blit(WindingRule wr)(uint* dest, int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert(x0 >= 0);
        assert(y >= 0);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // main blit variables

        int bpos = x0 / 4;          // current 4-pixel group
        int endbit = x1 / 4;        // one past the last 4-pixel group
        __m128i xmWinding = 0;      // running winding value, broadcast to all lanes
        bool isopaque = (color >> 24) == 0xFF;

        // XMM constants

        immutable __m128i XMZERO = 0;
        immutable __m128i XMMSK16 = [0xFFFF,0xFFFF,0xFFFF,0xFFFF];

        // paint variables

        __m128i xmColor = _mm_loadu_si128(cast(__m128i*)_xmColor.ptr);
        __m128i xmAlpha = _mm_loadu_si128(cast(__m128i*)_xmAlpha.ptr);
        __m128i xmSolid = _mm_set1_epi32(color); // four copies of the raw pixel, for opaque fills

        // main loop

        while (bpos < endbit)
        {
            int nsb = nextSetBit(mask, bpos, endbit);

            // do we have a span of unchanging coverage?

            if (bpos < nsb)
            {
                // Calc coverage of first pixel, as a 16-bit value

                static if (wr == WindingRule.NonZero)
                {
                    // |winding| * 2, clamped to 0xFFFF
                    int cover = xmWinding[3]+delta[bpos*4];
                    cover = abs(cover)*2;
                    if (cover > 0xFFFF) cover = 0xFFFF;
                }
                else
                {
                    // Low 16 bits of the winding, negative values inverted, then * 2
                    int cover = xmWinding[3]+delta[bpos*4];
                    short tsc = cast(short) cover;
                    cover = (tsc ^ (tsc >> 15)) << 1;
                }

                // We can skip the span

                if (cover < 0x100)
                {
                    bpos = nsb;
                }

                // Or fill span with solid color

                else if (isopaque && (cover > 0xFF00))
                {
                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        _mm_storeu_si128(cast(__m128i*)ptr, xmSolid);
                        ptr+=4;
                    }

                    bpos = nsb;
                }

                // Or fill the span with transparent color

                else
                {
                    // Effective alpha = color alpha * coverage, constant over the span.
                    __m128i tpma = _mm_set1_epi16(cast(ushort) cover);
                    tpma = _mm_mulhi_epu16(xmAlpha,tpma);

                    // Premultiplied color, packed back to bytes (same pixel four times).
                    __m128i tpmc = _mm_mulhi_epu16(xmColor,tpma);
                    tpmc = _mm_packus_epi16(tpmc,tpmc);
                    tpma  = _mm_not_si128(tpma); // 1-alpha

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        // dest = dest * (1-alpha) + color * alpha, 4 pixels at a time
                        __m128i d0 = _mm_loadu_si128(cast(__m128i*)ptr);
                        __m128i d1 = _mm_unpackhi_epi8(d0,XMZERO);
                        d0 = _mm_unpacklo_epi8(d0,XMZERO);
                        d0 = _mm_mulhi_epu16(d0,tpma);
                        d1 = _mm_mulhi_epu16(d1,tpma);
                        d0 = _mm_packus_epi16(d0,d1);
                        d0 =  _mm_adds_epu8(d0,tpmc);
                        _mm_storeu_si128(cast(__m128i*)ptr,d0);
                        ptr+=4;
                    }

                    bpos = nsb;
                }
            }

            // At this point we need to integrate scandelta

            uint* ptr = &dest[bpos*4];
            uint* end = &dest[endbit*4];
            int* dlptr = &delta[bpos*4];

            while (ptr < end)
            {
                // Integrate delta values: prefix sum of the 4 deltas, plus the
                // winding carried over from the previous group.

                __m128i tqw = _mm_loadu_si128(cast(__m128i*)dlptr);
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!4(tqw));
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!8(tqw));
                tqw = _mm_add_epi32(tqw, xmWinding);
                xmWinding = _mm_shuffle_epi32!255(tqw);       // carry = last lane, broadcast
                _mm_storeu_si128(cast(__m128i*)dlptr,XMZERO); // consumed deltas are cleared

                // Process coverage values taking account of winding rule

                static if (wr == WindingRule.NonZero)
                {
                    __m128i tcvr = _mm_srai_epi32(tqw,31); // sign mask
                    tqw = _mm_add_epi32(tcvr,tqw);
                    tqw = _mm_xor_si128(tqw,tcvr);        // abs
                    tcvr = _mm_packs_epi32(tqw,XMZERO);   // saturate/pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);       // << to uint16
                }
                else
                {
                    __m128i tcvr = _mm_and_si128(tqw,XMMSK16); // keep low 16 bits
                    tqw = _mm_srai_epi16(tcvr,15);       // mask
                    tcvr = _mm_xor_si128(tcvr,tqw);      // fold in half
                    tcvr = _mm_packs_epi32(tcvr,XMZERO); // pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);      // << to uint16
                }

                // Load destination pixels, widened to 16 bits per channel:
                // d0 = pixels 0-1, d1 = pixels 2-3

                __m128i d01 = _mm_loadu_si128(cast(__m128i*) ptr);
                __m128i d0 = _mm_unpacklo_epi8 (d01, XMZERO);
                __m128i d1 = _mm_unpackhi_epi8 (d01, XMZERO);

                // multiply source alpha & coverage, then replicate each
                // pixel's alpha across its four channel lanes

                __m128i a0 = _mm_mulhi_epu16(tcvr,xmAlpha);
                a0 = _mm_unpacklo_epi16(a0,a0);
                __m128i a1 = _mm_unpackhi_epi32(a0,a0);
                a0 = _mm_unpacklo_epi32(a0,a0);

                // r = alpha*color + dest - alpha*dest

                __m128i r0 = _mm_mulhi_epu16(xmColor,a0);
                __m128i tmp = _mm_mulhi_epu16(d0,a0);
                r0 = _mm_add_epi16(r0, d0);
                r0 = _mm_sub_epi16(r0, tmp);

                __m128i r1 = _mm_mulhi_epu16(xmColor,a1);
                tmp   = _mm_mulhi_epu16(d1,a1);
                r1 = _mm_add_epi16(r1, d1);
                r1 = _mm_sub_epi16(r1, tmp);

                __m128i r01 = _mm_packus_epi16(r0,r1);

                _mm_storeu_si128(cast(__m128i*)ptr,r01);

                bpos++;
                ptr+=4;
                dlptr+=4;

                // End of the scanline: leave before peeking at the next
                // deltas, so delta[] is never read at or beyond x1.
                if (ptr >= end) break;

                // The next group has no pending deltas: coverage is constant
                // again, so go back to span processing.
                if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0) break;
            }
        }
    }

    /// Fill color as passed to `init`; bits 24..31 are the alpha.
    uint color;

    /// Color channels widened to 16-bit lanes, for two pixels (written by `init`).
    ubyte[16] _xmColor;

    /// Alpha * 256 in each of the eight 16-bit lanes (written by `init`).
    ubyte[16] _xmAlpha;
}
218 
/// Type-erased blit entry point for the non-zero winding rule.
/// `userData` is reinterpreted as a `ColorBlit*`; every other argument is
/// handed unchanged to `ColorBlit.color_blit`.
void doBlit_ColorBlit_NonZero(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y)
{
    auto blitter = cast(ColorBlit*) userData;
    blitter.color_blit!(WindingRule.NonZero)(dest, delta, mask, x0, x1, y);
}
224 
/// Type-erased blit entry point for the even-odd winding rule.
/// `userData` is reinterpreted as a `ColorBlit*`; every other argument is
/// handed unchanged to `ColorBlit.color_blit`.
void doBlit_ColorBlit_EvenOdd(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y)
{
    auto blitter = cast(ColorBlit*) userData;
    blitter.color_blit!(WindingRule.EvenOdd)(dest, delta, mask, x0, x1, y);
}