1 /**
2 * Analytic antialiasing rasterizer.
3 * Copyright: Copyright Chris Jones 2020.
4 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
5 */
6 module dplug.canvas.rasterizer;
7
8 import std.traits;
9 import dplug.core.math;
10 import dplug.core.vec;
11 import dplug.canvas.misc;
12
13 // Those asserts are disabled by default, since they are very slow in debug mode.
14 //debug = checkRasterizer;
15
16 /*
17 Analytic antialiasing rasterizer.
18 =================================
19
20 Internally works with 24:8 fixed point integer coordinates.
21
22 You need 8 bits fractional to get 256 levels of gray for almost
23 horizontal or almost vertical lines. Hence 24:8 fixed point.
24
25 It's a scanline based rasterizer. You add path data in the form
26 of lines and curves etc... Those are converted to edges. The edges
27 are converted to scanline coverage, then coverage is combined with
28 paint and blended to the destination pixels.
29
The coverage is stored in differentiated form: each cell in
the scanline stores the difference between cells rather than
the actual coverage. This means we don't have to track coverage
for long spans where nothing happens.
34
It also uses a bitmask to track changes in coverage. It's an idea
taken from Blend2D, although I think my implementation is different.
Basically, anywhere an edge crosses the current scanline, the bit
for the leftmost pixel it touches is set in the mask. So we can
use a bitscan instruction to find long spans of unchanging
coverage and where we need to start processing the coverage again.
41
42 The mask uses 1 bit for every 4 pixels, because the blitters are
43 processing 4 pixels at once with SIMD.
44
Clipping is handled by having left and right clip buffers; any edges
crossing the left or right boundary are split at the boundary. Parts inside
are handled normally, parts outside are added to the clip buffers so
that we keep track of what coverage they contribute. This is then
added to the scandelta at the start of processing each line. These
buffers use differentiated coverage.
51 */
52
53 /*
54 Winding rule
55 Gradient repeat mode
56 Angular gradient mode
57
58 (repeat modes not implemented yet)
59 */
60
61 nothrow:
62 @nogc:
63
/// Winding rule option (the "Winding rule" entry of the option list above).
/// NOTE(review): nothing in this module reads it; presumably consumed by the
/// blitters - confirm.
enum WindingRule
{
    NonZero,
    EvenOdd
}
69
/// Gradient repeat mode option. The option list above notes that repeat modes
/// are not implemented yet; nothing in this module reads this enum.
enum RepeatMode
{
    Pad,
    Repeat,
    Mirror
}
76
/// Angular gradient mode option (the "Angular gradient mode" entry of the
/// option list above). Nothing in this module reads this enum.
enum AngularMode
{
    Single,
    Double,
    Quad
}
83
/// Compositing operation applied by `Rasterizer.rasterize` when a blitted
/// scanline is combined with the destination image.
enum CompositeOp
{
    SourceOver,  // the blitter writes straight into the destination scanline
    Add,         // destination + blitted source, per channel, saturated
    Subtract,    // destination - blitted source, per channel, saturated
    LightenOnly, // (1 - alpha) * bg + max(bg, fg) * alpha
    DarkenOnly   // (1 - alpha) * bg + min(bg, fg) * alpha
}
92
/*
   Delta mask configuration:
     DMWord       - what word type to use for the mask
     dmPixPerBit  - how many pixels per bit
     dmPixPerWord - how many pixels per word
     dmWordMask   - bit mask for the width of DMWord
*/

// The mask word has the same size as a pointer: uint when pointers are 4 bytes,
// ulong when they are 8 bytes. Any other pointer size fails to compile.
static if ((void*).sizeof == 4)
    alias DMWord = uint;
else static if ((void*).sizeof == 8)
    alias DMWord = ulong;
else
    static assert(0);
107
108 private:
109
enum dmPixPerBit = 4;                                // pixels represented by one mask bit
enum dmPixPerWord = dmPixPerBit * 8 * DMWord.sizeof; // pixels represented by one whole mask word
enum dmWordMask = 8 * DMWord.sizeof - 1;             // wraps a bit index into the range of one word
113
/*
   Flags pixel 'x' in the delta mask. 'x' is a pixel coordinate, not a bit
   index: the word and the bit inside that word are both derived from it.
*/

void DMSetBit(DMWord* mask, uint x)
{
    const size_t wordIndex = x / dmPixPerWord;
    const size_t bitIndex = (x / dmPixPerBit) & dmWordMask;
    mask[wordIndex] |= (cast(DMWord) 1) << bitIndex;
}
122
/*
   Flags pixels in the delta mask starting at pixel 'x0' and advancing four
   pixels at a time for as long as the position does not exceed 'x1'.
   Both arguments are pixel coordinates, not bit indices.
*/
void DMSetBitRange(DMWord* mask, uint x0, int x1)
{
    for (uint px = x0; px <= x1; px += 4)
    {
        const size_t wordIndex = px / dmPixPerWord;
        const size_t bitIndex = (px / dmPixPerBit) & dmWordMask;
        mask[wordIndex] |= (cast(DMWord) 1) << bitIndex;
    }
}
131
/*
   A few constants for fixed point coordinates / gradients
*/

enum fpFracBits = 8;            // 8 bits fractional
enum fpScale = 256.0f;          // for converting from float (2^8, matches fpFracBits)
enum fpDXScale = 4294967296.0;  // convert to dx gradient in 32:32 (2^32)
enum fpDYScale = 1073741824.0;  // as above but div 4 (2^30)
140
/*
   Blitter callback. Does the actual blitting once coverage for the given
   scanline has been calculated.
     userData - opaque context pointer, passed through from Blitter.userData
     dest     - destination pixels
     delta    - pointer to the delta buffer
     mask     - pointer to delta mask
     x0       - start x
     x1       - end x
     y        - current y (needed for gradients)
*/

alias BlitFunction = void function(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y);
153
154
155
/*
   Note on MulDiv64 (defined at the end of this module):
   it computes a*b/c, with the intermediate result of a*b in 64 bit.
   An asm version might be faster in 32 bit mode (not tested yet), but the
   plain D version is the same speed with 64 bit / LDC.
*/
161
162 public:
163
/// Pairs a blit callback with the context pointer that is handed back to it
/// on every call made by `Rasterizer.rasterize`.
struct Blitter
{
    void* userData; // used for retrieving context
    BlitFunction doBlit; // invoked once per rasterized scanline
}
169
170 /// Rasterizer
171 package struct Rasterizer
172 {
173 public:
174 nothrow:
175 @nogc:
176
177 @disable this(this);
178
179
/*
   initialise -- This sets the clip rectangle, flushes any existing state
   and preps for drawing.

   The clip window left,top is inside, right,bottom is outside. So if the
   window is 100,100 --> 200,200, then pixel 100,100 can be modified but
   pixel 200,200 will not.

   The rasterizer however needs to allow coordinates that fall directly on
   the right side and bottom side of the clip even though those pixels are
   technically outside. It's easier and faster to give the temporary buffers
   a bit of extra room for overspill than it is to check and special case
   when it happens.

   Also the delta buffer and two clip buffers use differentiated coverage
   which also causes one extra pixel overspill. If you differentiate a
   sequence of length N you get a sequence of length N+1. Again it's easier
   and faster to just allow for the overspill than it is to check for and
   special case it.

   Params (all in whole pixels):
     left, top     - first column / row inside the clip window, must be >= 0
     right, bottom - first column / row outside the clip window
*/

void initialise(int left, int top, int right, int bottom)
{
    assert((left >= 0) && (left < right));
    assert((top >= 0) && (top < bottom));

    m_clipleft = left << fpFracBits;
    m_cliptop = top << fpFracBits;
    m_clipright = right << fpFracBits;
    m_clipbottom = bottom << fpFracBits;

    // reset edge buffer and Y extent tracking

    m_edgepool.reset();

    // The Y extent is compared against 24:8 coordinates in intLineTo and is
    // shifted down by fpFracBits in rasterize, so it has to start out in 24:8
    // as well. Starting from the raw pixel values made the minimum almost
    // never shrink, so rasterize began near row 0 whatever was drawn.
    m_yrmin = m_clipbottom;
    m_yrmax = m_cliptop;

    // init buffers
    m_scandelta.resize(roundUpPow2((right+3)|63));

    m_deltamaskSize = cast(size_t) roundUpPow2(1+right/dmPixPerWord);
    if (m_deltamaskSize > m_deltamaskAlloc)
    {
        // The byte count is derived from the element type, not from the size
        // of a pointer to it (the two only happen to match on 32/64-bit targets).
        m_deltamask = cast(DMWord*) alignedReallocDiscard(m_deltamask, m_deltamaskSize * DMWord.sizeof, 1);
        m_deltamaskAlloc = m_deltamaskSize;
    }

    m_buckets.resize(roundUpPow2(bottom+1));
    m_clipbfr_l.resize(roundUpPow2((bottom+2)|63));
    m_clipbfr_r.resize(roundUpPow2((bottom+2)|63));

    m_destBuf.resize( (right+3)*4 ); // 3 additional pixels, size of a scanline

    m_scandelta.fill(0);
    // m_deltamask is init on each rasterized line
    m_buckets.fill(null);
    m_clipbfr_l.fill(0);
    m_clipbfr_r.fill(0);

    // init prev x,y and sub path start x,y

    m_prevx = 0;
    m_prevy = 0;
    m_subpx = 0;
    m_subpy = 0;
    m_fprevx = 0;
    m_fprevy = 0;
}
248
/// Releases the delta mask allocation, if there is one, and forgets the pointer.
~this()
{
    DMWord* mask = m_deltamask;
    m_deltamask = null;
    if (mask !is null)
        alignedFree(mask, 1);
}
257
/*
   rasterize -- Converts the accumulated edges and clip buffers to coverage,
   one scanline at a time, and hands every scanline to the blitter.

     imagePixels       - base pointer of the destination image
     imagePitchInBytes - byte distance between two consecutive destination rows
     imageHeight       - not read by this function
     blitter           - callback + context that turns coverage into pixels
     compositeOp       - SourceOver blits straight into the image; any other
                         operation blits into m_destBuf first, and the result is
                         then combined with the image using SIMD

   Edge buckets and clip buffer entries are cleared as they are consumed, and
   the edge pool is reset on exit.
*/

void rasterize(ubyte* imagePixels,
               const size_t imagePitchInBytes,
               const int imageHeight,
               Blitter blitter,
               CompositeOp compositeOp)
{
    // 'dummy' is a sentinel head for the linked list of active edges
    Edge dummy;
    Edge* prev = &dummy;
    Edge* edge = null;

    // horizontal range handed to the blitter, in pixels: clip left rounded
    // down and clip right rounded up to a multiple of 4
    int startx = (m_clipleft >> fpFracBits) & 0xFFFFFFFC;
    int endx = ((m_clipright >> fpFracBits) + 3) & 0xFFFFFFFC;

    // scanline range taken from the tracked Y extent, end rounded up
    int starty = m_yrmin >> fpFracBits;
    int endy = (m_yrmax+255) >> fpFracBits;

    // running sums of the (differentiated) left/right clip buffers, and the
    // pixel columns they are applied to
    int cl_acc,cr_acc;
    int cl_pos = m_clipleft >> fpFracBits;
    int cr_pos = m_clipright >> fpFracBits;

    ubyte* pDest = imagePixels + imagePitchInBytes * starty;

    for (int y = starty; y < endy; y++)
    {
        m_deltamask[0..m_deltamaskSize] = 0;

        // bottom of the current scanline in 24:8
        int ly = (y << fpFracBits) + 256;

        // clip accumulator

        cl_acc += m_clipbfr_l[y];
        m_clipbfr_l[y] = 0;
        cr_acc += m_clipbfr_r[y];
        m_clipbfr_r[y] = 0;

        if (cl_acc) DMSetBit(m_deltamask, cl_pos);
        if (cr_acc) DMSetBit(m_deltamask, cr_pos);

        m_scandelta[cl_pos] += cl_acc;
        m_scandelta[cr_pos] += cr_acc;

        // At this point 'prev' either points at 'dummy' or at the last node in
        // active edges linked list, so we just add the new edges to it.

        prev.next = m_buckets[y];
        m_buckets[y] = null;

        // loop through the active edges

        prev = &dummy;
        edge = dummy.next;

        while (edge)
        {
            int ny = void;

            if (edge.y2 <= ly)
            {
                // edge ends inside this scanline: unlink it from the active list
                ny = edge.y2;
                prev.next = edge.next;
            }
            else
            {
                // edge continues below: process it up to the scanline bottom
                ny = ly;
                prev = edge;
            }

            // vertical distance covered in this scanline and the x reached at 'ny'
            int span = ny - edge.y;
            long nx = edge.x + edge.dx * span;

            // 'span' carrying the sign of edge.dy: (dy >> 63) | 1 is -1 or +1
            int bpspan = span * ((cast(int)(edge.dy>>63))|1);

            // edge.x holds a 24:8 coordinate shifted left by 32, so >> 40 gives
            // the pixel column and (>> 32) & 0xFF the 8 bit sub-pixel fraction
            int x0 = cast(int)(edge.x >> 40);
            int x1 = cast(int)(nx >> 40);
            int steps = x1 - x0;

            if (steps == 0)
            {
                // the edge stays within a single pixel column

                DMSetBit(m_deltamask, x0);

                int w = (edge.x >> 32) & 0xFF;
                int v = (nx >> 32) & 0xFF;
                int area = (bpspan * (512 - w - v)) >> 2;
                m_scandelta[x0] += area;
                x0++;
                m_scandelta[x0] += bpspan * 128 - area;
            }
            else if (steps > 0)
            {
                // the edge moves right across several pixel columns

                DMSetBitRange(m_deltamask, x0, x1);

                // first (partial) column
                int w = 256 - ((edge.x >> 32) & 0xFF);
                long acc = w * edge.dy;
                int area = cast(int)((w * acc) >> 32);
                m_scandelta[x0] += area;
                x0++;
                acc += edge.dy << 7;

                // fully crossed columns in between
                while (x0 < x1)
                {
                    int lc = area;
                    area = cast(int)(acc >> 23);
                    m_scandelta[x0] += area - lc;
                    x0++;
                    acc += edge.dy << 8;
                }

                // last (partial) column, plus the overspill cell to its right
                int q = (nx >> 32) & 0xFF;
                int rect = bpspan * 128;
                int lc = area;
                area = rect - cast(int)((q * q * edge.dy) >> 32);
                m_scandelta[x0] += area - lc;
                x0++;
                m_scandelta[x0] += rect - area;
            }
            else if (steps < 0)
            {
                // the edge moves left: same computation as above, walking the
                // columns from x1 up to x0 with the roles of edge.x and nx swapped

                DMSetBitRange(m_deltamask, x1, x0);

                int w = 256 - ((nx >> 32) & 0xFF);
                long acc = w * edge.dy;
                int area = cast(int)((w * acc) >> 32);
                m_scandelta[x1] += area;
                x1++;
                acc += edge.dy << 7;

                while (x1 < x0)
                {
                    int lc = area;
                    area = cast(int)(acc >> 23);
                    m_scandelta[x1] += area - lc;
                    x1++;
                    acc += edge.dy << 8;
                }

                int q = (edge.x >> 32) & 0xFF;
                int rect = bpspan * 128;
                int lc = area;
                area = rect - cast(int)((q * q * edge.dy) >> 32);
                m_scandelta[x1] += area - lc;
                x1++;
                m_scandelta[x1] += rect - area;
            }

            // advance the edge to where this scanline left it
            edge.x = nx;
            edge.y = ny;
            edge = edge.next;
        }

        // Blit scanline
        if (compositeOp == CompositeOp.SourceOver)
        {
            // special case for fastest blit, source-over is the default.
            blitter.doBlit(blitter.userData, cast(uint*)pDest, m_scandelta.ptr, m_deltamask, startx, endx, y);
        }
        else
        {
            uint* tmpBuf = cast(uint*)m_destBuf.ptr;
            uint* dest = cast(uint*)pDest;

            assert((startx & 3) == 0);
            assert((endx & 3) == 0);

            // Fill with zeroes
            tmpBuf[startx..endx] = 0; // black in rgba8

            // Write into destbuf.
            // There is an additional difficulty, because the pixels that were touched have an excess in -3 to +3 potentially.
            // This is done with coverage being zero. So our Porter-Duff would overwrite if we don't multiply with source-alpha!
            blitter.doBlit(blitter.userData, tmpBuf, m_scandelta.ptr, m_deltamask, startx, endx, y);

            // Custom composite operations go here.

            // Note: in HTML standard, destination alpha is considered, and the resulting color
            // is a weighted average of the dest and blended source. I suppose alpha also gets to be
            // a weighted average, but this implies a divide, and we assume background alpha to be 255.
            //
            // alpha-res = (255 * 255 + src-alpha * src-alpha) / (src-alpha + 255)
            // Under these circumstances, alpha-res is always close to 255.

            // OR-ed into every result below, forcing the top byte of each pixel to 0xff
            __m128i alphaMask = _mm_set1_epi32(0xff000000);

            final switch (compositeOp)
            {
                case CompositeOp.SourceOver:
                    assert(false);

                case CompositeOp.Add:
                    // widen both operands to 16 bit, add, then pack back to 8 bit
                    // with unsigned saturation; 4 pixels per iteration
                    for (int x = startx; x < endx; x += 4)
                    {
                        __m128i D = _mm_loadu_si128(cast(__m128i*) &dest[x]);
                        __m128i S = _mm_loadu_si128(cast(__m128i*) &tmpBuf[x]);
                        __m128i Z = _mm_setzero_si128();
                        __m128i D0_3 = _mm_unpacklo_epi8(D, Z);
                        __m128i D4_7 = _mm_unpackhi_epi8(D, Z);
                        __m128i S0_3 = _mm_unpacklo_epi8(S, Z);
                        __m128i S4_7 = _mm_unpackhi_epi8(S, Z);
                        D0_3 = _mm_add_epi16(D0_3, S0_3);
                        D4_7 = _mm_add_epi16(D4_7, S4_7);
                        __m128i R = _mm_packus_epi16(D0_3, D4_7);
                        R = _mm_or_si128(R, alphaMask);
                        _mm_storeu_si128(cast(__m128i*) &dest[x], R);
                    }
                    break;

                case CompositeOp.Subtract:
                    // same as Add with a 16 bit subtraction; negative results are
                    // clamped to 0 by the unsigned-saturating pack
                    for (int x = startx; x < endx; x += 4)
                    {
                        __m128i D = _mm_loadu_si128(cast(__m128i*) &dest[x]);
                        __m128i S = _mm_loadu_si128(cast(__m128i*) &tmpBuf[x]);
                        __m128i Z = _mm_setzero_si128();
                        __m128i D0_3 = _mm_unpacklo_epi8(D, Z);
                        __m128i D4_7 = _mm_unpackhi_epi8(D, Z);
                        __m128i S0_3 = _mm_unpacklo_epi8(S, Z);
                        __m128i S4_7 = _mm_unpackhi_epi8(S, Z);
                        D0_3 = _mm_sub_epi16(D0_3, S0_3);
                        D4_7 = _mm_sub_epi16(D4_7, S4_7);
                        __m128i R = _mm_packus_epi16(D0_3, D4_7);
                        R = _mm_or_si128(R, alphaMask);
                        _mm_storeu_si128(cast(__m128i*) &dest[x], R);
                    }
                    break;

                case CompositeOp.LightenOnly:
                    // (1 - alpha) * bg + max(bg, fg) * alpha
                    // == (1 - alpha) * bg + max(bg * alpha, fg * alpha) and we get fg.alpha
                    for (int x = startx; x < endx; x += 4)
                    {
                        __m128i D = _mm_loadu_si128(cast(__m128i*) &dest[x]); // BG
                        __m128i S = _mm_loadu_si128(cast(__m128i*) &tmpBuf[x]); // FG * A, A
                        __m128i Z = _mm_setzero_si128();

                        __m128i D0_1 = _mm_unpacklo_epi8(D, Z); // two background pixels
                        __m128i D2_3 = _mm_unpackhi_epi8(D, Z); // ditto
                        __m128i S0_1 = _mm_unpacklo_epi8(S, Z); // two foreground pixels
                        __m128i S2_3 = _mm_unpackhi_epi8(S, Z); // ditto

                        __m128i A0_1 = _mm_shufflelo_epi16!0xff(_mm_shufflehi_epi16!0xff(S0_1)); // A0 A0 A0 A0 A1 A1 A1 A1
                        __m128i A2_3 = _mm_shufflelo_epi16!0xff(_mm_shufflehi_epi16!0xff(S2_3)); // A2 A2 A2 A2 A3 A3 A3 A3
                        __m128i m255 = _mm_set1_epi16(255);
                        __m128i mA0_1 = _mm_sub_epi16(m255, A0_1); // (255-A0)x4 (255-A1)x4
                        __m128i mA2_3 = _mm_sub_epi16(m255, A2_3);

                        // move the 8 bit factors to the high byte, so that the
                        // high-half multiply below yields (factor * value) >> 8
                        A0_1 = _mm_slli_epi16(A0_1, 8);
                        A2_3 = _mm_slli_epi16(A2_3, 8);
                        mA0_1 = _mm_slli_epi16(mA0_1, 8);
                        mA2_3 = _mm_slli_epi16(mA2_3, 8);

                        // multiply bg * alpha
                        __m128i BGA0_1 = _mm_mulhi_epu16(A0_1, D0_1);
                        __m128i BGA2_3 = _mm_mulhi_epu16(A2_3, D2_3);

                        // multiply bg * (1 - alpha)
                        __m128i mBGA0_1 = _mm_mulhi_epu16(mA0_1, D0_1);
                        __m128i mBGA2_3 = _mm_mulhi_epu16(mA2_3, D2_3);

                        __m128i R0_1 = _mm_add_epi16( _mm_max_epi16(S0_1, BGA0_1), mBGA0_1);
                        __m128i R2_3 = _mm_add_epi16( _mm_max_epi16(S2_3, BGA2_3), mBGA2_3);

                        __m128i R = _mm_packus_epi16(R0_1, R2_3);
                        R = _mm_or_si128(R, alphaMask);
                        _mm_storeu_si128(cast(__m128i*) &dest[x], R);
                    }
                    break;

                case CompositeOp.DarkenOnly:
                    // (1 - alpha) * bg + min(bg, fg) * alpha
                    // == (1 - alpha) * bg + min(bg * alpha, fg * alpha) and we get fg.alpha
                    for (int x = startx; x < endx; x += 4)
                    {
                        __m128i D = _mm_loadu_si128(cast(__m128i*) &dest[x]); // BG
                        __m128i S = _mm_loadu_si128(cast(__m128i*) &tmpBuf[x]); // FG * A, A
                        __m128i Z = _mm_setzero_si128();

                        __m128i D0_1 = _mm_unpacklo_epi8(D, Z); // two background pixels
                        __m128i D2_3 = _mm_unpackhi_epi8(D, Z); // ditto
                        __m128i S0_1 = _mm_unpacklo_epi8(S, Z); // two foreground pixels
                        __m128i S2_3 = _mm_unpackhi_epi8(S, Z); // ditto

                        __m128i A0_1 = _mm_shufflelo_epi16!0xff(_mm_shufflehi_epi16!0xff(S0_1)); // A0 A0 A0 A0 A1 A1 A1 A1
                        __m128i A2_3 = _mm_shufflelo_epi16!0xff(_mm_shufflehi_epi16!0xff(S2_3)); // A2 A2 A2 A2 A3 A3 A3 A3
                        __m128i m255 = _mm_set1_epi16(255);
                        __m128i mA0_1 = _mm_sub_epi16(m255, A0_1); // (255-A0)x4 (255-A1)x4
                        __m128i mA2_3 = _mm_sub_epi16(m255, A2_3);

                        // same high-byte trick as in LightenOnly
                        A0_1 = _mm_slli_epi16(A0_1, 8);
                        A2_3 = _mm_slli_epi16(A2_3, 8);
                        mA0_1 = _mm_slli_epi16(mA0_1, 8);
                        mA2_3 = _mm_slli_epi16(mA2_3, 8);

                        // multiply bg * alpha
                        __m128i BGA0_1 = _mm_mulhi_epu16(A0_1, D0_1);
                        __m128i BGA2_3 = _mm_mulhi_epu16(A2_3, D2_3);

                        // multiply bg * (1 - alpha)
                        __m128i mBGA0_1 = _mm_mulhi_epu16(mA0_1, D0_1);
                        __m128i mBGA2_3 = _mm_mulhi_epu16(mA2_3, D2_3);

                        __m128i R0_1 = _mm_add_epi16( _mm_min_epi16(S0_1, BGA0_1), mBGA0_1);
                        __m128i R2_3 = _mm_add_epi16( _mm_min_epi16(S2_3, BGA2_3), mBGA2_3);

                        __m128i R = _mm_packus_epi16(R0_1, R2_3);
                        R = _mm_or_si128(R, alphaMask);
                        _mm_storeu_si128(cast(__m128i*) &dest[x], R);
                    }
                    break;
            }
        }

        // next destination row
        pDest += imagePitchInBytes;

        // clear scandelta overspill

        m_scandelta[endx] = 0;

        // the optional check below expects every other scandelta cell to
        // be zero again once the blitter has run
        debug(checkRasterizer)
        {

            size_t size = m_scandelta.length;
            foreach(e; m_scandelta) assert(e == 0);
        }
    }

    // clear clip buffers overspill

    m_clipbfr_l[endy] = 0;
    m_clipbfr_r[endy] = 0;

    debug(checkRasterizer)
    {
        foreach(e; m_clipbfr_l) assert(e == 0);
        foreach(e; m_clipbfr_r) assert(e == 0);
    }

    // clear m_buckets overspill, this is only needed because in very
    // rare cases an edge could end up on the bottom clip boundary
    // after splitting an edge; these should really be removed in the
    // clipping code

    m_buckets[endy] = null;

    debug(checkRasterizer)
    {
        foreach(e; m_buckets) assert(e == null);
    }

    m_edgepool.reset();
}
604
605 /*
606 drawing methods
607 */
608
/// Starts a new subpath at (x, y), given in pixels. The point is converted to
/// fixed point for the internal move and also remembered in floating point.
void moveTo(double x, double y)
{
    const int fixedX = cast(int)(x * fpScale);
    const int fixedY = cast(int)(y * fpScale);
    intMoveTo(fixedX, fixedY);

    m_fprevx = x;
    m_fprevy = y;
}
615
/// Starts a new subpath at (x, y), given in pixels. The point is converted to
/// fixed point for the internal move and also remembered in floating point.
void moveTo(float x, float y)
{
    const int fixedX = cast(int)(x * fpScale);
    const int fixedY = cast(int)(y * fpScale);
    intMoveTo(fixedX, fixedY);

    m_fprevx = x;
    m_fprevy = y;
}
622
/// Adds a straight segment from the previous point to (x, y), given in pixels,
/// and makes (x, y) the new previous point.
void lineTo(double x, double y)
{
    const int fixedX = cast(int)(x * fpScale);
    const int fixedY = cast(int)(y * fpScale);
    intLineTo(fixedX, fixedY);

    m_fprevx = x;
    m_fprevy = y;
}
629
/// Adds a straight segment from the previous point to (x, y), given in pixels,
/// and makes (x, y) the new previous point.
void lineTo(float x, float y)
{
    const int fixedX = cast(int)(x * fpScale);
    const int fixedY = cast(int)(y * fpScale);
    intLineTo(fixedX, fixedY);

    m_fprevx = x;
    m_fprevy = y;
}
636
/// Adds a quadratic Bezier curve from the previous point, with control point
/// (x1, y1) and end point (x2, y2), all in pixels. The curve is flattened by
/// recursive midpoint subdivision until the control point is close enough to
/// the middle of the curve, then emitted as straight segments.
///
/// The call is ignored when any coordinate, or the stored previous point, is
/// NaN or infinite.
void quadTo(float x1, float y1, float x2, float y2)
{
    import std.math : isFinite;

    // An infinite end point (or previous point) keeps the error estimate below
    // at infinity on every level of subdivision, so the recursion would only
    // stop when the stack overflows. Such input cannot be drawn anyway.
    if (!(isFinite(m_fprevx) && isFinite(m_fprevy)
          && isFinite(x1) && isFinite(y1)
          && isFinite(x2) && isFinite(y2)))
        return;

    // midpoints of the two control polygon legs, and the curve midpoint
    float x01 = (m_fprevx+x1)*0.5;
    float y01 = (m_fprevy+y1)*0.5;
    float x12 = (x1+x2)*0.5;
    float y12 = (y1+y2)*0.5;
    float xctr = (x01+x12)*0.5;
    float yctr = (y01+y12)*0.5;

    // squared distance between the control point and the curve midpoint
    float err = (x1-xctr)*(x1-xctr)+(y1-yctr)*(y1-yctr);

    if (err > 0.1)
    {
        // not flat enough: split in two halves (the first call moves the
        // stored previous point to the curve midpoint)
        quadTo(x01,y01,xctr,yctr);
        quadTo(x12,y12,x2,y2);
    }
    else
    {
        intLineTo(cast(int)(x2 * fpScale), cast(int)(y2 * fpScale));
    }

    m_fprevx = x2;
    m_fprevy = y2;
}
660
/// Adds a cubic Bezier curve from the previous point, with control points
/// (x1, y1) and (x2, y2) and end point (x3, y3), all in pixels. The curve is
/// flattened by recursive subdivision and emitted as straight segments.
///
/// The call is ignored when any coordinate, or the stored previous point, is
/// NaN or infinite.
void cubicTo(float x1, float y1, float x2, float y2, float x3, float y3)
{
    import std.math : isFinite;

    // With NaN or infinity in play the flatness comparison below can never
    // succeed, so the subdivision would recurse until the stack overflows.
    if (!(isFinite(m_fprevx) && isFinite(m_fprevy)
          && isFinite(x1) && isFinite(y1)
          && isFinite(x2) && isFinite(y2)
          && isFinite(x3) && isFinite(y3)))
        return;

    // Both control points sit on the end point: the curve is the straight
    // segment to (x3, y3). Emit it instead of subdividing. If the previous
    // point coincides too, the strict '<' below is 0 < 0, which never
    // holds, so subdividing would recurse forever; the segment is then empty
    // and the line below adds nothing.
    if (x1 == x2 && x2 == x3 && y1 == y2 && y2 == y3)
    {
        intLineTo(cast(int)(x3 * fpScale), cast(int)(y3 * fpScale));
        m_fprevx = x3;
        m_fprevy = y3;
        return;
    }

    // midpoints of the three control polygon legs
    float x01 = (m_fprevx+x1)*0.5;
    float y01 = (m_fprevy+y1)*0.5;
    float x12 = (x1+x2)*0.5;
    float y12 = (y1+y2)*0.5;
    float x23 = (x2+x3)*0.5;
    float y23 = (y2+y3)*0.5;

    // second level midpoints and the point where the curve is split
    float xc0 = (x01+x12)*0.5;
    float yc0 = (y01+y12)*0.5;
    float xc1 = (x12+x23)*0.5;
    float yc1 = (y12+y23)*0.5;
    float xctr = (xc0+xc1)*0.5;
    float yctr = (yc0+yc1)*0.5;

    // this flattening test code was from a page on the antigrain geometry
    // website.

    float dx = x3-m_fprevx;
    float dy = y3-m_fprevy;

    double d2 = fast_fabs(((x1 - x3) * dy - (y1 - y3) * dx));
    double d3 = fast_fabs(((x2 - x3) * dy - (y2 - y3) * dx));

    if((d2 + d3)*(d2 + d3) < 0.5 * (dx*dx + dy*dy))
    {
        intLineTo(cast(int)(x3 * fpScale), cast(int)(y3 * fpScale));
    }
    else
    {
        // not flat enough: split in two halves (the first call moves the
        // stored previous point to the split point)
        cubicTo(x01,y01,xc0,yc0,xctr,yctr);
        cubicTo(xc1,yc1,x23,y23,x3,y3);
    }

    m_fprevx = x3;
    m_fprevy = y3;
}
706
/// Closes the current subpath: when the previous point is not already the
/// subpath start, a line back to the subpath start is added.
void closePath()
{
    const bool alreadyClosed = (m_prevx == m_subpx) && (m_prevy == m_subpy);
    if (!alreadyClosed)
        intLineTo(m_subpx, m_subpy);
}
714
715 private:
716
// Internal moveTo, coordinates in 24:8 fixed point. Any existing subpath is
// closed first, because unclosed paths cause bad things to happen (visually
// at least). The given point then becomes both the previous point and the
// start of the new subpath.

void intMoveTo(int x, int y)
{
    closePath();

    m_subpx = m_prevx = x;
    m_subpy = m_prevy = y;
}
728
// internal lineTo, clips and adds the line to edge buckets and clip
// buffers as appropriate.
//
// x, y are in 24:8 fixed point. The line runs from the previous point
// (m_prevx, m_prevy) to (x, y); on exit (x, y) becomes the previous point,
// whether or not anything was added.

void intLineTo(int x, int y)
{
    // mixin for adding edges. For some reason LDC wouldn't inline this when
    // it was a separate function, and it was 15% slower that way.
    //
    // Builds the code that allocates an edge running from (x0,y0) to (x1,y1)
    // and pushes it on the bucket of the scanline containing y0. 'dir' selects
    // the sign of edge.dy: "+" gives (y1-y0), anything else gives (y0-y1).

    string addEdgeM(string x0, string y0, string x1, string y1, string dir)
    {
        string tmp = (dir == "+") ? (y1~"-"~y0) : (y0~"-"~y1);
        return
            "Edge* edge = m_edgepool.allocate();" ~
            "edge.dx = cast(long) (fpDXScale * ("~x1~"-"~x0~") / ("~y1~"-"~y0~"));" ~
            "edge.x = (cast(long) "~x0~") << 32;" ~
            "edge.y = "~y0~";" ~
            "edge.y2 = "~y1~";" ~
            "int by = "~y0~" >> fpFracBits;" ~
            "int xxx = max(abs("~x1~"-"~x0~"),1);" ~
            "edge.dy = cast(long) (fpDYScale * ("~tmp~") / xxx);" ~
            "edge.next = m_buckets[by];" ~
            "m_buckets[by] = edge;";
    }

    // mixin for clip accumulator.
    //
    // Builds the code that records, in clip buffer 'side' ("l" or "r"), the
    // span from y0 to y1. Four cells are touched: the rows containing y0 and
    // y1 and the rows right after them. 'dir' is "+" or "-" and turns the
    // updates into += or -=.

    string addToClip(string y0, string y1, string side, string dir)
    {
        return
            "{ int i0 = "~y0~" >> fpFracBits;" ~
            "int f0 = ("~y0~" & 0xFF) << 7;" ~
            "int i1 = "~y1~" >> fpFracBits;" ~
            "int f1 = ("~y1~" & 0xFF) << 7;" ~
            "m_clipbfr_"~side~"[i0] "~dir~"= 32768-f0;" ~
            "m_clipbfr_"~side~"[i0+1] "~dir~"= f0;" ~
            "m_clipbfr_"~side~"[i1] "~dir~"= f1-32768;" ~
            "m_clipbfr_"~side~"[i1+1] "~dir~"= -f1; }";
    }

    // handle upward and downward lines separately; in both branches the
    // endpoints are ordered so that y0 <= y1, and the original direction is
    // carried by the "+" / "-" argument of the mixins

    if (m_prevy < y)
    {
        int x0 = m_prevx, y0 = m_prevy, x1 = x, y1 = y;

        // edge is outside clip box or horizontal

        if ((y0 == y1) || (y0 >= m_clipbottom) || (y1 <= m_cliptop))
        {
            goto finished;
        }

        // clip to top and bottom

        if (y0 < m_cliptop)
        {
            x0 = x0 + MulDiv64(m_cliptop - y0, x1 - x0, y1 - y0);
            y0 = m_cliptop;
        }

        if (y1 > m_clipbottom)
        {
            x1 = x0 + MulDiv64(m_clipbottom - y0, x1 - x0, y1 - y0);
            y1 = m_clipbottom;
        }

        // track y extent

        if (y0 < m_yrmin) m_yrmin = y0;
        if (y1 > m_yrmax) m_yrmax = y1;

        // generate horizontal zoning flags, these are set depending on where
        // x0 and x1 are in respect of the clip box.
        //   bit 0: x0 left of clip    bit 1: x0 right of clip
        //   bit 2: x1 left of clip    bit 3: x1 right of clip

        uint a = cast(uint)(x0<m_clipleft);
        uint b = cast(uint)(x0>m_clipright);
        uint c = cast(uint)(x1<m_clipleft);
        uint d = cast(uint)(x1>m_clipright);
        uint flags = a | (b*2) | (c*4) | (d*8);

        if (flags == 0) // bit faster to pull no clip out front
        {
            mixin(addEdgeM("x0","y0","x1","y1","+"));
            goto finished;
        }

        // note clipping here can occasionally result in horizontals, and can
        // occasionally put a horizontal on the bucket for clipbottom, which is
        // outside the drawable area; currently it allows it and zeros that
        // bucket after rasterization.
        //
        // 'sy', 'sl' and 'sr' below are the y at which the line crosses the
        // left / right clip boundary.

        switch (flags)
        {
            case (1): // 0001 --> x0 left, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addToClip("y0","sy","l","+"));
                mixin(addEdgeM("m_clipleft","sy","x1","y1","+"));
                break;
            case (2): // 0010 --> x0 right, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sy","r","+"));
                mixin(addEdgeM("m_clipright","sy","x1","y1","+"));
                break;
            case (4): // 0100 --> x0 center, x1 left
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipleft","sy","+"));
                mixin(addToClip("sy","y1","l","+"));
                break;
            case (5): // 0101 --> x0 left, x1 left
                mixin(addToClip("y0","y1","l","+"));
                break;
            case (6): // 0110 --> x0 right, x1 left
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sr","r","+"));
                mixin(addEdgeM("m_clipright","sr","m_clipleft","sl","+"));
                mixin(addToClip("sl","y1","l","+"));
                break;
            case (8): // 1000 --> x0 center, x1 right
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipright","sy","+"));
                mixin(addToClip("sy","y1","r","+"));
                break;
            case (9): // 1001 --> x0 left, x1 right
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sl","l","+"));
                mixin(addEdgeM("m_clipleft","sl","m_clipright","sr","+"));
                mixin(addToClip("sr","y1","r","+"));
                break;
            case (10): // 1010 --> x0 right, x1 right
                mixin(addToClip("y0","y1","r","+"));
                break;
            default: // everything else is NOP
                break;
        }
    }
    else
    {
        // upward (or horizontal) line: endpoints swapped so that y0 <= y1
        int x1 = m_prevx, y1 = m_prevy, x0 = x, y0 = y;

        // edge is outside clip box or horizontal

        if ((y0 == y1) || (y0 >= m_clipbottom) || (y1 <= m_cliptop))
        {
            goto finished;
        }

        // clip to top and bottom

        if (y0 < m_cliptop)
        {
            x0 = x0 + MulDiv64(m_cliptop - y0, x1 - x0, y1 - y0);
            y0 = m_cliptop;
        }

        if (y1 > m_clipbottom)
        {
            x1 = x0 + MulDiv64(m_clipbottom - y0, x1 - x0, y1 - y0);
            y1 = m_clipbottom;
        }

        // track y extent

        if (y0 < m_yrmin) m_yrmin = y0;
        if (y1 > m_yrmax) m_yrmax = y1;

        // generate horizontal zoning flags, these are set depending on where
        // x0 and x1 are in respect of the clip box.
        //   bit 0: x0 left of clip    bit 1: x0 right of clip
        //   bit 2: x1 left of clip    bit 3: x1 right of clip

        uint a = cast(uint)(x0<m_clipleft);
        uint b = cast(uint)(x0>m_clipright);
        uint c = cast(uint)(x1<m_clipleft);
        uint d = cast(uint)(x1>m_clipright);
        uint flags = a | (b*2) | (c*4) | (d*8);

        if (flags == 0) // bit faster to pull no clip out front
        {
            mixin(addEdgeM("x0","y0","x1","y1","-"));
            goto finished;
        }

        // note clipping here can occasionally result in horizontals, and can
        // occasionally put a horizontal on the bucket for clipbottom, which is
        // outside the drawable area; currently it allows it and zeros that
        // bucket after rasterization.

        switch (flags)
        {
            case (1): // 0001 --> x0 left, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addToClip("y0","sy","l","-"));
                mixin(addEdgeM("m_clipleft","sy","x1","y1","-"));
                break;
            case (2): // 0010 --> x0 right, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sy","r","-"));
                mixin(addEdgeM("m_clipright","sy","x1","y1","-"));
                break;
            case (4): // 0100 --> x0 center, x1 left
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipleft","sy","-"));
                mixin(addToClip("sy","y1","l","-"));
                break;
            case (5): // 0101 --> x0 left, x1 left
                mixin(addToClip("y0","y1","l","-"));
                break;
            case (6): // 0110 --> x0 right, x1 left
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sr","r","-"));
                mixin(addEdgeM("m_clipright","sr","m_clipleft","sl","-"));
                mixin(addToClip("sl","y1","l","-"));
                break;
            case (8): // 1000 --> x0 center, x1 right
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipright","sy","-"));
                mixin(addToClip("sy","y1","r","-"));
                break;
            case (9): // 1001 --> x0 left, x1 right
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sl","l","-"));
                mixin(addEdgeM("m_clipleft","sl","m_clipright","sr","-"));
                mixin(addToClip("sr","y1","r","-"));
                break;
            case (10): // 1010 --> x0 right, x1 right
                mixin(addToClip("y0","y1","r","-"));
                break;
            default: // everything else is NOP
                break;
        }
    }

finished:

    // the end point becomes the previous point even when the line was rejected
    m_prevx = x;
    m_prevy = y;
}
968
// edge struct -- one line segment, kept in the per-scanline buckets and then
// in the active edge list while it is being rasterized

struct Edge
{
    // x:  current x; a 24:8 coordinate shifted left by 32 bits
    // dx: added to x for every 24:8 unit of y travelled (slope scaled by fpDXScale)
    // dy: signed y-per-x gradient scaled by fpDYScale; the sign records the
    //     direction the original line was drawn in
    long x, dx, dy;
    // y:  current y in 24:8, advanced scanline by scanline
    // y2: y in 24:8 at which the edge ends
    int y, y2;
    // next edge in the same bucket / active list, or null
    Edge* next;
}
977
// pool the edges are allocated from; reset by initialise() and at the end
// of rasterize()

ArenaAllocator!(Edge,100) m_edgepool;

// Note: the reason why it's Vec is to avoid an initialization that would reallocate down.

Vec!(Edge*) m_buckets;   // per scanline: list of edges starting on that scanline
Vec!int m_scandelta;     // differentiated coverage of the current scanline

// delta mask: number of words in use, number of words allocated, storage

size_t m_deltamaskSize = 0;
size_t m_deltamaskAlloc = 0;
DMWord* m_deltamask;

// differentiated coverage contributed by geometry left / right of the clip
// window, indexed by scanline

Vec!int m_clipbfr_l;
Vec!int m_clipbfr_r;

// clip rectangle, in 24:8 fixed point

int m_clipleft;
int m_cliptop;
int m_clipright;
int m_clipbottom;

// keeps track of y extent

int m_yrmin,m_yrmax;

// start of current subpath (internal coords)

int m_subpx,m_subpy;

// previous x,y (internal coords)

int m_prevx,m_prevy;

// previous x,y float coords

float m_fprevx,m_fprevy;

// Temporary dest buffer for Porter Duff operations.
Vec!ubyte m_destBuf;
1017 }
1018
// the rasterizer itself should be a reusable, small object suitable for the stack;
// compilation fails if the struct reaches 280 bytes
static assert(Rasterizer.sizeof < 280);
1021
1022 private:
1023
/// Returns a*b/c. The product is formed in 64 bit so it cannot overflow; the
/// quotient is truncated back to int. `c` must not be zero.
int MulDiv64(int a, int b, int c)
{
    const long product = cast(long) a * b;
    const long quotient = product / c;
    return cast(int) quotient;
}