1 /**
2  * Analytic antialiasing rasterizer.
3  * Copyright: Copyright Chris Jones 2020.
4  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
5  */
6 module dplug.canvas.rasterizer;
7 
8 import std.traits;
9 import dplug.core.math;
10 import dplug.core.vec;
11 import dplug.canvas.misc;
12 
13 // Those asserts are disabled by default, since they are very slow in debug mode.
14 //debug = checkRasterizer;
15 
16 /*
17   Analytic antialiasing rasterizer.
18   =================================
19 
20   Internally works with 24:8 fixed point integer coordinates.
21 
22   You need 8 bits fractional to get 256 levels of gray for almost
23   horizontal or almost vertical lines. Hence 24:8 fixed point.
24 
25   It's a scanline based rasterizer. You add path data in the form
26   of lines and curves etc... Those are converted to edges. The edges
27   are converted to scanline coverage, then coverage is combined with
28   paint and blended to the destination pixels.
29 
  The coverage is stored in differentiated form: each cell in
  the scanline stores the difference between cells rather than
  the actual coverage. This means we don't have to track coverage
  for long spans where nothing happens.
34 
35   It also uses a bitmask to track changes in coverage. It's an idea
36   taken from Blend2D although i think my implementation is different.
37   Basically anywhere an edge crosses the current scanline the bit
38   for the leftmost pixel it touches is set in the mask. So we can
39   use a bitscan instruction to find long spans of unchanging
40   coverage and where we need to start processing the coverage again.
41 
42   The mask uses 1 bit for every 4 pixels, because the blitters are
43   processing 4 pixels at once with SIMD.
44 
  Clipping is handled by having left and right clip buffers. Any edges
  crossing the left or right boundary are split at the boundary. Parts inside
  are handled normally, parts outside are added to the clip buffers so
  that we keep track of what coverage they contribute. This is then
  added to the scandelta at the start of processing each line. These
  buffers use differentiated coverage.
51 */
52 
53 /*
54    Winding rule
55    Gradient repeat mode
56    Angular gradient mode
57 
58    (repeat modes not implemented yet)
59 */
60 
61 nothrow:
62 @nogc:
63 
/// Winding rule selector.
/// NOTE(review): nothing in this module reads this enum; presumably the
/// blitters use it to turn accumulated winding into coverage -- confirm there.
enum WindingRule
{
    NonZero,
    EvenOdd
}
69 
/// Gradient repeat mode. The module notes above state that repeat modes
/// are not implemented yet; nothing in this module reads this enum.
enum RepeatMode
{
    Pad,
    Repeat,
    Mirror
}
76 
/// Angular gradient mode. Nothing in this module reads this enum.
/// NOTE(review): the exact meaning of Single/Double/Quad is not visible
/// here -- confirm against the gradient code.
enum AngularMode
{
    Single,
    Double,
    Quad
}
83 
84 /*
85   Delta mask stuff
86   what word type to use for mask
87   how many pixels per bit
88   how many pixels per word
89   bit mask for width of DMWord
90 */
91 
// DMWord is the word type of the delta mask: an unsigned integer as wide
// as a pointer (uint when pointers are 4 bytes, ulong when they are 8).
// Any other pointer size is rejected at compile time.
static if ((void*).sizeof == 4)
    alias DMWord = uint;
else static if ((void*).sizeof == 8)
    alias DMWord = ulong;
else
    static assert(0);
98 
99 private:
100 
// How many pixels one bit of the delta mask stands for.
enum dmPixPerBit = 4;
// How many pixels one whole DMWord of the delta mask stands for.
enum dmPixPerWord = dmPixPerBit * 8 * DMWord.sizeof;
// AND-mask that reduces a bit index to the range 0 .. (bits in a DMWord)-1.
enum dmWordMask = 8 * DMWord.sizeof - 1;
104 
/*
  Set a bit in the delta mask. 'x' is a pixel coordinate, not a bit index.
*/
108 
void DMSetBit(DMWord* mask, uint x)
{
    // One mask bit stands for dmPixPerBit pixels: pick the word that holds
    // pixel x, then the bit inside that word.
    const wordIndex = x / dmPixPerWord;
    const bitIndex = (x / dmPixPerBit) & dmWordMask;
    const DMWord bit = (cast(DMWord)1) << bitIndex;
    mask[wordIndex] |= bit;
}
113 
/*
  Set the delta mask bit of every 4-pixel group that overlaps the inclusive
  pixel range x0..x1 ('x0' and 'x1' are pixel coordinates, not bit indices).

  The walk starts at the first pixel of the group containing x0. Stepping
  by dmPixPerBit from an unaligned x0 could step over the group containing
  x1 (e.g. x0 = 5, x1 = 8 only flagged the group of pixel 5). Flagging a
  group whose coverage does not actually change is harmless, missing one
  is not.
*/
void DMSetBitRange(DMWord* mask, uint x0, int x1)
{
    // A negative end means an empty range. Without this guard the mixed
    // uint/int comparison would convert x1 to a huge unsigned value and the
    // loop would write far beyond the mask.
    if (x1 < 0)
        return;

    const uint last = cast(uint) x1;

    // last <= int.max, so 'x += dmPixPerBit' cannot wrap before the loop ends.
    for (uint x = x0 - (x0 % dmPixPerBit); x <= last; x += dmPixPerBit)
    {
        mask[x/dmPixPerWord] |= (cast(DMWord)1) << ((x / dmPixPerBit) & dmWordMask);
    }
}
122 
123 /*
124   Few constants for fixed point coordinates / gradients
125 */
126 
enum fpFracBits = 8;    // 8 bits fractional (24:8 coordinates)
enum fpScale = 256.0f;  // for converting from float: 2^fpFracBits
enum fpDXScale = 4294967296.0; // convert to dx gradient in 32:32 (2^32)
enum fpDYScale = 1073741824.0; // as above but div 4 (2^30)
131 
132 /*
133   Blitter delegate. A callback that does the actual blitting once coverage
134   for the given scanline has been calculated.
135     delta - pointer to the delta buffer
136     mask  - pointer to delta mask
137     x0    - start x
138     x1    - end x
139     y     - y position
140 */
141 
// The first argument, userData, receives Blitter.userData unchanged (see
// the call in Rasterizer.rasterize).
alias BlitFunction = void function(void* userData, int* delta, DMWord* mask, int x0, int x1, int y);
143 
144 
145 
/*
  Note: this comment describes MulDiv64, which is defined at the end of
  this module.

  a*b/c, with the intermediate result of a*b in 64 bit.
  An asm version might be faster in 32 bit mode (not tested yet), but the
  plain D version is the same speed with 64bit / LDC.
*/
151 
152 public:
153 
/// A blit callback together with the opaque pointer that is handed back to
/// it. Rasterizer.rasterize calls doBlit once per scanline, passing userData
/// as the first argument.
struct Blitter
{
    void* userData; // used for retrieving context
    BlitFunction doBlit; // called as doBlit(userData, delta, mask, x0, x1, y)
}
159 
160 /// Rasterizer
161 package struct Rasterizer
162 {
163 public:
164 nothrow:
165 @nogc:
166 
167     @disable this(this);
168 
169 
170     /*
      initialise -- This sets the clip rectangle, flushes any existing state
      and preps for drawing.
173 
174       The clip window left,top is inside, right,bottom is outside. So if the
175       window is 100,100 --> 200,200, then pixel 100,100 can be modified but
176       pixel 200,200 will not.
177 
      The rasterizer however needs to allow coordinates that fall directly on
      the right side and bottom side of the clip even though those pixels are
      technically outside. It's easier and faster to give the temporary buffers
      a bit of extra room for overspill than it is to check and special case
      when it happens.
183 
184       Also the delta buffer and two clip buffers use differentiated coverage
185       which also causes one extra pixel overspill. If you differentiate a
186       sequence of length N you get a sequence of length N+1. Again it's easier
187       and faster to just allow for the overspill than it is to check for and
188       special case it.
189     */
190 
191     void initialise(int left, int top, int right, int bottom)
192     {
193         assert((left >= 0) && (left < right));
194         assert((top >= 0) && (top < bottom));
195 
196         m_clipleft = left << fpFracBits;
197         m_cliptop = top << fpFracBits;
198         m_clipright = right << fpFracBits;
199         m_clipbottom = bottom << fpFracBits;
200 
201         // reset edge buffer and Y extent tracking
202 
203         m_edgepool.reset();
204         m_yrmin = bottom;
205         m_yrmax = top;
206 
207         // init buffers
208         m_scandelta.resize(roundUpPow2((right+3)|63));
209 
210         m_deltamaskSize = cast(size_t) roundUpPow2(1+right/dmPixPerWord);
211         if (m_deltamaskSize > m_deltamaskAlloc)
212         {
213             m_deltamask = cast(DMWord*) alignedReallocDiscard(m_deltamask, m_deltamaskSize * (DMWord*).sizeof, 1);
214             m_deltamaskAlloc = m_deltamaskSize;
215         }
216 
217         m_buckets.resize(roundUpPow2(bottom+1));
218         m_clipbfr_l.resize(roundUpPow2((bottom+2)|63));
219         m_clipbfr_r.resize(roundUpPow2((bottom+2)|63));
220 
221         m_scandelta.fill(0);
222         // m_deltamask is init on each rasterized line
223         m_buckets.fill(null);
224         m_clipbfr_l.fill(0);
225         m_clipbfr_r.fill(0);
226 
227         // init prev x,y and sub path start x,y
228 
229         m_prevx = 0;
230         m_prevy = 0;
231         m_subpx = 0;
232         m_subpy = 0;
233         m_fprevx = 0;
234         m_fprevy = 0;
235     }
236 
237     ~this()
238     {
239         if (m_deltamask)
240         {
241             alignedFree(m_deltamask, 1);
242             m_deltamask = null;
243         }
244     }
245 
246     // rasterize
247 
    /*
      Turn every edge added since initialise() into per-scanline coverage
      deltas and hand each scanline to the blitter.

      For each scanline between the tracked y extent (m_yrmin .. m_yrmax):
        - the delta mask is cleared,
        - the left/right clip accumulators are folded into m_scandelta,
        - edges starting on this line are linked into the active edge list,
        - every active edge adds its contribution to m_scandelta and flags
          the touched pixels in the delta mask,
        - blitter.doBlit is called for the line.

      Afterwards the overspill cells are cleared and the edge pool is reset.

      NOTE(review): only m_scandelta[endx] is cleared here after a blit while
      the debug check expects the whole buffer to be zero, so the blitter is
      presumably expected to zero the deltas it consumes -- confirm.
    */
    void rasterize(Blitter blitter)
    {
        // 'dummy' is the head sentinel of the active edge list
        Edge dummy;
        Edge* prev = &dummy;
        Edge* edge = null;

        // horizontal blit range in pixels: start rounded down, end rounded up
        // to a multiple of 4
        int startx = (m_clipleft >> fpFracBits) & 0xFFFFFFFC;
        int endx = ((m_clipright >> fpFracBits) + 3) & 0xFFFFFFFC;
        // vertical range in pixels, from the tracked y extent (end rounded up)
        int starty = m_yrmin >> fpFracBits;
        int endy = (m_yrmax+255) >> fpFracBits;

        // running sums of the left / right clip buffers, and the pixel columns
        // they are applied at
        int cl_acc,cr_acc;
        int cl_pos = m_clipleft >> fpFracBits;
        int cr_pos = m_clipright >> fpFracBits;

        for (int y = starty; y < endy; y++)
        {
            m_deltamask[0..m_deltamaskSize] = 0;
            // fixed point y of the bottom of this scanline
            int ly = (y << fpFracBits) + 256;

            // clip accumulator

            cl_acc += m_clipbfr_l[y];
            m_clipbfr_l[y] = 0;
            cr_acc += m_clipbfr_r[y];
            m_clipbfr_r[y] = 0;

            if (cl_acc) DMSetBit(m_deltamask, cl_pos);
            if (cr_acc) DMSetBit(m_deltamask, cr_pos);

            m_scandelta[cl_pos] += cl_acc;
            m_scandelta[cr_pos] += cr_acc;

            // At this point 'prev' either points at 'dummy' or at the last node in
            //   active edges linked list, so we just add the new edges to it.

            prev.next = m_buckets[y];
            m_buckets[y] = null;

            // loop through the active edges

            prev = &dummy;
            edge = dummy.next;

            while (edge)
            {
                // y at which this edge stops within the current scanline
                int ny = void;

                if (edge.y2 <= ly)
                {
                    // edge ends on this scanline: unlink it from the active list
                    ny = edge.y2;
                    prev.next = edge.next;
                }
                else
                {
                    // edge continues below: stop at the scanline bottom, keep it
                    ny = ly;
                    prev = edge;
                }

                // vertical distance covered on this scanline, and x at its end
                int span = ny - edge.y;
                long nx = edge.x + edge.dx * span;

                // span carrying the sign of edge.dy: (dy>>63) is -1 or 0, |1
                // turns that into -1 or +1
                int bpspan = span * ((cast(int)(edge.dy>>63))|1);

                // whole-pixel x at start and end (edge.x has 40 fractional bits)
                int x0 = cast(int)(edge.x >> 40);
                int x1 = cast(int)(nx >> 40);
                int steps = x1 - x0;

                if (steps == 0)
                {
                    // start and end fall in the same pixel column

                    DMSetBit(m_deltamask, x0);

                    // 8 bit x fractions at the start and end of the span
                    int w = (edge.x >> 32) & 0xFF;
                    int v = (nx >> 32) & 0xFF;
                    int area = (bpspan * (512 - w - v)) >> 2;
                    m_scandelta[x0] += area;
                    x0++;
                    // the remainder of bpspan * 128 goes to the next cell
                    m_scandelta[x0] += bpspan * 128 - area;
                }
                else if (steps > 0)
                {
                    // edge moves to the right across several pixel columns

                    DMSetBitRange(m_deltamask, x0, x1);

                    // first pixel: w is the distance from the start x to the
                    // right side of its pixel, in 1/256 pixel
                    int w = 256 - ((edge.x >> 32) & 0xFF);
                    long acc = w * edge.dy;
                    int area = cast(int)((w * acc) >> 32);
                    m_scandelta[x0] += area;
                    x0++;
                    acc += edge.dy << 7;

                    // fully crossed pixels; each cell stores the difference to
                    // the previous area (lc)
                    while (x0 < x1)
                    {
                        int lc = area;
                        area = cast(int)(acc >> 23);
                        m_scandelta[x0] += area - lc;
                        x0++;
                        acc += edge.dy << 8;
                    }

                    // last pixel: q is the x fraction at the end of the span
                    int q = (nx >> 32) & 0xFF;
                    int rect = bpspan * 128;
                    int lc = area;
                    area = rect - cast(int)((q * q * edge.dy) >> 32);
                    m_scandelta[x0] += area - lc;
                    x0++;
                    m_scandelta[x0] += rect - area;
                }
                else if (steps < 0)
                {
                    // edge moves to the left: same as above with the roles of
                    // the start (edge.x / x0) and end (nx / x1) swapped

                    DMSetBitRange(m_deltamask, x1, x0);

                    int w = 256 - ((nx >> 32) & 0xFF);
                    long acc = w * edge.dy;
                    int area = cast(int)((w * acc) >> 32);
                    m_scandelta[x1] += area;
                    x1++;
                    acc += edge.dy << 7;

                    while (x1 < x0)
                    {
                        int lc = area;
                        area = cast(int)(acc >> 23);
                        m_scandelta[x1] += area - lc;
                        x1++;
                        acc += edge.dy << 8;
                    }

                    int q = (edge.x >> 32) & 0xFF;
                    int rect = bpspan * 128;
                    int lc = area;
                    area = rect - cast(int)((q * q * edge.dy) >> 32);
                    m_scandelta[x1] += area - lc;
                    x1++;
                    m_scandelta[x1] += rect - area;
                }

                // advance the edge to where this scanline left it
                edge.x = nx;
                edge.y = ny;
                edge = edge.next;
            }

            // Blit scanline

            blitter.doBlit(blitter.userData, m_scandelta.ptr, m_deltamask, startx, endx, y);

            // clear scandelta overspill

            m_scandelta[endx] = 0;

            debug(checkRasterizer)
            {

                size_t size = m_scandelta.length;
                foreach(e; m_scandelta) assert(e == 0);
            }
        }

        // clear clip buffers overspill

        m_clipbfr_l[endy] = 0;
        m_clipbfr_r[endy] = 0;

        debug(checkRasterizer)
        {
           foreach(e; m_clipbfr_l) assert(e == 0);
           foreach(e; m_clipbfr_r) assert(e == 0);
        }

        // clear m_buckets overspill. This is only needed because in very
        // rare cases an edge could end up on the bottom clip boundary
        // after splitting an edge; these should really be removed in the
        // clipping code

        m_buckets[endy] = null;

        debug(checkRasterizer)
        {
           foreach(e; m_buckets) assert(e == null);
        }

        m_edgepool.reset();
    }
430 
431     /*
432       drawing methods
433     */
434 
435     void moveTo(double x, double y)
436     {
437         intMoveTo(cast(int)(x * fpScale), cast(int)(y * fpScale));
438         m_fprevx = x;
439         m_fprevy = y;
440     }
441 
442     void moveTo(float x, float y)
443     {
444         intMoveTo(cast(int)(x * fpScale), cast(int)(y * fpScale));
445         m_fprevx = x;
446         m_fprevy = y;
447     }
448 
449     void lineTo(double x, double y)
450     {
451         intLineTo(cast(int)(x * fpScale), cast(int)(y * fpScale));
452         m_fprevx = x;
453         m_fprevy = y;
454     }
455 
456     void lineTo(float x, float y)
457     {
458         intLineTo(cast(int)(x * fpScale), cast(int)(y * fpScale));
459         m_fprevx = x;
460         m_fprevy = y;
461     }
462 
463     void quadTo(float x1, float y1, float x2, float y2)
464     {
465         float x01 = (m_fprevx+x1)*0.5;
466         float y01 = (m_fprevy+y1)*0.5;
467         float x12 = (x1+x2)*0.5;
468         float y12 = (y1+y2)*0.5;
469         float xctr = (x01+x12)*0.5;
470         float yctr = (y01+y12)*0.5;
471         float err = (x1-xctr)*(x1-xctr)+(y1-yctr)*(y1-yctr);
472 
473         if (err > 0.1)
474         {
475             quadTo(x01,y01,xctr,yctr);
476             quadTo(x12,y12,x2,y2);
477         }
478         else
479         {
480             intLineTo(cast(int)(x2 * fpScale), cast(int)(y2 * fpScale));
481         }
482 
483         m_fprevx = x2;
484         m_fprevy = y2;
485     }
486 
487     // TODO: when all points are the same => stack overflow
488     void cubicTo(float x1, float y1, float x2, float y2, float x3, float y3)
489     {
490         float x01 = (m_fprevx+x1)*0.5;
491         float y01 = (m_fprevy+y1)*0.5;
492         float x12 = (x1+x2)*0.5;
493         float y12 = (y1+y2)*0.5;
494         float x23 = (x2+x3)*0.5;
495         float y23 = (y2+y3)*0.5;
496         
497         float xc0 = (x01+x12)*0.5;
498         float yc0 = (y01+y12)*0.5;
499         float xc1 = (x12+x23)*0.5;
500         float yc1 = (y12+y23)*0.5;
501         float xctr = (xc0+xc1)*0.5;
502         float yctr = (yc0+yc1)*0.5;
503         
504         // this flattenening test code was from a page on the antigrain geometry
505         // website.
506 
507         float dx = x3-m_fprevx;
508         float dy = y3-m_fprevy;
509 
510         double d2 = fast_fabs(((x1 - x3) * dy - (y1 - y3) * dx));
511         double d3 = fast_fabs(((x2 - x3) * dy - (y2 - y3) * dx));
512 
513         if((d2 + d3)*(d2 + d3) < 0.5 * (dx*dx + dy*dy))
514         {
515             intLineTo(cast(int)(x3 * fpScale), cast(int)(y3 * fpScale));
516         }
517         else
518         {
519             cubicTo(x01,y01,xc0,yc0,xctr,yctr);
520             cubicTo(xc1,yc1,x23,y23,x3,y3);
521         }
522 
523         m_fprevx = x3;
524         m_fprevy = y3;
525     }
526 
527     void closePath()
528     {
529         if ((m_prevx != m_subpx) || (m_prevy != m_subpy))
530         {
531             intLineTo(m_subpx, m_subpy);
532         }
533     }
534 
535 private:
536 
537     // internal moveTo. Note this will close any existing subpath because
538     // unclosed paths cause bad things to happen. (visually at least)
539 
540     void intMoveTo(int x, int y)
541     {
542         closePath();
543         m_prevx = x;
544         m_prevy = y;
545         m_subpx = x;
546         m_subpy = y;
547     }
548 
549     // internal lineTo, clips and adds the line to edge buckets and clip
550     // buffers as appropriate
551 
    /*
      Add the line from the previous point (m_prevx, m_prevy) to (x, y), both
      in 24:8 fixed point, then make (x, y) the previous point.

      The line is first ordered so that y0 <= y1, remembering the drawing
      direction as "+" (drawn with increasing y) or "-". It is dropped if it
      is horizontal or entirely above / below the clip rectangle, otherwise
      it is clipped to the top and bottom. It is then split at the left and
      right clip boundaries: parts inside become edges in m_buckets, parts
      outside are recorded in the left / right clip buffers.
    */
    void intLineTo(int x, int y)
    {
        // mixin for adding edges. For some reason LDC wouldnt inline this when
        // it was a seperate function, and it was 15% slower that way
        //
        // The generated code takes an Edge from the pool, fills it in for the
        // line (x0,y0)-(x1,y1) and pushes it on the bucket of the scanline
        // containing y0. 'dir' selects the sign of edge.dy: "+" gives
        // (y1-y0), "-" gives (y0-y1).
        //
        // NOTE(review): if the two y arguments are equal the edge.dx
        // expression divides by zero in floating point. The comments before
        // the switches below say clipping can occasionally produce such
        // horizontals -- confirm this is benign.

        string addEdgeM(string x0, string y0, string x1, string y1, string dir)
        {
            string tmp = (dir == "+") ? (y1~"-"~y0) : (y0~"-"~y1);
            return
                "Edge* edge = m_edgepool.allocate();" ~
                "edge.dx = cast(long) (fpDXScale * ("~x1~"-"~x0~") / ("~y1~"-"~y0~"));" ~
                "edge.x = (cast(long) "~x0~") << 32;" ~
                "edge.y = "~y0~";" ~
                "edge.y2 = "~y1~";" ~
                "int by = "~y0~" >> fpFracBits;" ~
                "int xxx = max(abs("~x1~"-"~x0~"),1);" ~
                "edge.dy = cast(long) (fpDYScale * ("~tmp~") /  xxx);" ~
                "edge.next = m_buckets[by];" ~
                "m_buckets[by] = edge;";
        }

        // mixin for clip accumulator
        //
        // The generated code records the vertical span y0..y1 in clip buffer
        // 'side' ("l" or "r"): it updates the entries for the scanlines
        // containing y0 and y1 and the entry after each of them, using the
        // 8 bit y fractions. 'dir' ("+" or "-") selects whether the values
        // are added or subtracted.

        string addToClip(string y0, string y1, string side, string dir)
        {
            return
                "{ int i0 = "~y0~" >> fpFracBits;" ~
                "int f0 = ("~y0~" & 0xFF) << 7;" ~
                "int i1 = "~y1~" >> fpFracBits;" ~
                "int f1 = ("~y1~" & 0xFF) << 7;" ~
                "m_clipbfr_"~side~"[i0] "~dir~"= 32768-f0;" ~
                "m_clipbfr_"~side~"[i0+1] "~dir~"= f0;" ~
                "m_clipbfr_"~side~"[i1] "~dir~"= f1-32768;" ~
                "m_clipbfr_"~side~"[i1+1] "~dir~"= -f1; }";
        }

        // handle upward and downward lines seperately

        if (m_prevy < y)
        {
            // y increases from the previous point to the new one: (x0,y0) is
            // the previous point, direction is "+"
            int x0 = m_prevx, y0 = m_prevy, x1 = x, y1 = y;

            // edge is outside clip box or horizontal

            if ((y0 == y1) || (y0 >= m_clipbottom) || (y1 <= m_cliptop))
            {
                goto finished;
            }

            // clip to top and bottom

            if (y0 < m_cliptop)
            {
                x0 = x0 + MulDiv64(m_cliptop - y0, x1 - x0,  y1 - y0);
                y0 = m_cliptop;
            }

            if (y1 > m_clipbottom)
            {
                x1 = x0 + MulDiv64(m_clipbottom - y0, x1 - x0, y1 - y0);
                y1 = m_clipbottom;
            }

            // track y extent

            if (y0 < m_yrmin) m_yrmin = y0;
            if (y1 > m_yrmax) m_yrmax = y1;

            // generate horizontal zoning flags, these are set depending on where
            // x0 and x1 are in respect of the clip box.
            //   bit 0: x0 left of clip    bit 1: x0 right of clip
            //   bit 2: x1 left of clip    bit 3: x1 right of clip

            uint a = cast(uint)(x0<m_clipleft);
            uint b = cast(uint)(x0>m_clipright);
            uint c = cast(uint)(x1<m_clipleft);
            uint d = cast(uint)(x1>m_clipright);
            uint flags = a | (b*2) | (c*4) | (d*8);

            if (flags == 0) // bit faster to pull no clip out front
            {
                mixin(addEdgeM("x0","y0","x1","y1","+"));
                goto finished;
            }

            // note clipping here can occasionally result in horizontals, and can
            // occasionally put a horizontal on the bucket for clipbottom, which is
            // outside the drawable area; currently it allows it and zeros that
            // bucket after rasterization.

            // sy / sl / sr below are the y values where the line crosses the
            // left (sl) or right (sr) clip boundary

            switch (flags)
            {
            case (1): // 0001 --> x0 left, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addToClip("y0","sy","l","+"));
                mixin(addEdgeM("m_clipleft","sy","x1","y1","+"));
                break;
            case (2): // 0010 --> x0 right, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sy","r","+"));
                mixin(addEdgeM("m_clipright","sy","x1","y1","+"));
                break;
            case (4): // 0100 --> x0 center, x1 left
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipleft","sy","+"));
                mixin(addToClip("sy","y1","l","+"));
                break;
            case (5): // 0101 --> x0 left, x1 left
                mixin(addToClip("y0","y1","l","+"));
                break;
            case (6): // 0110 --> x0 right, x1 left
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sr","r","+"));
                mixin(addEdgeM("m_clipright","sr","m_clipleft","sl","+"));
                mixin(addToClip("sl","y1","l","+"));
                break;
            case (8): // 1000 --> x0 center, x1 right
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipright","sy","+"));
                mixin(addToClip("sy","y1","r","+"));
                break;
            case (9): // 1001 --> x0 left, x1 right
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sl","l","+"));
                mixin(addEdgeM("m_clipleft","sl","m_clipright","sr","+"));
                mixin(addToClip("sr","y1","r","+"));
                break;
            case (10): // 1010 --> x0 right, x1 right
                mixin(addToClip("y0","y1","r","+"));
                break;
            default: // everything else is NOP
                break;
            }
        }
        else
        {
            // y does not increase: the end points are swapped so that (x0,y0)
            // is the new point and y0 <= y1 still holds, direction is "-".
            // A horizontal line (y == m_prevy) also lands here and is dropped
            // by the y0 == y1 test below.
            int x1 = m_prevx, y1 = m_prevy, x0 = x, y0 = y;

            // edge is outside clip box or horizontal

            if ((y0 == y1) || (y0 >= m_clipbottom) || (y1 <= m_cliptop))
            {
                goto finished;
            }

            // clip to top and bottom

            if (y0 < m_cliptop)
            {
                x0 = x0 + MulDiv64(m_cliptop - y0, x1 - x0,  y1 - y0);
                y0 = m_cliptop;
            }

            if (y1 > m_clipbottom)
            {
                x1 = x0 + MulDiv64(m_clipbottom - y0, x1 - x0, y1 - y0);
                y1 = m_clipbottom;
            }

            // track y extent

            if (y0 < m_yrmin) m_yrmin = y0;
            if (y1 > m_yrmax) m_yrmax = y1;

            // generate horizontal zoning flags, these are set depending on where
            // x0 and x1 are in respect of the clip box.
            //   bit 0: x0 left of clip    bit 1: x0 right of clip
            //   bit 2: x1 left of clip    bit 3: x1 right of clip

            uint a = cast(uint)(x0<m_clipleft);
            uint b = cast(uint)(x0>m_clipright);
            uint c = cast(uint)(x1<m_clipleft);
            uint d = cast(uint)(x1>m_clipright);
            uint flags = a | (b*2) | (c*4) | (d*8);

            if (flags == 0) // bit faster to pull no clip out front
            {
                mixin(addEdgeM("x0","y0","x1","y1","-"));
                goto finished;
            }

            // note clipping here can occasionally result in horizontals, and can
            // occasionally put a horizontal on the bucket for clipbottom, which is
            // outside the drawable area; currently it allows it and zeros that
            // bucket after rasterization.

            // sy / sl / sr below are the y values where the line crosses the
            // left (sl) or right (sr) clip boundary

            switch (flags)
            {
            case (1): // 0001 --> x0 left, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addToClip("y0","sy","l","-"));
                mixin(addEdgeM("m_clipleft","sy","x1","y1","-"));
                break;
            case (2): // 0010 --> x0 right, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sy","r","-"));
                mixin(addEdgeM("m_clipright","sy","x1","y1","-"));
                break;
            case (4): // 0100 --> x0 center, x1 left
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipleft","sy","-"));
                mixin(addToClip("sy","y1","l","-"));
                break;
            case (5): // 0101 --> x0 left, x1 left
                mixin(addToClip("y0","y1","l","-"));
                break;
            case (6): // 0110 --> x0 right, x1 left
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sr","r","-"));
                mixin(addEdgeM("m_clipright","sr","m_clipleft","sl","-"));
                mixin(addToClip("sl","y1","l","-"));
                break;
            case (8): // 1000 --> x0 center, x1 right
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipright","sy","-"));
                mixin(addToClip("sy","y1","r","-"));
                break;
            case (9): // 1001 --> x0 left, x1 right
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sl","l","-"));
                mixin(addEdgeM("m_clipleft","sl","m_clipright","sr","-"));
                mixin(addToClip("sr","y1","r","-"));
                break;
            case (10): // 1010 --> x0 right, x1 right
                mixin(addToClip("y0","y1","r","-"));
                break;
            default: // everything else is NOP
                break;
            }
        }

    finished:

        // the new point becomes the previous point, whether or not anything
        // was added

        m_prevx = x;
        m_prevy = y;
    }
788 
789     // edge struct
790 
    // One edge of the active / bucket linked lists. Filled in by the
    // addEdgeM mixin in intLineTo and consumed by rasterize.
    struct Edge
    {
        // x:  current x, a 24:8 coordinate shifted left by 32 bits
        // dx: change of x per one unit of y (1/256 pixel), same scale as x
        // dy: fpDYScale * (y extent) / max(|x extent|, 1); its sign encodes
        //     the drawing direction of the line
        long x, dx, dy;
        // y: current y, y2: end y, both 24:8 fixed point
        int y, y2;
        // next edge in the same list, null at the end
        Edge* next;
    }
797 
798     ArenaAllocator!(Edge,100) m_edgepool;
799 
800     // PERF: no reasons to be Vec here
801     Vec!(Edge*) m_buckets;
802     Vec!int m_scandelta;
803 
804     size_t m_deltamaskSize = 0;
805     size_t m_deltamaskAlloc = 0;
806     DMWord* m_deltamask;
807 
808     Vec!int m_clipbfr_l;
809     Vec!int m_clipbfr_r;
810 
811     // clip rectangle, in 24:8 fixed point
812 
813     int m_clipleft;
814     int m_cliptop;
815     int m_clipright;
816     int m_clipbottom;
817 
818     // keeps track of y extent
819 
820     int m_yrmin,m_yrmax;
821 
822     // start of current subpath, 
823 
824     int m_subpx,m_subpy;
825 
826     // previous x,y (internal coords)
827 
828     int m_prevx,m_prevy;
829 
830     // previous x,y float coords
831 
832     float m_fprevx,m_fprevy;
833 
834 }
835 
836 // the rasterizer itself should be a reusable, small object suitable for the stack
837 static assert(Rasterizer.sizeof < 256);
838 
839 private:
840 
/// Computes a*b/c with the product held in 64 bit, so the multiplication
/// itself cannot overflow. The quotient is truncated to int.
int MulDiv64(int a, int b, int c)
{
    // widen one operand first so the whole product is computed as long
    const long product = cast(long) a * b;
    const long quotient = product / c;
    return cast(int) quotient;
}