/**
 * Analytic antialiasing rasterizer.
 * Copyright: Copyright Chris Jones 2020.
 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 */
module dplug.canvas.rasterizer;

import std.traits;
import dplug.core.math;
import dplug.core.vec;
import dplug.canvas.misc;

// Those asserts are disabled by default, since they are very slow in debug mode.
//debug = checkRasterizer;

/*
  Analytic antialiasing rasterizer.
  =================================

  Internally works with 24:8 fixed point integer coordinates.

  You need 8 bits fractional to get 256 levels of gray for almost
  horizontal or almost vertical lines. Hence 24:8 fixed point.

  It's a scanline based rasterizer. You add path data in the form
  of lines and curves etc... Those are converted to edges. The edges
  are converted to scanline coverage, then coverage is combined with
  paint and blended to the destination pixels.

  The coverage is stored in differentiated form, each cell in
  the scanline stores the difference between cells rather than
  the actual coverage. This means we don't have to track coverage
  for long spans where nothing happens.

  It also uses a bitmask to track changes in coverage. It's an idea
  taken from Blend2D although I think my implementation is different.
  Basically anywhere an edge crosses the current scanline the bit
  for the leftmost pixel it touches is set in the mask. So we can
  use a bitscan instruction to find long spans of unchanging
  coverage and where we need to start processing the coverage again.

  The mask uses 1 bit for every 4 pixels, because the blitters are
  processing 4 pixels at once with SIMD.

  Clipping is handled by having left and right clip buffers, any edges
  crossing the left or right boundary are split at the boundary. Parts inside
  are handled normally, parts outside are added to the clip buffers so
  that we keep track of what coverage they contribute. This is then
  added to the scandelta at the start of processing each line. These
  buffers use differentiated coverage.
*/

/*
   Winding rule
   Gradient repeat mode
   Angular gradient mode

   (repeat modes not implemented yet)
*/

nothrow:
@nogc:

enum WindingRule
{
    NonZero,
    EvenOdd
}

enum RepeatMode
{
    Pad,
    Repeat,
    Mirror
}

enum AngularMode
{
    Single,
    Double,
    Quad
}

enum CompositeOp
{
    SourceOver,
    Add,
    Subtract,
    LightenOnly,
    DarkenOnly
}

/*
  Delta mask stuff
  what word type to use for mask
  how many pixels per bit
  how many pixels per word
  bit mask for width of DMWord
*/

static if ((void*).sizeof == 4)
    alias DMWord = uint;
else static if ((void*).sizeof == 8)
    alias DMWord = ulong;
else
    static assert(0);

private:

enum dmPixPerBit = 4;
enum dmPixPerWord = dmPixPerBit * 8 * DMWord.sizeof;
enum dmWordMask = 8 * DMWord.sizeof - 1;

/*
  Set a bit in the delta mask. 'x' is a pixel coordinate, not a bit index:
  the bit that is set is the one covering the group of dmPixPerBit pixels
  that contains 'x'.
*/

void DMSetBit(DMWord* mask, uint x)
{
    mask[x/dmPixPerWord] |= (cast(DMWord)1) << ((x / dmPixPerBit) & dmWordMask);
}

/*
  Set delta mask bits for the pixel coordinates x0, x0+dmPixPerBit,
  x0+2*dmPixPerBit, ... for as long as the coordinate is <= x1.

  NOTE(review): because the walk starts at x0 and advances by a whole bit's
  worth of pixels, the bit that contains x1 is not visited when x0 is not a
  multiple of dmPixPerBit and x1 lies early in its group (e.g. x0 = 3,
  x1 = 4). Confirm against the blitters whether that is intended.
*/

void DMSetBitRange(DMWord* mask, uint x0, int x1)
{
    while (x0 <= x1)
    {
        mask[x0/dmPixPerWord] |= (cast(DMWord)1) << ((x0 / dmPixPerBit) & dmWordMask);
        x0 += dmPixPerBit;
    }
}

/*
  Few constants for fixed point coordinates / gradients
*/

enum fpFracBits = 8;    // 8 bits fractional
enum fpScale = 256.0f;  // for converting from float
enum fpDXScale = 4294967296.0; // convert to dx gradient in 32:32
enum fpDYScale = 1073741824.0; // as above but div 4

/*
  Blitter delegate. A callback that does the actual blitting once coverage
  for the given scanline has been calculated.
    dest  - destination pixels
    delta - pointer to the delta buffer
    mask  - pointer to delta mask
    x0    - start x
    x1    - end x
    y     - current y (needed for gradients)
*/

alias BlitFunction = void function(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y);

public:

/// A blit callback together with the opaque context pointer that is handed
/// back to it on every call.
struct Blitter
{
    void* userData; // used for retrieving context
    BlitFunction doBlit;
}

/// Rasterizer
package struct Rasterizer
{
public:
nothrow:
@nogc:

    @disable this(this);


    /*
      initialise -- This sets the clip rectangle, flushes any existing state
      and preps for drawing.

      The clip window left,top is inside, right,bottom is outside. So if the
      window is 100,100 --> 200,200, then pixel 100,100 can be modified but
      pixel 200,200 will not.

      The rasterizer however needs to allow coordinates that fall directly on
      the right side and bottom side of the clip even though those pixels are
      technically outside. It's easier and faster to give the temporary buffers
      a bit extra room for overspill than it is to check and special case
      when it happens.

      Also the delta buffer and two clip buffers use differentiated coverage
      which also causes one extra pixel overspill. If you differentiate a
      sequence of length N you get a sequence of length N+1. Again it's easier
      and faster to just allow for the overspill than it is to check for and
      special case it.
    */

    void initialise(int left, int top, int right, int bottom)
    {
        assert((left >= 0) && (left < right));
        assert((top >= 0) && (top < bottom));

        m_clipleft = left << fpFracBits;
        m_cliptop = top << fpFracBits;
        m_clipright = right << fpFracBits;
        m_clipbottom = bottom << fpFracBits;

        // reset edge buffer and Y extent tracking.
        // The Y extent is tracked in 24:8 fixed point (intLineTo compares it
        // with fixed point edge coordinates and rasterize shifts it down by
        // fpFracBits), so the empty extent must be expressed in fixed point
        // too: min starts at the clip bottom and max at the clip top.

        m_edgepool.reset();
        m_yrmin = m_clipbottom;
        m_yrmax = m_cliptop;

        // init buffers
        m_scandelta.resize(roundUpPow2((right+3)|63));

        m_deltamaskSize = cast(size_t) roundUpPow2(1+right/dmPixPerWord);
        if (m_deltamaskSize > m_deltamaskAlloc)
        {
            // size in bytes of m_deltamaskSize mask words
            m_deltamask = cast(DMWord*) alignedReallocDiscard(m_deltamask, m_deltamaskSize * DMWord.sizeof, 1);
            m_deltamaskAlloc = m_deltamaskSize;
        }

        m_buckets.resize(roundUpPow2(bottom+1));
        m_clipbfr_l.resize(roundUpPow2((bottom+2)|63));
        m_clipbfr_r.resize(roundUpPow2((bottom+2)|63));

        m_destBuf.resize( (right+3)*4 ); // 3 additional pixels, size of a scanline

        m_scandelta.fill(0);
        // m_deltamask is init on each rasterized line
        m_buckets.fill(null);
        m_clipbfr_l.fill(0);
        m_clipbfr_r.fill(0);

        // init prev x,y and sub path start x,y

        m_prevx = 0;
        m_prevy = 0;
        m_subpx = 0;
        m_subpy = 0;
        m_fprevx = 0;
        m_fprevy = 0;
    }

    ~this()
    {
        if (m_deltamask)
        {
            alignedFree(m_deltamask, 1);
            m_deltamask = null;
        }
    }

    /*
      rasterize -- Converts the edges accumulated since initialise() into
      per-scanline coverage and calls the blitter once per scanline, from
      the first to the last row touched by an edge.

        imagePixels       - pointer to the first row of the destination image
        imagePitchInBytes - byte distance between two consecutive rows
        imageHeight       - not used by this function
        blitter           - callback (and its context) that paints a scanline
        compositeOp       - how the painted scanline is combined with the
                            destination. SourceOver blits straight to the
                            destination, the other modes blit to a temporary
                            scanline first and then combine it with SIMD.

      The edge pool is reset on exit.
    */

    void rasterize(ubyte* imagePixels,
                   const size_t imagePitchInBytes,
                   const int imageHeight,
                   Blitter blitter,
                   CompositeOp compositeOp)
    {
        Edge dummy;
        Edge* prev = &dummy;
        Edge* edge = null;

        // horizontal extent handed to the blitter, rounded out to multiples of 4 pixels
        int startx = (m_clipleft >> fpFracBits) & 0xFFFFFFFC;
        int endx = ((m_clipright >> fpFracBits) + 3) & 0xFFFFFFFC;
        int starty = m_yrmin >> fpFracBits;
        int endy = (m_yrmax+255) >> fpFracBits;

        int cl_acc,cr_acc;
        int cl_pos = m_clipleft >> fpFracBits;
        int cr_pos = m_clipright >> fpFracBits;

        ubyte* pDest = imagePixels + imagePitchInBytes * starty;

        for (int y = starty; y < endy; y++)
        {
            m_deltamask[0..m_deltamaskSize] = 0;
            int ly = (y << fpFracBits) + 256; // fixed point y of the bottom of this scanline

            // clip accumulator

            cl_acc += m_clipbfr_l[y];
            m_clipbfr_l[y] = 0;
            cr_acc += m_clipbfr_r[y];
            m_clipbfr_r[y] = 0;

            if (cl_acc) DMSetBit(m_deltamask, cl_pos);
            if (cr_acc) DMSetBit(m_deltamask, cr_pos);

            m_scandelta[cl_pos] += cl_acc;
            m_scandelta[cr_pos] += cr_acc;

            // At this point 'prev' either points at 'dummy' or at the last node in
            // active edges linked list, so we just add the new edges to it.

            prev.next = m_buckets[y];
            m_buckets[y] = null;

            // loop through the active edges

            prev = &dummy;
            edge = dummy.next;

            while (edge)
            {
                int ny = void;

                if (edge.y2 <= ly)
                {
                    // edge ends on this scanline: unlink it from the active list
                    ny = edge.y2;
                    prev.next = edge.next;
                }
                else
                {
                    ny = ly;
                    prev = edge;
                }

                int span = ny - edge.y;
                long nx = edge.x + edge.dx * span;

                // span with the sign of edge.dy
                int bpspan = span * ((cast(int)(edge.dy>>63))|1);

                int x0 = cast(int)(edge.x >> 40);
                int x1 = cast(int)(nx >> 40);
                int steps = x1 - x0;

                if (steps == 0)
                {
                    // edge stays within a single pixel column on this scanline
                    DMSetBit(m_deltamask, x0);

                    int w = (edge.x >> 32) & 0xFF;
                    int v = (nx >> 32) & 0xFF;
                    int area = (bpspan * (512 - w - v)) >> 2;
                    m_scandelta[x0] += area;
                    x0++;
                    m_scandelta[x0] += bpspan * 128 - area;
                }
                else if (steps > 0)
                {
                    // edge moves right across several pixel columns
                    DMSetBitRange(m_deltamask, x0, x1);

                    int w = 256 - ((edge.x >> 32) & 0xFF);
                    long acc = w * edge.dy;
                    int area = cast(int)((w * acc) >> 32);
                    m_scandelta[x0] += area;
                    x0++;
                    acc += edge.dy << 7;

                    while (x0 < x1)
                    {
                        int lc = area;
                        area = cast(int)(acc >> 23);
                        m_scandelta[x0] += area - lc;
                        x0++;
                        acc += edge.dy << 8;
                    }

                    int q = (nx >> 32) & 0xFF;
                    int rect = bpspan * 128;
                    int lc = area;
                    area = rect - cast(int)((q * q * edge.dy) >> 32);
                    m_scandelta[x0] += area - lc;
                    x0++;
                    m_scandelta[x0] += rect - area;
                }
                else if (steps < 0)
                {
                    // edge moves left across several pixel columns
                    DMSetBitRange(m_deltamask, x1, x0);

                    int w = 256 - ((nx >> 32) & 0xFF);
                    long acc = w * edge.dy;
                    int area = cast(int)((w * acc) >> 32);
                    m_scandelta[x1] += area;
                    x1++;
                    acc += edge.dy << 7;

                    while (x1 < x0)
                    {
                        int lc = area;
                        area = cast(int)(acc >> 23);
                        m_scandelta[x1] += area - lc;
                        x1++;
                        acc += edge.dy << 8;
                    }

                    int q = (edge.x >> 32) & 0xFF;
                    int rect = bpspan * 128;
                    int lc = area;
                    area = rect - cast(int)((q * q * edge.dy) >> 32);
                    m_scandelta[x1] += area - lc;
                    x1++;
                    m_scandelta[x1] += rect - area;
                }

                edge.x = nx;
                edge.y = ny;
                edge = edge.next;
            }

            // Blit scanline
            if (compositeOp == CompositeOp.SourceOver)
            {
                // special case for fastest blit, source-over is the default.
                blitter.doBlit(blitter.userData, cast(uint*)pDest, m_scandelta.ptr, m_deltamask, startx, endx, y);
            }
            else
            {
                uint* tmpBuf = cast(uint*)m_destBuf.ptr;
                uint* dest = cast(uint*)pDest;

                assert((startx & 3) == 0);
                assert((endx & 3) == 0);

                // Fill with zeroes
                tmpBuf[startx..endx] = 0; // black in rgba8

                // Write into destbuf.
                // There is an additional difficulty, because the pixels that were touched have an excess in -3 to +3 potentially.
                // This is done with coverage being zero. So our Porter-Duff would overwrite if we don't multiply with source-alpha!
                blitter.doBlit(blitter.userData, tmpBuf, m_scandelta.ptr, m_deltamask, startx, endx, y);

                // Custom composite operations go here.

                // Note: in HTML standard, destination alpha is considered, and the resulting color
                // is a weighted average of the dest and blended source. I suppose alpha also gets to be
                // a weighted average, but this implies a divide, and we assume background alpha to be 255.
                //
                // alpha-res = (255 * 255 + src-alpha * src-alpha) / (src-alpha + 255)
                // Under these circumstances, alpha-res is always close to 255.

                __m128i alphaMask = _mm_set1_epi32(0xff000000);

                final switch (compositeOp)
                {
                    case CompositeOp.SourceOver:
                        assert(false);

                    case CompositeOp.Add:
                        for (int x = startx; x < endx; x += 4)
                        {
                            __m128i D = _mm_loadu_si128(cast(__m128i*) &dest[x]);
                            __m128i S = _mm_loadu_si128(cast(__m128i*) &tmpBuf[x]);
                            __m128i Z = _mm_setzero_si128();
                            __m128i D0_3 = _mm_unpacklo_epi8(D, Z);
                            __m128i D4_7 = _mm_unpackhi_epi8(D, Z);
                            __m128i S0_3 = _mm_unpacklo_epi8(S, Z);
                            __m128i S4_7 = _mm_unpackhi_epi8(S, Z);
                            D0_3 = _mm_add_epi16(D0_3, S0_3);
                            D4_7 = _mm_add_epi16(D4_7, S4_7);
                            __m128i R = _mm_packus_epi16(D0_3, D4_7);
                            R = _mm_or_si128(R, alphaMask);
                            _mm_storeu_si128(cast(__m128i*) &dest[x], R);
                        }
                        break;

                    case CompositeOp.Subtract:
                        for (int x = startx; x < endx; x += 4)
                        {
                            __m128i D = _mm_loadu_si128(cast(__m128i*) &dest[x]);
                            __m128i S = _mm_loadu_si128(cast(__m128i*) &tmpBuf[x]);
                            __m128i Z = _mm_setzero_si128();
                            __m128i D0_3 = _mm_unpacklo_epi8(D, Z);
                            __m128i D4_7 = _mm_unpackhi_epi8(D, Z);
                            __m128i S0_3 = _mm_unpacklo_epi8(S, Z);
                            __m128i S4_7 = _mm_unpackhi_epi8(S, Z);
                            D0_3 = _mm_sub_epi16(D0_3, S0_3);
                            D4_7 = _mm_sub_epi16(D4_7, S4_7);
                            __m128i R = _mm_packus_epi16(D0_3, D4_7);
                            R = _mm_or_si128(R, alphaMask);
                            _mm_storeu_si128(cast(__m128i*) &dest[x], R);
                        }
                        break;

                    case CompositeOp.LightenOnly:
                        // (1 - alpha) * bg + max(bg, fg) * alpha
                        // == (1 - alpha) * bg + max(bg * alpha, fg * alpha) and we get fg.alpha
                        for (int x = startx; x < endx; x += 4)
                        {
                            __m128i D = _mm_loadu_si128(cast(__m128i*) &dest[x]);   // BG
                            __m128i S = _mm_loadu_si128(cast(__m128i*) &tmpBuf[x]); // FG * A, A
                            __m128i Z = _mm_setzero_si128();

                            __m128i D0_1 = _mm_unpacklo_epi8(D, Z); // two background pixels
                            __m128i D2_3 = _mm_unpackhi_epi8(D, Z); // ditto
                            __m128i S0_1 = _mm_unpacklo_epi8(S, Z); // two foreground pixels
                            __m128i S2_3 = _mm_unpackhi_epi8(S, Z); // ditto

                            __m128i A0_1 = _mm_shufflelo_epi16!0xff(_mm_shufflehi_epi16!0xff(S0_1)); // A0 A0 A0 A0 A1 A1 A1 A1
                            __m128i A2_3 = _mm_shufflelo_epi16!0xff(_mm_shufflehi_epi16!0xff(S2_3)); // A2 A2 A2 A2 A3 A3 A3 A3
                            __m128i m255 = _mm_set1_epi16(255);
                            __m128i mA0_1 = _mm_sub_epi16(m255, A0_1); // (255-A0)x4 (255-A1)x4
                            __m128i mA2_3 = _mm_sub_epi16(m255, A2_3);
                            A0_1 = _mm_slli_epi16(A0_1, 8);
                            A2_3 = _mm_slli_epi16(A2_3, 8);
                            mA0_1 = _mm_slli_epi16(mA0_1, 8);
                            mA2_3 = _mm_slli_epi16(mA2_3, 8);

                            // multiply bg * alpha
                            __m128i BGA0_1 = _mm_mulhi_epu16(A0_1, D0_1);
                            __m128i BGA2_3 = _mm_mulhi_epu16(A2_3, D2_3);

                            // multiply bg * (1 - alpha)
                            __m128i mBGA0_1 = _mm_mulhi_epu16(mA0_1, D0_1);
                            __m128i mBGA2_3 = _mm_mulhi_epu16(mA2_3, D2_3);

                            __m128i R0_1 = _mm_add_epi16( _mm_max_epi16(S0_1, BGA0_1), mBGA0_1);
                            __m128i R2_3 = _mm_add_epi16( _mm_max_epi16(S2_3, BGA2_3), mBGA2_3);

                            __m128i R = _mm_packus_epi16(R0_1, R2_3);
                            R = _mm_or_si128(R, alphaMask);
                            _mm_storeu_si128(cast(__m128i*) &dest[x], R);
                        }
                        break;

                    case CompositeOp.DarkenOnly:
                        // (1 - alpha) * bg + min(bg, fg) * alpha
                        // == (1 - alpha) * bg + min(bg * alpha, fg * alpha) and we get fg.alpha
                        for (int x = startx; x < endx; x += 4)
                        {
                            __m128i D = _mm_loadu_si128(cast(__m128i*) &dest[x]);   // BG
                            __m128i S = _mm_loadu_si128(cast(__m128i*) &tmpBuf[x]); // FG * A, A
                            __m128i Z = _mm_setzero_si128();

                            __m128i D0_1 = _mm_unpacklo_epi8(D, Z); // two background pixels
                            __m128i D2_3 = _mm_unpackhi_epi8(D, Z); // ditto
                            __m128i S0_1 = _mm_unpacklo_epi8(S, Z); // two foreground pixels
                            __m128i S2_3 = _mm_unpackhi_epi8(S, Z); // ditto

                            __m128i A0_1 = _mm_shufflelo_epi16!0xff(_mm_shufflehi_epi16!0xff(S0_1)); // A0 A0 A0 A0 A1 A1 A1 A1
                            __m128i A2_3 = _mm_shufflelo_epi16!0xff(_mm_shufflehi_epi16!0xff(S2_3)); // A2 A2 A2 A2 A3 A3 A3 A3
                            __m128i m255 = _mm_set1_epi16(255);
                            __m128i mA0_1 = _mm_sub_epi16(m255, A0_1); // (255-A0)x4 (255-A1)x4
                            __m128i mA2_3 = _mm_sub_epi16(m255, A2_3);
                            A0_1 = _mm_slli_epi16(A0_1, 8);
                            A2_3 = _mm_slli_epi16(A2_3, 8);
                            mA0_1 = _mm_slli_epi16(mA0_1, 8);
                            mA2_3 = _mm_slli_epi16(mA2_3, 8);

                            // multiply bg * alpha
                            __m128i BGA0_1 = _mm_mulhi_epu16(A0_1, D0_1);
                            __m128i BGA2_3 = _mm_mulhi_epu16(A2_3, D2_3);

                            // multiply bg * (1 - alpha)
                            __m128i mBGA0_1 = _mm_mulhi_epu16(mA0_1, D0_1);
                            __m128i mBGA2_3 = _mm_mulhi_epu16(mA2_3, D2_3);

                            __m128i R0_1 = _mm_add_epi16( _mm_min_epi16(S0_1, BGA0_1), mBGA0_1);
                            __m128i R2_3 = _mm_add_epi16( _mm_min_epi16(S2_3, BGA2_3), mBGA2_3);

                            __m128i R = _mm_packus_epi16(R0_1, R2_3);
                            R = _mm_or_si128(R, alphaMask);
                            _mm_storeu_si128(cast(__m128i*) &dest[x], R);
                        }
                        break;
                }
            }

            pDest += imagePitchInBytes;

            // clear scandelta overspill

            m_scandelta[endx] = 0;

            debug(checkRasterizer)
            {
                foreach(e; m_scandelta) assert(e == 0);
            }
        }

        // clear clip buffers overspill

        m_clipbfr_l[endy] = 0;
        m_clipbfr_r[endy] = 0;

        debug(checkRasterizer)
        {
            foreach(e; m_clipbfr_l) assert(e == 0);
            foreach(e; m_clipbfr_r) assert(e == 0);
        }

        // clear m_buckets overspill, this is only needed because in very
        // rare cases we could end up with an edge on the bottom clip
        // boundary after splitting an edge, these should really
        // be removed in the clipping code

        m_buckets[endy] = null;

        debug(checkRasterizer)
        {
            foreach(e; m_buckets) assert(e == null);
        }

        m_edgepool.reset();
    }

    /*
      drawing methods
    */

    /// Start a new subpath at (x, y). Any open subpath is closed first.
    void moveTo(double x, double y)
    {
        intMoveTo(cast(int)(x * fpScale), cast(int)(y * fpScale));
        m_fprevx = x;
        m_fprevy = y;
    }

    /// ditto
    void moveTo(float x, float y)
    {
        intMoveTo(cast(int)(x * fpScale), cast(int)(y * fpScale));
        m_fprevx = x;
        m_fprevy = y;
    }

    /// Add a straight line from the current point to (x, y).
    void lineTo(double x, double y)
    {
        intLineTo(cast(int)(x * fpScale), cast(int)(y * fpScale));
        m_fprevx = x;
        m_fprevy = y;
    }

    /// ditto
    void lineTo(float x, float y)
    {
        intLineTo(cast(int)(x * fpScale), cast(int)(y * fpScale));
        m_fprevx = x;
        m_fprevy = y;
    }

    /// Add a quadratic Bezier from the current point to (x2, y2) with control
    /// point (x1, y1). The curve is split in two recursively until the
    /// control point is close enough to the curve's midpoint, then emitted
    /// as a line.
    void quadTo(float x1, float y1, float x2, float y2)
    {
        float x01 = (m_fprevx+x1)*0.5;
        float y01 = (m_fprevy+y1)*0.5;
        float x12 = (x1+x2)*0.5;
        float y12 = (y1+y2)*0.5;
        float xctr = (x01+x12)*0.5;
        float yctr = (y01+y12)*0.5;
        float err = (x1-xctr)*(x1-xctr)+(y1-yctr)*(y1-yctr);

        if (err > 0.1)
        {
            quadTo(x01,y01,xctr,yctr);
            quadTo(x12,y12,x2,y2);
        }
        else
        {
            intLineTo(cast(int)(x2 * fpScale), cast(int)(y2 * fpScale));
        }

        m_fprevx = x2;
        m_fprevy = y2;
    }

    /// Add a cubic Bezier from the current point to (x3, y3) with control
    /// points (x1, y1) and (x2, y2). The curve is split in two recursively
    /// until it passes the flatness test, then emitted as a line.
    void cubicTo(float x1, float y1, float x2, float y2, float x3, float y3)
    {
        // Degenerate case: both control points coincide with the end point.
        // The curve is then exactly the straight segment from the current
        // point to (x3, y3), so emit that segment instead of dropping it.
        // Returning here is also what stops the subdivision below from
        // recursing forever when the current point equals (x3, y3) too: the
        // flatness test would then compare 0 < 0 and keep splitting into
        // identical curves.
        if (x1 == x2 && x2 == x3 && y1 == y2 && y2 == y3)
        {
            intLineTo(cast(int)(x3 * fpScale), cast(int)(y3 * fpScale));
            m_fprevx = x3;
            m_fprevy = y3;
            return;
        }

        float x01 = (m_fprevx+x1)*0.5;
        float y01 = (m_fprevy+y1)*0.5;
        float x12 = (x1+x2)*0.5;
        float y12 = (y1+y2)*0.5;
        float x23 = (x2+x3)*0.5;
        float y23 = (y2+y3)*0.5;

        float xc0 = (x01+x12)*0.5;
        float yc0 = (y01+y12)*0.5;
        float xc1 = (x12+x23)*0.5;
        float yc1 = (y12+y23)*0.5;
        float xctr = (xc0+xc1)*0.5;
        float yctr = (yc0+yc1)*0.5;

        // this flattening test code was from a page on the antigrain geometry
        // website.

        float dx = x3-m_fprevx;
        float dy = y3-m_fprevy;

        double d2 = fast_fabs(((x1 - x3) * dy - (y1 - y3) * dx));
        double d3 = fast_fabs(((x2 - x3) * dy - (y2 - y3) * dx));

        if((d2 + d3)*(d2 + d3) < 0.5 * (dx*dx + dy*dy))
        {
            intLineTo(cast(int)(x3 * fpScale), cast(int)(y3 * fpScale));
        }
        else
        {
            cubicTo(x01,y01,xc0,yc0,xctr,yctr);
            cubicTo(xc1,yc1,x23,y23,x3,y3);
        }

        m_fprevx = x3;
        m_fprevy = y3;
    }

    /// Close the current subpath with a line back to its start point, if the
    /// current point is not already there.
    void closePath()
    {
        if ((m_prevx != m_subpx) || (m_prevy != m_subpy))
        {
            intLineTo(m_subpx, m_subpy);
        }
    }

private:

    // internal moveTo. Note this will close any existing subpath because
    // unclosed paths cause bad things to happen. (visually at least)

    void intMoveTo(int x, int y)
    {
        closePath();
        m_prevx = x;
        m_prevy = y;
        m_subpx = x;
        m_subpy = y;
    }

    // internal lineTo, clips and adds the line to edge buckets and clip
    // buffers as appropriate. Coordinates are 24:8 fixed point.

    void intLineTo(int x, int y)
    {
        // mixin for adding edges. For some reason LDC wouldn't inline this when
        // it was a separate function, and it was 15% slower that way

        string addEdgeM(string x0, string y0, string x1, string y1, string dir)
        {
            string tmp = (dir == "+") ? (y1~"-"~y0) : (y0~"-"~y1);
            return
                "Edge* edge = m_edgepool.allocate();" ~
                "edge.dx = cast(long) (fpDXScale * ("~x1~"-"~x0~") / ("~y1~"-"~y0~"));" ~
                "edge.x = (cast(long) "~x0~") << 32;" ~
                "edge.y = "~y0~";" ~
                "edge.y2 = "~y1~";" ~
                "int by = "~y0~" >> fpFracBits;" ~
                "int xxx = max(abs("~x1~"-"~x0~"),1);" ~
                "edge.dy = cast(long) (fpDYScale * ("~tmp~") / xxx);" ~
                "edge.next = m_buckets[by];" ~
                "m_buckets[by] = edge;";
        }

        // mixin for clip accumulator

        string addToClip(string y0, string y1, string side, string dir)
        {
            return
                "{ int i0 = "~y0~" >> fpFracBits;" ~
                "int f0 = ("~y0~" & 0xFF) << 7;" ~
                "int i1 = "~y1~" >> fpFracBits;" ~
                "int f1 = ("~y1~" & 0xFF) << 7;" ~
                "m_clipbfr_"~side~"[i0] "~dir~"= 32768-f0;" ~
                "m_clipbfr_"~side~"[i0+1] "~dir~"= f0;" ~
                "m_clipbfr_"~side~"[i1] "~dir~"= f1-32768;" ~
                "m_clipbfr_"~side~"[i1+1] "~dir~"= -f1; }";
        }

        // handle upward and downward lines separately

        if (m_prevy < y)
        {
            int x0 = m_prevx, y0 = m_prevy, x1 = x, y1 = y;

            // edge is outside clip box or horizontal

            if ((y0 == y1) || (y0 >= m_clipbottom) || (y1 <= m_cliptop))
            {
                goto finished;
            }

            // clip to top and bottom

            if (y0 < m_cliptop)
            {
                x0 = x0 + MulDiv64(m_cliptop - y0, x1 - x0, y1 - y0);
                y0 = m_cliptop;
            }

            if (y1 > m_clipbottom)
            {
                x1 = x0 + MulDiv64(m_clipbottom - y0, x1 - x0, y1 - y0);
                y1 = m_clipbottom;
            }

            // track y extent

            if (y0 < m_yrmin) m_yrmin = y0;
            if (y1 > m_yrmax) m_yrmax = y1;

            // generate horizontal zoning flags, these are set depending on where
            // x0 and x1 are in respect of the clip box.

            uint a = cast(uint)(x0<m_clipleft);
            uint b = cast(uint)(x0>m_clipright);
            uint c = cast(uint)(x1<m_clipleft);
            uint d = cast(uint)(x1>m_clipright);
            uint flags = a | (b*2) | (c*4) | (d*8);

            if (flags == 0) // bit faster to pull no clip out front
            {
                mixin(addEdgeM("x0","y0","x1","y1","+"));
                goto finished;
            }

            // note clipping here can occasionally result in horizontals, and can
            // occasionally put a horizontal on bucket for clipbottom, which is
            // outside the drawable area, currently it allows it and zeros that
            // bucket after rasterization.

            switch (flags)
            {
            case (1): // 0001 --> x0 left, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addToClip("y0","sy","l","+"));
                mixin(addEdgeM("m_clipleft","sy","x1","y1","+"));
                break;
            case (2): // 0010 --> x0 right, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sy","r","+"));
                mixin(addEdgeM("m_clipright","sy","x1","y1","+"));
                break;
            case (4): // 0100 --> x0 center, x1 left
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipleft","sy","+"));
                mixin(addToClip("sy","y1","l","+"));
                break;
            case (5): // 0101 --> x0 left, x1 left
                mixin(addToClip("y0","y1","l","+"));
                break;
            case (6): // 0110 --> x0 right, x1 left
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sr","r","+"));
                mixin(addEdgeM("m_clipright","sr","m_clipleft","sl","+"));
                mixin(addToClip("sl","y1","l","+"));
                break;
            case (8): // 1000 --> x0 center, x1 right
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipright","sy","+"));
                mixin(addToClip("sy","y1","r","+"));
                break;
            case (9): // 1001 --> x0 left, x1 right
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sl","l","+"));
                mixin(addEdgeM("m_clipleft","sl","m_clipright","sr","+"));
                mixin(addToClip("sr","y1","r","+"));
                break;
            case (10): // 1010 --> x0 right, x1 right
                mixin(addToClip("y0","y1","r","+"));
                break;
            default: // everything else is NOP
                break;
            }
        }
        else
        {
            int x1 = m_prevx, y1 = m_prevy, x0 = x, y0 = y;

            // edge is outside clip box or horizontal

            if ((y0 == y1) || (y0 >= m_clipbottom) || (y1 <= m_cliptop))
            {
                goto finished;
            }

            // clip to top and bottom

            if (y0 < m_cliptop)
            {
                x0 = x0 + MulDiv64(m_cliptop - y0, x1 - x0, y1 - y0);
                y0 = m_cliptop;
            }

            if (y1 > m_clipbottom)
            {
                x1 = x0 + MulDiv64(m_clipbottom - y0, x1 - x0, y1 - y0);
                y1 = m_clipbottom;
            }

            // track y extent

            if (y0 < m_yrmin) m_yrmin = y0;
            if (y1 > m_yrmax) m_yrmax = y1;

            // generate horizontal zoning flags, these are set depending on where
            // x0 and x1 are in respect of the clip box.

            uint a = cast(uint)(x0<m_clipleft);
            uint b = cast(uint)(x0>m_clipright);
            uint c = cast(uint)(x1<m_clipleft);
            uint d = cast(uint)(x1>m_clipright);
            uint flags = a | (b*2) | (c*4) | (d*8);

            if (flags == 0) // bit faster to pull no clip out front
            {
                mixin(addEdgeM("x0","y0","x1","y1","-"));
                goto finished;
            }

            // note clipping here can occasionally result in horizontals, and can
            // occasionally put a horizontal on bucket for clipbottom, which is
            // outside the drawable area, currently it allows it and zeros that
            // bucket after rasterization.

            switch (flags)
            {
            case (1): // 0001 --> x0 left, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addToClip("y0","sy","l","-"));
                mixin(addEdgeM("m_clipleft","sy","x1","y1","-"));
                break;
            case (2): // 0010 --> x0 right, x1 center
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sy","r","-"));
                mixin(addEdgeM("m_clipright","sy","x1","y1","-"));
                break;
            case (4): // 0100 --> x0 center, x1 left
                int sy = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipleft","sy","-"));
                mixin(addToClip("sy","y1","l","-"));
                break;
            case (5): // 0101 --> x0 left, x1 left
                mixin(addToClip("y0","y1","l","-"));
                break;
            case (6): // 0110 --> x0 right, x1 left
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sr","r","-"));
                mixin(addEdgeM("m_clipright","sr","m_clipleft","sl","-"));
                mixin(addToClip("sl","y1","l","-"));
                break;
            case (8): // 1000 --> x0 center, x1 right
                int sy = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addEdgeM("x0","y0","m_clipright","sy","-"));
                mixin(addToClip("sy","y1","r","-"));
                break;
            case (9): // 1001 --> x0 left, x1 right
                int sl = y0 + MulDiv64(y1 - y0, m_clipleft - x0, x1 - x0);
                int sr = y0 + MulDiv64(y1 - y0, m_clipright - x0, x1 - x0);
                mixin(addToClip("y0","sl","l","-"));
                mixin(addEdgeM("m_clipleft","sl","m_clipright","sr","-"));
                mixin(addToClip("sr","y1","r","-"));
                break;
            case (10): // 1010 --> x0 right, x1 right
                mixin(addToClip("y0","y1","r","-"));
                break;
            default: // everything else is NOP
                break;
            }
        }

    finished:

        m_prevx = x;
        m_prevy = y;
    }

    // edge struct

    struct Edge
    {
        long x, dx, dy;
        int y, y2;
        Edge* next;
    }

    ArenaAllocator!(Edge,100) m_edgepool;

    // Note: the reason why it's Vec is to avoid an initialization that would reallocate down.

    Vec!(Edge*) m_buckets;
    Vec!int m_scandelta;

    size_t m_deltamaskSize = 0;
    size_t m_deltamaskAlloc = 0;
    DMWord* m_deltamask;

    Vec!int m_clipbfr_l;
    Vec!int m_clipbfr_r;

    // clip rectangle, in 24:8 fixed point

    int m_clipleft;
    int m_cliptop;
    int m_clipright;
    int m_clipbottom;

    // keeps track of y extent, in 24:8 fixed point

    int m_yrmin,m_yrmax;

    // start of current subpath,

    int m_subpx,m_subpy;

    // previous x,y (internal coords)

    int m_prevx,m_prevy;

    // previous x,y float coords

    float m_fprevx,m_fprevy;

    // Temporary dest buffer for Porter Duff operations.
    Vec!ubyte m_destBuf;
}

// the rasterizer itself should be a reusable, small object suitable for the stack
static assert(Rasterizer.sizeof < 280);

private:

/*
  a*b/c, with the intermediate result of a*b in 64 bit
  the asm version might be faster in 32 bit mode, haven't tested yet, but the
  plain D version is same speed with 64bit / LDC
*/

int MulDiv64(int a, int b, int c)
{
    return cast(int) ((cast(long) a * b) / c);
}