/**
 * Original fixed-function PBR rendering in Dplug.
 * For compatibility purpose.
 *
 * Copyright: Copyright Auburn Sounds 2015-2019.
 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 */
module dplug.gui.legacypbr;


import core.stdc.stdio;
import std.math;

import dplug.math.vector;
import dplug.math.box;
import dplug.math.matrix;

import dplug.core.vec;
import dplug.core.nogc;
import dplug.core.math;
import dplug.core.thread;

import dplug.gui.compositor;

import dplug.graphics;
import dplug.window.window;

import dplug.gui.ransac;

import inteli.math;
import inteli.emmintrin;
import dplug.gui.profiler;

// TODO: PBR rendering doesn't depend rightly on size of the plugin.
//       The #RESIZE tag below marks all areas that need updating.


/// When inheriting from `MultipassCompositor`, you can define what the passes exchange
/// between each other. However, the first field has to be a `CompositorPassBuffers`.
///
/// The three slices below hold one image per thread; passes index them with the
/// `threadIndex` they receive in `render`.
struct PBRCompositorPassBuffers
{
    // First field must be `CompositorPassBuffers` for ABI compatibility of `MultipassCompositor`.
    // Do not reorder: passes receive this struct through a `CompositorPassBuffers*`.
    CompositorPassBuffers parent;
    alias parent this;

    // Computed normal, one buffer per thread
    OwnedImage!RGBf[] normalBuffers;

    // Accumulates light for each deferred pass, one buffer per thread
    OwnedImage!RGBAf[] accumBuffers;

    // Approximate of normal variance, one buffer per thread
    OwnedImage!L32f[] varianceBuffers;
}


/// Equivalence factor between Z samples and pixels.
/// Tuned once by hand to match the other normal computation algorithm
/// This affects virtual geometry, and as such: normals and raymarching into depth.
/// Future: this should be modifiable in order to have more Z range in plugins (more 3D).
/// Bug: resizing should affect this factor.
enum float FACTOR_Z = 4655.0f; // #RESIZE: this factor depends on DPI

/// Originally, Dplug compositor was fixed function.
/// This is the legacy compositor.
///
/// It owns one normal, accumulation and variance image per thread, and registers
/// eight passes in a fixed order (see the `PASS_*` indices below).
class PBRCompositor : MultipassCompositor
{
nothrow @nogc:


    // <LEGACY> parameters, reproduced here as properties for compatibility.
    // Instead you are supposed to tweak settings when creating the passes.
    // Each setter looks up the pass by its fixed index and forwards the value.

    /// Sets the color of the oblique shadow light pass.
    void light1Color(vec3f color)
    {
        (cast(PassObliqueShadowLight)getPass(PASS_OBLIQUE_SHADOW)).color = color;
    }

    /// Sets the direction of the directional light pass.
    void light2Dir(vec3f dir)
    {
        (cast(PassDirectionalLight)getPass(PASS_DIRECTIONAL)).direction = dir;
    }

    /// Sets the color of the directional light pass.
    void light2Color(vec3f color)
    {
        (cast(PassDirectionalLight)getPass(PASS_DIRECTIONAL)).color = color;
    }

    /// Sets the direction of the specular light pass.
    void light3Dir(vec3f dir)
    {
        (cast(PassSpecularLight)getPass(PASS_SPECULAR)).direction = dir;
    }

    /// Sets the color of the specular light pass.
    void light3Color(vec3f color)
    {
        (cast(PassSpecularLight)getPass(PASS_SPECULAR)).color = color;
    }

    /// Sets the amount of the skybox reflections pass.
    void skyboxAmount(float amount)
    {
        (cast(PassSkyboxReflections)getPass(PASS_SKYBOX)).amount = amount;
    }

    /// Sets the amount of the ambient occlusion pass.
    void ambientLight(float amount)
    {
        (cast(PassAmbientOcclusion)getPass(PASS_AO)).amount = amount;
    }

    // </LEGACY>



    private enum // MUST be kept in sync with below passes, it's for legacy purpose
    {
        PASS_NORMAL         = 0,
        PASS_AO             = 1,
        PASS_OBLIQUE_SHADOW = 2,
        PASS_DIRECTIONAL    = 3,
        PASS_SPECULAR       = 4,
        PASS_SKYBOX         = 5,
        PASS_EMISSIVE       = 6,
        PASS_CLAMP          = 7
    }

    /// Allocates the per-thread buffers and creates the passes, in `PASS_*` order.
    this(CompositorCreationContext* context)
    {
        super(context);

        _normalBuffers   = mallocSlice!(OwnedImage!RGBf)(numThreads());
        _accumBuffers    = mallocSlice!(OwnedImage!RGBAf)(numThreads());
        _varianceBuffers = mallocSlice!(OwnedImage!L32f)(numThreads());

        for (int t = 0; t < numThreads(); ++t)
        {
            _normalBuffers[t]   = mallocNew!(OwnedImage!RGBf)();
            _accumBuffers[t]    = mallocNew!(OwnedImage!RGBAf)();
            _varianceBuffers[t] = mallocNew!(OwnedImage!L32f)();
        }

        // Create the passes
        addPass( mallocNew!PassComputeNormal(this) );         // PASS_NORMAL
        addPass( mallocNew!PassAmbientOcclusion(this) );      // PASS_AO
        addPass( mallocNew!PassObliqueShadowLight(this) );    // PASS_OBLIQUE_SHADOW
        addPass( mallocNew!PassDirectionalLight(this) );      // PASS_DIRECTIONAL
        addPass( mallocNew!PassSpecularLight(this) );         // PASS_SPECULAR
        addPass( mallocNew!PassSkyboxReflections(this) );     // PASS_SKYBOX
        addPass( mallocNew!PassEmissiveContribution(this) );  // PASS_EMISSIVE
        addPass( mallocNew!PassClampAndConvertTo8bit(this) ); // PASS_CLAMP
    }

    /// Destroys every per-thread image, then frees the three slices.
    ~this()
    {
        for (size_t t = 0; t < _normalBuffers.length; ++t)
        {
            _normalBuffers[t].destroyFree();
            _accumBuffers[t].destroyFree();
            _varianceBuffers[t].destroyFree();
        }
        freeSlice(_normalBuffers);
        freeSlice(_accumBuffers);
        freeSlice(_varianceBuffers);
    }

    /// Forwards to the parent, then resizes each per-thread buffer to
    /// `areaMaxWidth` x `areaMaxHeight`.
    override void resizeBuffers(int width,
                                int height,
                                int areaMaxWidth,
                                int areaMaxHeight)
    {
        super.resizeBuffers(width, height, areaMaxWidth, areaMaxHeight);

        // Loop-invariant layout parameters. Only the accumulation buffer asks for
        // a row alignment of 16.
        enum int border_0    = 0;
        enum int rowAlign_1  = 1;
        enum int rowAlign_16 = 16;

        // Create numThreads thread-local buffers of areaMaxWidth x areaMaxHeight size.
        for (int t = 0; t < numThreads(); ++t)
        {
            _normalBuffers[t].size(areaMaxWidth, areaMaxHeight, border_0, rowAlign_1);
            _accumBuffers[t].size(areaMaxWidth, areaMaxHeight, border_0, rowAlign_16);
            _varianceBuffers[t].size(areaMaxWidth, areaMaxHeight, border_0, rowAlign_1);
        }
    }


    /// Composites every area of `areas` into `wfb`.
    /// For each area, the accumulation buffer of the executing thread is cleared,
    /// then all passes are run in sequence on that area. Areas are dispatched
    /// through the compositor thread pool.
    override void compositeTile(ImageRef!RGBA wfb,
                                const(box2i)[] areas,
                                Mipmap!RGBA diffuseMap,
                                Mipmap!RGBA materialMap,
                                Mipmap!L16 depthMap,
                                IProfiler profiler)
    {
        // Call each pass in sequence
        PBRCompositorPassBuffers buffers;
        buffers.outputBuf = &wfb;
        buffers.diffuseMap = diffuseMap;
        buffers.materialMap = materialMap;
        buffers.depthMap = depthMap;
        buffers.accumBuffers = _accumBuffers;
        buffers.normalBuffers = _normalBuffers;
        buffers.varianceBuffers = _varianceBuffers;

        // For each tile, do all pass one by one.
        void compositeOneTile(int i, int threadIndex) nothrow @nogc
        {
            OwnedImage!RGBAf accumBuffer = _accumBuffers[threadIndex];

            version(Dplug_ProfileUI)
            {
                profiler.category("PBR");
            }

            box2i area = areas[i];

            // Clear the accumulation buffer, since all passes add to it.
            // Only the top-left `area.width` x `area.height` region is used.
            {
                RGBAf zero = RGBAf(0.0f, 0.0f, 0.0f, 0.0f);
                for (int j = 0; j < area.height; ++j)
                {
                    RGBAf* accumScan = accumBuffer.scanline(j).ptr;
                    accumScan[0..area.width] = zero;
                }
            }

            foreach(pass; passes())
            {
                version(Dplug_ProfileUI)
                {
                    char[96] buf;
                    snprintf(buf.ptr, 96, "Pass %s".ptr, pass.name.ptr);
                    profiler.begin(buf);
                }

                pass.renderIfActive(threadIndex, area, cast(CompositorPassBuffers*)&buffers);

                version(Dplug_ProfileUI)
                {
                    profiler.end;
                }
            }
        }
        int numAreas = cast(int)areas.length;
        threadPool().parallelFor(numAreas, &compositeOneTile);
    }

private:
    OwnedImage!RGBf[] _normalBuffers;    // store computed normals
    OwnedImage!RGBAf[] _accumBuffers;    // store accumulated color
    OwnedImage!L32f[] _varianceBuffers;  // store computed normal variance, useful for anti-aliasing
}

// Compute normals from depth, and normal variance.
// Results are written to the per-thread normal and variance buffers, in
// area-relative coordinates (pixel (area.min.x, area.min.y) goes to (0, 0)).
class PassComputeNormal : CompositorPass
{
nothrow:
@nogc:

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex];
        OwnedImage!L16 depthLevel0 = PBRbuf.depthMap.levels[0];
        OwnedImage!L32f varianceBuffer = PBRbuf.varianceBuffers[threadIndex];

        const int depthPitchBytes = depthLevel0.pitchInBytes();

        for (int j = area.min.y; j < area.max.y; ++j)
        {
            RGBf* normalScan = normalBuffer.scanline(j - area.min.y).ptr;
            L32f* varianceScan = varianceBuffer.scanline(j - area.min.y).ptr;

            // Note: because the level 0 of depth map has a border of 1 and a trailingSamples of 2,
            //       then we are allowed to read 4 depth samples at once.
            const(L16)* depthScan = depthLevel0.scanlinePtr(j);

            for (int i = area.min.x; i < area.max.x; ++i)
            {
                // Compute normal
                {
                    // Pointers to the same column in the previous and next depth rows.
                    const(L16)* depthHere = depthScan + i;
                    const(L16)* depthHereM1 = cast(const(L16)*) ( cast(const(ubyte)*)depthHere - depthPitchBytes );
                    const(L16)* depthHereP1 = cast(const(L16)*) ( cast(const(ubyte)*)depthHere + depthPitchBytes );
                    enum float multUshort = 1.0 / FACTOR_Z;

                    // 3x3 neighbourhood, row-major, scaled by 1 / FACTOR_Z.
                    float[9] depthNeighbourhood = void;
                    depthNeighbourhood[0] = depthHereM1[-1].l * multUshort;
                    depthNeighbourhood[1] = depthHereM1[ 0].l * multUshort;
                    depthNeighbourhood[2] = depthHereM1[+1].l * multUshort;
                    depthNeighbourhood[3] = depthHere[-1].l * multUshort;
                    depthNeighbourhood[4] = depthHere[ 0].l * multUshort;
                    depthNeighbourhood[5] = depthHere[+1].l * multUshort;
                    depthNeighbourhood[6] = depthHereP1[-1].l * multUshort;
                    depthNeighbourhood[7] = depthHereP1[ 0].l * multUshort;
                    depthNeighbourhood[8] = depthHereP1[+1].l * multUshort;
                    vec3f normal = computePlaneFittingNormal(depthNeighbourhood.ptr);
                    normalScan[i - area.min.x] = RGBf(normal.x, normal.y, normal.z);
                }

                // Compute normal variance (old method)
                {
                    const(ubyte)* depthHere = cast(const(ubyte)*)(depthScan + i);

                    // Read 12 depth samples, the rightmost are unused.
                    // Each load starts 2 bytes (one sample) to the left of the current pixel.
                    __m128i depthSamplesM1 = _mm_loadl_epi64( cast(const(__m128i)*)(depthHere - depthPitchBytes - 2) );
                    __m128i depthSamplesP0 = _mm_loadl_epi64( cast(const(__m128i)*)(depthHere - 2) );
                    __m128i depthSamplesP1 = _mm_loadl_epi64( cast(const(__m128i)*)(depthHere + depthPitchBytes - 2) );

                    // Extend to float
                    __m128i zero = _mm_setzero_si128();
                    __m128 depthM1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(depthSamplesM1, zero));
                    __m128 depthP0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(depthSamplesP0, zero));
                    __m128 depthP1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(depthSamplesP1, zero));

                    enum useLaplacian = false;
                    static if (useLaplacian)
                    {
                        // Possibly a bit better, not tried further since
                        // it is a pain to make it match for the passes that uses it.
                        // 2nd-order-derivative for depth in the X direction
                        align(16) static immutable float[12] LAPLACIAN =
                        [
                            0.25, 0.5, 0.25, 0,
                            0.5, -3.0, 0.5,  0,
                            0.25, 0.5, 0.25, 0,
                        ];

                        __m128 mul = depthM1 * _mm_load_ps(&LAPLACIAN[0])
                                   + depthP0 * _mm_load_ps(&LAPLACIAN[4])
                                   + depthP1 * _mm_load_ps(&LAPLACIAN[8]);
                        float laplace = mul.array[0] + mul.array[1] + mul.array[2] + mul.array[3];
                        laplace /= 256.0f;
                        float variance = laplace*laplace;
                    }
                    else
                    {
                        // 2nd-order-derivative for depth in the X direction
                        //  1 -2  1
                        //  1 -2  1
                        //  1 -2  1
                        const(__m128) fact_DDX_M1 = _mm_setr_ps( 1.0f, -2.0f,  1.0f, 0.0f);
                        __m128 mulForDDX = fact_DDX_M1 * (depthM1 + depthP0 + depthP1);
                        float depthDX = mulForDDX.array[0] + mulForDDX.array[1] + mulForDDX.array[2];

                        // 2nd-order-derivative for depth in the Y direction
                        //  1  1  1
                        // -2 -2 -2
                        //  1  1  1
                        const(__m128) fact_DDY_M1 = _mm_setr_ps( 1.0f,  1.0f,  1.0f, 0.0f);
                        const(__m128) fact_DDY_P0 = _mm_setr_ps(-2.0f, -2.0f, -2.0f, 0.0f);
                        __m128 mulForDDY = fact_DDY_M1 * (depthM1 + depthP1) + depthP0 * fact_DDY_P0;
                        float depthDY = mulForDDY.array[0] + mulForDDY.array[1] + mulForDDY.array[2];

                        depthDX *= (1 / 256.0f); // #RESIZE: sounds strange
                        depthDY *= (1 / 256.0f);
                        float variance = (depthDX * depthDX + depthDY * depthDY);
                    }
                    varianceScan[i - area.min.x] = L32f(variance);
                }
            }
        }
    }
}


/// Give light depending on whether the pixels are statistically above their neighbours.
class PassAmbientOcclusion : CompositorPass
{
nothrow:
@nogc:

    /// Scale applied to the base color, after multiplication by the cavity term.
    float amount = 0.08125f;

    // TODO: add ambient light color

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// For each pixel of `area`, compares the depth with the average of depth mipmap
    /// levels 1 to 4 at the same position, and adds `baseColor * cavity * amount`
    /// to the per-thread accumulation buffer.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0];
        Mipmap!L16 depthMap = PBRbuf.depthMap;
        OwnedImage!L16 depthLevel0 = PBRbuf.depthMap.levels[0];
        OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex];

        for (int j = area.min.y; j < area.max.y; ++j)
        {
            RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j);
            const(L16*) depthScan = depthLevel0.scanlinePtr(j);
            RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y);

            for (int i = area.min.x; i < area.max.x; ++i)
            {
                __m128 baseColor = convertBaseColorToFloat4(diffuseScan[i]);

                const(L16)* depthHere = depthScan + i;

                float px = i + 0.5f;
                float py = j + 0.5f;

                // #RESIZE: if the plugin is large, should sample higher in mipmap levels

                float avgDepthHere =
                    ( depthMap.linearSample(1, px, py)
                    + depthMap.linearSample(2, px, py)
                    + depthMap.linearSample(3, px, py)
                    + depthMap.linearSample(4, px, py) ) * 0.25f;

                float diff = (*depthHere).l - avgDepthHere;

                // Map diff from [-23040, 0] to [0, 1], clamped.
                enum float divider23040 = 1.0f / 23040;
                float cavity = (diff + 23040.0f) * divider23040;
                if (cavity >= 1)
                    cavity = 1;
                else if (cavity < 0)
                    cavity = 0;

                __m128 color = baseColor * _mm_set1_ps(cavity * amount);
                _mm_store_ps(cast(float*)(&accumScan[i - area.min.x]), _mm_load_ps(cast(float*)(&accumScan[i - area.min.x])) + color);
            }
        }
    }
}

/// Adds a primary light that casts shadows, by comparing the depth of each pixel
/// with depth samples taken up-left and up-right of it.
class PassObliqueShadowLight : CompositorPass
{
nothrow:
@nogc:

    /// Color of this light pass.
    vec3f color = vec3f(0.25f, 0.25f, 0.25f) * 1.3f;

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!L16 depthLevel0 = PBRbuf.depthMap.levels[0];
        OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0];
        OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex];

        // Add a primary light that cast shadows

        enum float fallOff = 0.78f; // #RESIZE, recompute that table as needed

        enum int samples = 11; // #RESIZE ditto

        // weights[k] == fallOff ^^ k. Entry 0 is never read, the loop starts at 1.
        static immutable float[samples] weights =
        [
            1.0f,
            fallOff,
            fallOff ^^ 2,
            fallOff ^^ 3,
            fallOff ^^ 4,
            fallOff ^^ 5,
            fallOff ^^ 6,
            fallOff ^^ 7,
            fallOff ^^ 8,
            fallOff ^^ 9,
            fallOff ^^ 10
        ];

        // Geometric sum of weights[1..11] (hence the "- 1").
        enum float totalWeights = (1.0f - (fallOff ^^ 11)) / (1.0f - fallOff) - 1;
        enum float invTotalWeights = 1 / (1.7f * totalWeights);

        // Note: no height bound is needed, samples are only taken upwards
        // (y = j - sample) and clamped to row 0.
        int wholeWidth = depthLevel0.w;

        for (int j = area.min.y; j < area.max.y; ++j)
        {
            RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j);

            const(L16*) depthScan = depthLevel0.scanlinePtr(j);
            RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y);

            for (int i = area.min.x; i < area.max.x; ++i)
            {
                const(L16)* depthHere = depthScan + i;
                RGBA ibaseColor = diffuseScan[i];
                vec3f baseColor = vec3f(ibaseColor.r, ibaseColor.g, ibaseColor.b) * div255;

                float lightPassed = 0.0f;

                int depthCenter = (*depthHere).l;
                for (int sample = 1; sample < samples; ++sample)
                {
                    // Sample positions, clamped to the depth map.
                    int x1 = i + sample;
                    if (x1 >= wholeWidth)
                        x1 = wholeWidth - 1;
                    int x2 = i - sample;
                    if (x2 < 0)
                        x2 = 0;
                    int y = j - sample;
                    if (y < 0)
                        y = 0;
                    int z = depthCenter + sample; // ???
                    L16* scan = depthLevel0.scanlinePtr(y);
                    int diff1 = z - scan[x1].l; // FUTURE: use pointer offsets here instead of opIndex
                    int diff2 = z - scan[x2].l;

                    float contrib1 = void,
                          contrib2 = void;

                    static immutable float divider15360 = 1.0f / 15360; // BUG: not consistent with FACTOR_Z, this is steeper...

                    if (diff1 >= 0)
                        contrib1 = 1;
                    else if (diff1 < -15360)
                        contrib1 = 0;
                    else
                        contrib1 = (diff1 + 15360) * divider15360;

                    if (diff2 >= 0)
                        contrib2 = 1;
                    else if (diff2 < -15360)
                        contrib2 = 0;
                    else
                        contrib2 = (diff2 + 15360) * divider15360;

                    lightPassed += (contrib1 + contrib2 * 0.7f) * weights[sample];
                }
                vec3f finalColor = baseColor * color * (lightPassed * invTotalWeights);
                __m128 mmColor = _mm_setr_ps(finalColor.r, finalColor.g, finalColor.b, 0.0f);
                _mm_store_ps(cast(float*)(&accumScan[i - area.min.x]), _mm_load_ps(cast(float*)(&accumScan[i - area.min.x])) + mmColor);
            }
        }
    }
}

/// Adds a secondary, diffuse light whose intensity depends on the dot product of
/// the computed normal with `direction`, and on the material roughness.
class PassDirectionalLight : CompositorPass
{
nothrow:
@nogc:
public:

    /// World-space direction. Unsure of the particular space it lives in.
    vec3f direction = vec3f(0.0f, 1.0f, 0.1f).normalized;

    /// Color of this light pass.
    vec3f color = vec3f(0.481f, 0.481f, 0.481f);

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0];
        OwnedImage!RGBA materialLevel0 = PBRbuf.materialMap.levels[0];
        OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex];
        OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex];

        // secondary light
        for (int j = area.min.y; j < area.max.y; ++j)
        {
            RGBA* materialScan = materialLevel0.scanlinePtr(j);
            RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j);
            RGBf* normalScan = normalBuffer.scanlinePtr(j - area.min.y);
            RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y);

            for (int i = area.min.x; i < area.max.x; ++i)
            {
                RGBf normalFromBuf = normalScan[i - area.min.x];
                RGBA materialHere = materialScan[i];
                float roughness = materialHere.r * div255;
                RGBA ibaseColor = diffuseScan[i];
                vec3f baseColor = vec3f(ibaseColor.r, ibaseColor.g, ibaseColor.b) * div255;
                vec3f normal = vec3f(normalFromBuf.r, normalFromBuf.g, normalFromBuf.b);
                float diffuseFactor = 0.5f + 0.5f * dot(normal, direction);
                diffuseFactor = linmap!float(diffuseFactor, 0.24f - roughness * 0.5f, 1, 0, 1.0f);
                vec3f finalColor = baseColor * color * diffuseFactor;
                accumScan[i - area.min.x] += RGBAf(finalColor.r, finalColor.g, finalColor.b, 0.0f);
            }
        }
    }
}

/// Adds a specular highlight. The exponent comes from a table indexed by the
/// material roughness byte, and is reduced using the normal variance.
class PassSpecularLight : CompositorPass
{
nothrow:
@nogc:
public:

    /// World-space direction. Unsure of the particular space it lives in.
    vec3f direction = vec3f(0.0f, 1.0f, 0.1f).normalized;

    /// Color of this light pass.
    vec3f color = vec3f(0.26f, 0.26f, 0.26f);

    /// Allocates the per-thread scratch slots and fills the exponent table.
    this(MultipassCompositor parent)
    {
        super(parent);
        _specularFactor.reallocBuffer(numThreads());
        _exponentFactor.reallocBuffer(numThreads());
        _toksvigScaleFactor.reallocBuffer(numThreads());

        // initialize new elements in the array, else realloc wouldn't work well next
        for (int thread = 0; thread < numThreads(); ++thread)
        {
            _specularFactor[thread] = null;
            _exponentFactor[thread] = null;
            _toksvigScaleFactor[thread] = null;
        }

        for (int roughByte = 0; roughByte < 256; ++roughByte)
        {
            _exponentTable[roughByte] = 0.8f * exp( (1-roughByte / 255.0f) * 5.5f);

            // Convert Phong exponent to Blinn-phong exponent
            _exponentTable[roughByte] *= 2.8f; // tuned by hand to match the former "legacy" Phong specular highlight. This makes very little difference.
        }

    }

    /// Resizes every per-thread scratch line to `width` floats.
    /// These lines are indexed with absolute x coordinates in `render`.
    override void resizeBuffers(int width,
                                int height,
                                int areaMaxWidth,
                                int areaMaxHeight)
    {
        // resize all thread-local buffers
        for (int thread = 0; thread < numThreads(); ++thread)
        {
            _specularFactor[thread].reallocBuffer(width);
            _exponentFactor[thread].reallocBuffer(width);
            _toksvigScaleFactor[thread].reallocBuffer(width);
        }
    }

    /// Each row is processed in three steps: compute the specular base and exponent
    /// per pixel, raise base to exponent for the whole row, then shade and accumulate.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0];
        OwnedImage!RGBA materialLevel0 = PBRbuf.materialMap.levels[0];
        OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex];
        OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex];
        OwnedImage!L32f varianceBuffer = PBRbuf.varianceBuffers[threadIndex];

        int w = diffuseLevel0.w;
        int h = diffuseLevel0.h;
        immutable float invW = 1.0f / w;
        immutable float invH = 1.0f / h;

        __m128 mmlight3Dir = _mm_setr_ps(-direction.x, -direction.y, -direction.z, 0.0f);

        // Light color does not change during the pass, build the vector once.
        __m128 mmLightColor = _mm_setr_ps(color.x, color.y, color.z, 0.0f);

        float* pSpecular = _specularFactor[threadIndex].ptr;
        float* pExponent = _exponentFactor[threadIndex].ptr;
        float* pToksvigScale = _toksvigScaleFactor[threadIndex].ptr;

        for (int j = area.min.y; j < area.max.y; ++j)
        {
            RGBA* materialScan = materialLevel0.scanlinePtr(j);
            RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j);
            RGBf* normalScan = normalBuffer.scanlinePtr(j - area.min.y);
            RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y);
            L32f* varianceScan = varianceBuffer.scanlinePtr(j - area.min.y);

            for (int i = area.min.x; i < area.max.x; ++i)
            {
                RGBA materialHere = materialScan[i];
                RGBf normalFromBuf = normalScan[i - area.min.x];
                __m128 normal = convertNormalToFloat4(normalFromBuf);

                // TODO: this should be tuned interactively, maybe it's annoying to feel
                //       Need to compute the viewer distance from screen... and DPI.
                // #RESIZE
                __m128 toEye = _mm_setr_ps(0.5f - i * invW, j * invH - 0.5f, 1.0f, 0.0f);
                toEye = _mm_fast_normalize_ps(toEye);

                __m128 halfVector = toEye - mmlight3Dir;
                halfVector = _mm_fast_normalize_ps(halfVector);
                float specularFactor = _mm_dot_ps(halfVector, normal);

                // Keep the base of the later pow strictly positive.
                if (specularFactor < 1e-3f)
                    specularFactor = 1e-3f;

                float exponent = _exponentTable[materialHere.r];

                // From NVIDIA Technical Brief: "Mipmapping Normal Maps"
                // We use normal variance to reduce exponent and scale of the specular
                // highlight, which should avoid aliasing.
                float VARIANCE_FACTOR = 4e-5f; // was very hard to tune, probably should not be dx*dx+dy*dy?
                float variance = varianceScan[i - area.min.x].l;
                float Ft = 1.0f / (1.0f + exponent * variance * VARIANCE_FACTOR);
                float scaleFactorToksvig = ( (1.0f + exponent * Ft) / (1.0f + exponent) );
                assert(scaleFactorToksvig <= 1);
                pToksvigScale[i] = scaleFactorToksvig;
                pSpecular[i] = specularFactor;
                pExponent[i] = exponent * Ft;
            }

            // Just the pow operation for this line
            {
                int i = area.min.x;
                for (; i + 3 < area.max.x; i += 4)
                {
                    _mm_storeu_ps(&pSpecular[i], _mm_pow_ps(_mm_loadu_ps(&pSpecular[i]), _mm_loadu_ps(&pExponent[i])));
                }
                // Remaining 0 to 3 pixels.
                for (; i < area.max.x; ++i)
                {
                    pSpecular[i] = _mm_pow_ss(pSpecular[i], pExponent[i]);
                }
            }

            for (int i = area.min.x; i < area.max.x; ++i)
            {
                float specularFactor = pSpecular[i];

                __m128 material = convertMaterialToFloat4(materialScan[i]);
                float roughness = material.array[0];
                float metalness = material.array[1];
                float specular  = material.array[2];
                __m128 baseColor = convertBaseColorToFloat4(diffuseScan[i]);

                float roughFactor = 10 * (1.0f - roughness) * (1 - metalness * 0.5f);
                specularFactor = specularFactor * roughFactor * pToksvigScale[i];
                __m128 finalColor = baseColor * mmLightColor * _mm_set1_ps(specularFactor * specular);

                _mm_store_ps(cast(float*)(&accumScan[i - area.min.x]), _mm_load_ps(cast(float*)(&accumScan[i - area.min.x])) + finalColor);
            }
        }
    }

    /// Releases every per-thread scratch line, then the outer arrays.
    ~this()
    {
        foreach(thread; 0..numThreads())
        {
            _specularFactor[thread].reallocBuffer(0);
            _exponentFactor[thread].reallocBuffer(0);
            _toksvigScaleFactor[thread].reallocBuffer(0);
        }
        _specularFactor.reallocBuffer(0);
        _exponentFactor.reallocBuffer(0);
        _toksvigScaleFactor.reallocBuffer(0);
    }

private:
    // Specular exponent, indexed by the material roughness byte.
    float[256] _exponentTable;

    // Note: those are thread-local buffers
    float[][] _specularFactor;
    float[][] _exponentFactor;
    float[][] _toksvigScaleFactor;
}

/// Adds reflections sampled from an optional skybox mipmap.
/// Does nothing until `setSkybox` has been called.
class PassSkyboxReflections : CompositorPass
{
nothrow:
@nogc:
public:

    /// Scale of the reflection contribution.
    float amount = 0.52f;

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    ~this()
    {
        if (_skybox !is null)
        {
            _skybox.destroyFree();
            _skybox = null;
        }
    }

    // Note: take ownership of image
    // That image must have been built with `mallocNew`
    // Any previously set skybox is destroyed first.
    void setSkybox(OwnedImage!RGBA image)
    {
        if (_skybox !is null)
        {
            _skybox.destroyFree();
            _skybox = null;
        }
        _skybox = mallocNew!(Mipmap!RGBA)(12, image);
        _skybox.generateMipmaps(Mipmap!RGBA.Quality.box);
    }

    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0];
        OwnedImage!RGBA materialLevel0 = PBRbuf.materialMap.levels[0];
        OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex];
        OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex];
        OwnedImage!L32f varianceBuffer = PBRbuf.varianceBuffers[threadIndex];

        int w = diffuseLevel0.w;
        int h = diffuseLevel0.h;
        immutable float invW = 1.0f / w;
        immutable float invH = 1.0f / h;

        // skybox reflection (use the same shininess as specular)
        if (_skybox !is null)
        {
            // Values that do not depend on the pixel, computed once per call.
            immutable float amountOfSkyboxPixels = _skybox.width * _skybox.height;
            immutable float fskyX = (_skybox.width - 1.0f);
            immutable float fSkyY = (_skybox.height - 1.0f);
            immutable float amountFactor = amount * div255;
            enum float ROUGH_FACT = 6.0f / 255.0f;

            for (int j = area.min.y; j < area.max.y; ++j)
            {
                RGBA* materialScan = materialLevel0.scanlinePtr(j);
                RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j);
                RGBf* normalScan = normalBuffer.scanlinePtr(j - area.min.y);
                RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y);
                L32f* varianceScan = varianceBuffer.scanlinePtr(j - area.min.y);

                for (int i = area.min.x; i < area.max.x; ++i)
                {
                    // First compute the needed mipmap level for this line
                    float mipmapLevel = varianceScan[i - area.min.x].l * amountOfSkyboxPixels;
                    float roughness = materialScan[i].r;
                    mipmapLevel = 0.5f * fastlog2(1.0f + mipmapLevel * 0.00001f) + ROUGH_FACT * roughness;

                    // TODO: same remark as above about toEye, something to think about
                    // #RESIZE
                    __m128 toEye = _mm_setr_ps(0.5f - i * invW, j * invH - 0.5f, 1.0f, 0.0f);
                    toEye = _mm_fast_normalize_ps(toEye);

                    __m128 normal = convertNormalToFloat4(normalScan[i - area.min.x]);
                    __m128 pureReflection = _mm_reflectnormal_ps(toEye, normal);
                    __m128 material = convertMaterialToFloat4(materialScan[i]);
                    float metalness = material.array[1];
                    __m128 baseColor = convertBaseColorToFloat4(diffuseScan[i]);

                    // Map the x and y of the reflection vector to skybox coordinates.
                    float skyx = 0.5f + ((0.5f - pureReflection.array[0] * 0.5f) * fskyX);
                    float skyy = 0.5f + ((0.5f + pureReflection.array[1] * 0.5f) * fSkyY);
                    __m128 skyColorAtThisPoint = convertVec4fToFloat4( _skybox.linearMipmapSample(mipmapLevel, skyx, skyy) );
                    __m128 color = baseColor * skyColorAtThisPoint * _mm_set1_ps(metalness * amountFactor);
                    _mm_store_ps(cast(float*)(&accumScan[i - area.min.x]), _mm_load_ps(cast(float*)(&accumScan[i - area.min.x])) + color);
                }
            }
        }
    }

private:
    /// Used for faking environment reflections.
    Mipmap!RGBA _skybox = null;
}

/// Adds a weighted sum of diffuse mipmap levels 1 to 5 to the accumulation buffer.
class PassEmissiveContribution : CompositorPass
{
nothrow:
@nogc:
public:

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex];
        Mipmap!RGBA diffuseMap = PBRbuf.diffuseMap;

        // Add light emitted by neighbours
        // Bloom-like.
        for (int j = area.min.y; j < area.max.y; ++j)
        {
            RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y);
            for (int i = area.min.x; i < area.max.x; ++i)
            {
                float ic = i + 0.5f;
                float jc = j + 0.5f;

                // Get alpha-premultiplied, avoids to have to do alpha-aware mipmapping
                // #RESIZE: more pixels => light travels further
                vec4f colorLevel1 = diffuseMap.linearSample(1, ic, jc);
                vec4f colorLevel2 = diffuseMap.linearSample(2, ic, jc);
                vec4f colorLevel3 = diffuseMap.linearSample(3, ic, jc);
                vec4f colorLevel4 = diffuseMap.linearSample(4, ic, jc);
                vec4f colorLevel5 = diffuseMap.linearSample(5, ic, jc);

                vec4f emitted = colorLevel1 * 0.00117647f;
                emitted += colorLevel2      * 0.00176471f;
                emitted += colorLevel3      * 0.00147059f;
                emitted += colorLevel4      * 0.00088235f;
                emitted += colorLevel5      * 0.00058823f;
                accumScan[i - area.min.x] += RGBAf(emitted.r, emitted.g, emitted.b, emitted.a);
            }
        }
    }
}

/// Final pass: converts the accumulation buffer to 8-bit and writes it to the
/// output buffer at absolute coordinates.
class PassClampAndConvertTo8bit : CompositorPass
{
nothrow:
@nogc:
public:

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex];
        ImageRef!RGBA* wfb = PBRbuf.outputBuf;

        immutable __m128 mm255_99 = _mm_set1_ps(255.99f);
        immutable __m128i zero = _mm_setzero_si128();

        // Final pass, clamp, convert to ubyte
        for (int j = area.min.y; j < area.max.y; ++j)
        {
            int* wfb_scan = cast(int*)(wfb.scanline(j).ptr);
            const(RGBAf)* accumScan = accumBuffer.scanlinePtr(j - area.min.y);

            for (int i = area.min.x; i < area.max.x; ++i)
            {
                RGBAf accum = accumScan[i - area.min.x];

                // The accumulated alpha is ignored, 1.0f is written instead.
                __m128 color = _mm_setr_ps(accum.r, accum.g, accum.b, 1.0f);
                __m128i icolorD = _mm_cvttps_epi32(color * mm255_99);

                // The two saturating packs perform the clamp.
                __m128i icolorW = _mm_packs_epi32(icolorD, zero);
                __m128i icolorB = _mm_packus_epi16(icolorW, zero);
                wfb_scan[i] = icolorB.array[0];
            }
        }
    }
}




private:

// log2 approximation by Laurent de Soras
// http://www.flipcode.com/archives/Fast_log_Function.shtml
// Works on the bit pattern of `val`: extracts the biased exponent field,
// then rewrites it so that the remaining float lies in [1, 2).
float fastlog2(float val) pure nothrow @nogc
{
    union fi_t
    {
        int i;
        float f;
    }

    fi_t fi;
    fi.f = val;
    int x = fi.i;
    int log_2 = ((x >> 23) & 255) - 128;
    x = x & ~(255 << 23);
    x += 127 << 23;
    fi.i = x;
    return fi.f + log_2;
}

// log2 approximation by Laurent de Soras
// http://www.flipcode.com/archives/Fast_log_Function.shtml
// Same but 4x at once
__m128 _mm_fastlog2_ps(__m128 val) pure nothrow @nogc
{
    __m128i x = _mm_castps_si128(val);
    __m128i m128 = _mm_set1_epi32(128);
    __m128i m255 = _mm_set1_epi32(255);
    __m128i log_2 = _mm_and_si128(_mm_srai_epi32(x, 23), m255) - m128;
    x = _mm_and_si128(x, _mm_set1_epi32(~(255 << 23)));
    x = x + _mm_set1_epi32(127 << 23);
    __m128 fif = _mm_castsi128_ps(x);
    return fif + _mm_cvtepi32_ps(log_2);
}



alias convertMaterialToFloat4 = convertBaseColorToFloat4;

// Convert a 8-bit color to a normalized 4xfloat color
// The four bytes are zero-extended to 32-bit lanes, then scaled by 1 / 255.
__m128 convertBaseColorToFloat4(RGBA rgba) nothrow @nogc pure
{
    int asInt = *cast(int*)(&rgba);
    __m128i packed = _mm_cvtsi32_si128(asInt);
    __m128i mmZero = _mm_setzero_si128();
    __m128i shorts = _mm_unpacklo_epi8(packed, mmZero);
    __m128i ints = _mm_unpacklo_epi16(shorts, mmZero);
    return _mm_cvtepi32_ps(ints) * _mm_set1_ps(div255);
}

// Put a normal in the first three lanes, the fourth lane is zero.
__m128 convertNormalToFloat4(RGBf normal) nothrow @nogc pure
{
    return _mm_setr_ps(normal.r, normal.g, normal.b, 0.0f);
}

// Lane-for-lane conversion of a vec4f.
__m128 convertVec4fToFloat4(vec4f vec) nothrow @nogc pure
{
    return _mm_setr_ps(vec.x, vec.y, vec.z, vec.w);
}

private enum float div255 = 1 / 255.0f;


// Removed Options:
// Note: `static assert("text")` would test the string literal itself, which is
// always true, so the condition must be an explicit `false`.
version(legacyBlinnPhong)
{
    static assert(false, "legacyBlinnPhong was removed in Dplug v13");
}

version(legacyPBRNormals)
{
    static assert(false, "legacyPBRNormals was removed in Dplug v12");
}