1 /** 2 * Original fixed-function PBR rendering in Dplug. 3 * For compatibility purpose. 4 * 5 * Copyright: Copyright Auburn Sounds 2015-2019. 6 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 7 */ 8 module dplug.gui.legacypbr; 9 10 11 import core.stdc.stdio; 12 import std.math; 13 14 import dplug.math.vector; 15 import dplug.math.box; 16 import dplug.math.matrix; 17 18 import dplug.core.vec; 19 import dplug.core.nogc; 20 import dplug.core.math; 21 import dplug.core.thread; 22 23 import dplug.gui.compositor; 24 25 import dplug.graphics; 26 import dplug.window.window; 27 28 import dplug.gui.ransac; 29 30 import inteli.math; 31 import inteli.smmintrin; 32 import dplug.gui.profiler; 33 34 // FUTURE: introduce a tonemap operator that doesn't break existing things and only "add" to the final render. 35 // TODO: PBR rendering doesn't depend rightly on size of the plugin. 36 // The #RESIZE tag below makrs all areas that needs updating. 37 38 39 /// When inheriging from `MultipassCompositor`, you can define what the passes exchange 40 /// between each other. However, the first field has to be a `CompositorPassBuffers`. 41 struct PBRCompositorPassBuffers 42 { 43 // First field must be `CompositorPassBuffers` for ABI compatibility of `MultipassCompositor`. 44 CompositorPassBuffers parent; 45 alias parent this; 46 47 // Computed normal, one buffer per thread 48 OwnedImage!RGBf[] normalBuffers; 49 50 // Accumulates light for each deferred pass, one buffer per thread 51 OwnedImage!RGBAf[] accumBuffers; 52 53 // Approximate of normal variance, one buffer per thread 54 OwnedImage!L32f[] varianceBuffers; 55 } 56 57 58 /// Equivalence factor between Z samples and pixels. 59 /// Tuned once by hand to match the other normal computation algorithm 60 /// This affects virtual geometry, and as such: normals and raymarching into depth. 61 /// Future: this should be modifiable in order to have more Z range in plugins (more 3D). 62 /// Bug: resizing should affect this factor. 63 enum float FACTOR_Z = 4655.0f; // #RESIZE: this factor depends on DPI 64 65 /// Originally, Dplug compositor was fixed function. 66 /// This is the legacy compositor. 67 class PBRCompositor : MultipassCompositor 68 { 69 nothrow @nogc: 70 71 72 // <LEGACY> parameters, reproduced here as properties for compatibility. 73 // Instead you are supposed to tweak settings when creating the passes. 74 75 void light1Color(vec3f color) 76 { 77 (cast(PassObliqueShadowLight)getPass(PASS_OBLIQUE_SHADOW)).color = color; 78 } 79 80 void light2Dir(vec3f dir) 81 { 82 (cast(PassDirectionalLight)getPass(PASS_DIRECTIONAL)).direction = dir; 83 } 84 85 void light2Color(vec3f color) 86 { 87 (cast(PassDirectionalLight)getPass(PASS_DIRECTIONAL)).color = color; 88 } 89 90 void light3Dir(vec3f dir) 91 { 92 (cast(PassSpecularLight)getPass(PASS_SPECULAR)).direction = dir; 93 } 94 95 void light3Color(vec3f color) 96 { 97 (cast(PassSpecularLight)getPass(PASS_SPECULAR)).color = color; 98 } 99 100 void skyboxAmount(float amount) 101 { 102 (cast(PassSkyboxReflections)getPass(PASS_SKYBOX)).amount = amount; 103 } 104 105 void ambientLight(float amount) 106 { 107 (cast(PassAmbientOcclusion)getPass(PASS_AO)).amount = amount; 108 } 109 110 version(futurePBREmissive) 111 { 112 void tonemapThreshold(float value) 113 { 114 (cast(PassClampAndConvertTo8bit)getPass(PASS_CLAMP)).tonemapThreshold = value; 115 } 116 117 void tonemapRatio(float value) 118 { 119 (cast(PassClampAndConvertTo8bit)getPass(PASS_CLAMP)).tonemapRatio = value; 120 } 121 } 122 123 // </LEGACY> 124 125 126 127 private enum // MUST be kept in sync with below passes, it's for legacy purpose 128 { 129 PASS_NORMAL = 0, 130 PASS_AO = 1, 131 PASS_OBLIQUE_SHADOW = 2, 132 PASS_DIRECTIONAL = 3, 133 PASS_SPECULAR = 4, 134 PASS_SKYBOX = 5, 135 PASS_EMISSIVE = 6, 136 PASS_CLAMP = 7 137 } 138 139 this(CompositorCreationContext* context) 140 { 141 super(context); 142 143 _normalBuffers = mallocSlice!(OwnedImage!RGBf)(numThreads()); 144 _accumBuffers = mallocSlice!(OwnedImage!RGBAf)(numThreads()); 145 _varianceBuffers = mallocSlice!(OwnedImage!L32f)(numThreads()); 146 147 for (int t = 0; t < numThreads(); ++t) 148 { 149 _normalBuffers[t] = mallocNew!(OwnedImage!RGBf)(); 150 _accumBuffers[t] = mallocNew!(OwnedImage!RGBAf)(); 151 _varianceBuffers[t] = mallocNew!(OwnedImage!L32f)(); 152 } 153 154 // Create the passes 155 addPass( mallocNew!PassComputeNormal(this) ); // PASS_NORMAL 156 addPass( mallocNew!PassAmbientOcclusion(this) ); // PASS_AO 157 addPass( mallocNew!PassObliqueShadowLight(this) ); // PASS_OBLIQUE_SHADOW 158 addPass( mallocNew!PassDirectionalLight(this) ); // PASS_DIRECTIONAL 159 addPass( mallocNew!PassSpecularLight(this) ); // PASS_SPECULAR 160 addPass( mallocNew!PassSkyboxReflections(this) ); // PASS_SKYBOX 161 addPass( mallocNew!PassEmissiveContribution(this) ); // PASS_EMISSIVE 162 addPass( mallocNew!PassClampAndConvertTo8bit(this) ); // PASS_CLAMP 163 } 164 165 ~this() 166 { 167 for (size_t t = 0; t < _normalBuffers.length; ++t) 168 { 169 _normalBuffers[t].destroyFree(); 170 _accumBuffers[t].destroyFree(); 171 _varianceBuffers[t].destroyFree(); 172 } 173 freeSlice(_normalBuffers); 174 freeSlice(_accumBuffers); 175 freeSlice(_varianceBuffers); 176 } 177 178 override void resizeBuffers(int width, 179 int height, 180 int areaMaxWidth, 181 int areaMaxHeight) 182 { 183 super.resizeBuffers(width, height, areaMaxWidth, areaMaxHeight); 184 185 // Create numThreads thread-local buffers of areaMaxWidth x areaMaxHeight size. 186 for (int t = 0; t < numThreads(); ++t) 187 { 188 189 int border_0 = 0; 190 int rowAlign_1 = 1; 191 int rowAlign_16 = 16; 192 _normalBuffers[t].size(areaMaxWidth, areaMaxHeight, border_0, rowAlign_1); 193 _accumBuffers[t].size(areaMaxWidth, areaMaxHeight, border_0, rowAlign_16); 194 _varianceBuffers[t].size(areaMaxWidth, areaMaxHeight, border_0, rowAlign_1); 195 } 196 } 197 198 199 override void compositeTile(ImageRef!RGBA wfb, 200 const(box2i)[] areas, 201 Mipmap!RGBA diffuseMap, 202 Mipmap!RGBA materialMap, 203 Mipmap!L16 depthMap, 204 IProfiler profiler) 205 { 206 // Call each pass in sequence 207 PBRCompositorPassBuffers buffers; 208 buffers.outputBuf = &wfb; 209 buffers.diffuseMap = diffuseMap; 210 buffers.materialMap = materialMap; 211 buffers.depthMap = depthMap; 212 buffers.accumBuffers = _accumBuffers; 213 buffers.normalBuffers = _normalBuffers; 214 buffers.varianceBuffers = _varianceBuffers; 215 216 // For each tile, do all pass one by one. 217 void compositeOneTile(int i, int threadIndex) nothrow @nogc 218 { 219 OwnedImage!RGBAf accumBuffer = _accumBuffers[threadIndex]; 220 221 version(Dplug_ProfileUI) 222 { 223 profiler.category("PBR"); 224 } 225 226 box2i area = areas[i]; 227 // Clear the accumulation buffer, since all passes add to it 228 { 229 RGBAf zero = RGBAf(0.0f, 0.0f, 0.0f, 0.0f); 230 for (int j = 0; j < area.height; ++j) 231 { 232 RGBAf* accumScan = accumBuffer.scanline(j).ptr; 233 accumScan[0..area.width] = zero; 234 } 235 } 236 237 238 239 foreach(pass; passes()) 240 { 241 version(Dplug_ProfileUI) 242 { 243 char[96] buf; 244 snprintf(buf.ptr, 96, "Pass %s".ptr, pass.name.ptr); 245 profiler.begin(buf); 246 } 247 248 pass.renderIfActive(threadIndex, area, cast(CompositorPassBuffers*)&buffers); 249 250 version(Dplug_ProfileUI) 251 { 252 profiler.end; 253 } 254 } 255 } 256 int numAreas = cast(int)areas.length; 257 threadPool().parallelFor(numAreas, &compositeOneTile); 258 } 259 260 private: 261 OwnedImage!RGBf[] _normalBuffers; // store computed normals 262 OwnedImage!RGBAf[] _accumBuffers; // store accumulated color 263 OwnedImage!L32f[] _varianceBuffers; // store computed normal variance, useful for anti-aliasing 264 } 265 266 // Compute normals from depth, and normal variance. 267 class PassComputeNormal : CompositorPass 268 { 269 nothrow: 270 @nogc: 271 272 this(MultipassCompositor parent) 273 { 274 super(parent); 275 } 276 277 override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers) 278 { 279 PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers; 280 OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex]; 281 OwnedImage!L16 depthLevel0 = PBRbuf.depthMap.levels[0]; 282 OwnedImage!L32f varianceBuffer = PBRbuf.varianceBuffers[threadIndex]; 283 284 const int depthPitchBytes = depthLevel0.pitchInBytes(); 285 286 for (int j = area.min.y; j < area.max.y; ++j) 287 { 288 RGBf* normalScan = normalBuffer.scanline(j - area.min.y).ptr; 289 L32f* varianceScan = varianceBuffer.scanline(j - area.min.y).ptr; 290 291 // Note: because the level 0 of depth map has a border of 1 and a trailingSamples of 2, 292 // then we are allowed to read 4 depth samples at once. 293 const(L16)* depthScan = depthLevel0.scanlinePtr(j); 294 295 for (int i = area.min.x; i < area.max.x; ++i) 296 { 297 // Compute normal 298 { 299 const(L16)* depthHere = depthScan + i; 300 const(L16)* depthHereM1 = cast(const(L16)*) ( cast(const(ubyte)*)depthHere - depthPitchBytes ); 301 const(L16)* depthHereP1 = cast(const(L16)*) ( cast(const(ubyte)*)depthHere + depthPitchBytes ); 302 enum float multUshort = 1.0 / FACTOR_Z; 303 float[9] depthNeighbourhood = void; 304 depthNeighbourhood[0] = depthHereM1[-1].l * multUshort; 305 depthNeighbourhood[1] = depthHereM1[ 0].l * multUshort; 306 depthNeighbourhood[2] = depthHereM1[+1].l * multUshort; 307 depthNeighbourhood[3] = depthHere[-1].l * multUshort; 308 depthNeighbourhood[4] = depthHere[ 0].l * multUshort; 309 depthNeighbourhood[5] = depthHere[+1].l * multUshort; 310 depthNeighbourhood[6] = depthHereP1[-1].l * multUshort; 311 depthNeighbourhood[7] = depthHereP1[ 0].l * multUshort; 312 depthNeighbourhood[8] = depthHereP1[+1].l * multUshort; 313 vec3f normal = computePlaneFittingNormal(depthNeighbourhood.ptr); 314 normalScan[i - area.min.x] = RGBf(normal.x, normal.y, normal.z); 315 } 316 317 // Compute normal variance (old method) 318 { 319 const(ubyte)* depthHere = cast(const(ubyte)*)(depthScan + i); 320 321 // Read 12 depth samples, the rightmost are unused 322 __m128i depthSamplesM1 = _mm_loadl_epi64( cast(const(__m128i)*)(depthHere - depthPitchBytes - 2) ); 323 __m128i depthSamplesP0 = _mm_loadl_epi64( cast(const(__m128i)*)(depthHere - 2) ); 324 __m128i depthSamplesP1 = _mm_loadl_epi64( cast(const(__m128i)*)(depthHere + depthPitchBytes - 2) ); 325 326 // Extend to float 327 __m128i zero = _mm_setzero_si128(); 328 __m128 depthM1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(depthSamplesM1, zero)); 329 __m128 depthP0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(depthSamplesP0, zero)); 330 __m128 depthP1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(depthSamplesP1, zero)); 331 332 enum useLaplacian = false; 333 static if (useLaplacian) 334 { 335 // Possible a bit better, not tried further since 336 // it is a pain to make it match for the passes that uses it. 337 // 2nd-order-derivative for depth in the X direction 338 align(16) static immutable float[12] LAPLACIAN = 339 [ 340 0.25, 0.5, 0.25, 0, 341 0.5, -3.0, 0.5, 0, 342 0.25, 0.5, 0.25, 0, 343 ]; 344 345 __m128 mul = depthM1 * _mm_load_ps(&LAPLACIAN[0]) 346 + depthP0 * _mm_load_ps(&LAPLACIAN[4]) 347 + depthP1 * _mm_load_ps(&LAPLACIAN[8]); 348 float laplace = mul.array[0] + mul.array[1] + mul.array[2] + mul.array[3]; 349 laplace /= 256.0f; 350 float variance = laplace*laplace; 351 } 352 else 353 { 354 // 2nd-order-derivative for depth in the X direction 355 // 1 -2 1 356 // 1 -2 1 357 // 1 -2 1 358 const(__m128) fact_DDX_M1 = _mm_setr_ps( 1.0f, -2.0f, 1.0f, 0.0f); 359 __m128 mulForDDX = fact_DDX_M1 * (depthM1 + depthP0 + depthP1); 360 float depthDX = mulForDDX.array[0] + mulForDDX.array[1] + mulForDDX.array[2]; 361 362 // 2nd-order-derivative for depth in the Y direction 363 // 1 1 1 364 // -2 -2 -2 365 // 1 1 1 366 const(__m128) fact_DDY_M1 = _mm_setr_ps( 1.0f, 1.0f, 1.0f, 0.0f); 367 const(__m128) fact_DDY_P0 = _mm_setr_ps(-2.0f, -2.0f, -2.0f, 0.0f); 368 __m128 mulForDDY = fact_DDY_M1 * (depthM1 + depthP1) + depthP0 * fact_DDY_P0; 369 float depthDY = mulForDDY.array[0] + mulForDDY.array[1] + mulForDDY.array[2]; 370 371 depthDX *= (1 / 256.0f); // #RESIZE: sounds strange 372 depthDY *= (1 / 256.0f); 373 float variance = (depthDX * depthDX + depthDY * depthDY); 374 } 375 varianceScan[i - area.min.x] = L32f(variance); 376 } 377 } 378 } 379 } 380 } 381 382 383 /// Give light depending on whether the pixels are statistically above their neighbours. 384 class PassAmbientOcclusion : CompositorPass 385 { 386 nothrow: 387 @nogc: 388 389 float amount = 0.08125f; 390 391 // TODO: add ambient light color 392 393 this(MultipassCompositor parent) 394 { 395 super(parent); 396 } 397 398 override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers) 399 { 400 PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers; 401 OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0]; 402 Mipmap!L16 depthMap = PBRbuf.depthMap; 403 OwnedImage!L16 depthLevel0 = PBRbuf.depthMap.levels[0]; 404 OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex]; 405 406 for (int j = area.min.y; j < area.max.y; ++j) 407 { 408 RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j); 409 const(L16*) depthScan = depthLevel0.scanlinePtr(j); 410 RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y); 411 412 for (int i = area.min.x; i < area.max.x; ++i) 413 { 414 __m128 baseColor = convertBaseColorToFloat4(diffuseScan[i]); 415 416 const(L16)* depthHere = depthScan + i; 417 418 float px = i + 0.5f; 419 float py = j + 0.5f; 420 421 // #RESIZE: if the plugin is large, should sample higher in mipmap levels 422 423 float avgDepthHere = 424 ( depthMap.linearSample(1, px, py) 425 + depthMap.linearSample(2, px, py) 426 + depthMap.linearSample(3, px, py) 427 + depthMap.linearSample(4, px, py) ) * 0.25f; 428 429 float diff = (*depthHere).l - avgDepthHere; 430 431 enum float divider23040 = 1.0f / 23040; 432 float cavity = (diff + 23040.0f) * divider23040; 433 if (cavity >= 1) 434 cavity = 1; 435 else if (cavity < 0) 436 cavity = 0; 437 438 __m128 color = baseColor * _mm_set1_ps(cavity * amount); 439 _mm_store_ps(cast(float*)(&accumScan[i - area.min.x]), _mm_load_ps(cast(float*)(&accumScan[i - area.min.x])) + color); 440 } 441 } 442 } 443 } 444 445 class PassObliqueShadowLight : CompositorPass 446 { 447 nothrow: 448 @nogc: 449 450 /// Color of this light pass. 451 vec3f color = vec3f(0.25f, 0.25f, 0.25f) * 1.3f; 452 453 this(MultipassCompositor parent) 454 { 455 super(parent); 456 } 457 458 override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers) 459 { 460 PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers; 461 OwnedImage!L16 depthLevel0 = PBRbuf.depthMap.levels[0]; 462 OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0]; 463 OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex]; 464 465 // Add a primary light that cast shadows 466 467 enum float fallOff = 0.78f; // #RESIZE, recompute that table as needed 468 469 int samples = 11; // #RESIZE ditto 470 471 // PERF: align(16) on weight[1] 472 static immutable float[11] weights = 473 [ 474 1.0f, 475 fallOff, 476 fallOff ^^ 2, 477 fallOff ^^ 3, 478 fallOff ^^ 4, 479 fallOff ^^ 5, 480 fallOff ^^ 6, 481 fallOff ^^ 7, 482 fallOff ^^ 8, 483 fallOff ^^ 9, 484 fallOff ^^ 10 485 ]; 486 487 enum float totalWeights = (1.0f - (fallOff ^^ 11)) / (1.0f - fallOff) - 1; 488 enum float invTotalWeights = 1 / (1.7f * totalWeights); 489 490 int wholeWidth = depthLevel0.w; 491 int wholeHeight = depthLevel0.h; 492 493 for (int j = area.min.y; j < area.max.y; ++j) 494 { 495 RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j); 496 497 const(L16*) depthScan = depthLevel0.scanlinePtr(j); 498 RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y); 499 500 for (int i = area.min.x; i < area.max.x; ++i) 501 { 502 const(L16)* depthHere = depthScan + i; 503 RGBA ibaseColor = diffuseScan[i]; 504 vec3f baseColor = vec3f(ibaseColor.r, ibaseColor.g, ibaseColor.b) * div255; 505 506 float lightPassed = 0.0f; 507 508 int depthCenter = (*depthHere).l; 509 { 510 int sample = 1; 511 __m128 mmZeroesf = _mm_setzero_ps(); 512 __m128i mmZero = _mm_setzero_si128(); 513 __m128 mmOnes = _mm_set1_ps(1.0f); 514 __m128 mm0_7 = _mm_set1_ps(0.7f); 515 __m128i maxX = _mm_set1_epi32(wholeWidth - 1); 516 517 for (; sample + 3 < samples; sample += 4) 518 { 519 __m128i mm0123 = _mm_setr_epi32(0, 1, 2, 3); 520 __m128i mmSample = _mm_set1_epi32(sample) + mm0123; 521 __m128i mmI = _mm_set1_epi32(i); // X coord 522 __m128i mmJ = _mm_set1_epi32(j); // Y coord 523 __m128i x1 = mmI + mmSample; 524 __m128i x2 = mmI - mmSample; 525 __m128i y = mmJ - mmSample; 526 527 // clamp source indices 528 529 // PERF: _mm_min_epi32 and _mm_max_epi32 not available in SSE3, use _mm_min_epi16 instead 530 x1 = _mm_min_epi32(x1, maxX); 531 x2 = _mm_max_epi32(x2, mmZero); 532 y = _mm_max_epi32( y, mmZero); 533 534 __m128i z = _mm_set1_epi32(depthCenter) + mmSample; /// ??? same WTF this makes no sense whatsoever 535 536 L16* scan0 = depthLevel0.scanlinePtr(y.array[0]); 537 L16* scan1 = depthLevel0.scanlinePtr(y.array[1]); 538 L16* scan2 = depthLevel0.scanlinePtr(y.array[2]); 539 L16* scan3 = depthLevel0.scanlinePtr(y.array[3]); 540 541 __m128 diff1 = _mm_cvtepi32_ps( 542 z - _mm_setr_epi32( scan0[x1.array[0]].l, 543 scan1[x1.array[1]].l, 544 scan2[x1.array[2]].l, 545 scan3[x1.array[3]].l ) ); 546 547 __m128 diff2 = _mm_cvtepi32_ps( 548 z - _mm_setr_epi32( scan0[x2.array[0]].l, 549 scan1[x2.array[1]].l, 550 scan2[x2.array[2]].l, 551 scan3[x2.array[3]].l ) ); 552 553 __m128 mmA = _mm_set1_ps(0.00006510416f); // 1 / 15360 554 __m128 contrib1 = _mm_max_ps(mmZeroesf, _mm_min_ps(mmOnes, mmOnes + diff1 * mmA)); 555 __m128 contrib2 = _mm_max_ps(mmZeroesf, _mm_min_ps(mmOnes, mmOnes + diff2 * mmA)); 556 __m128 mmWeight = _mm_loadu_ps(&weights[sample]); 557 __m128 contrib = (contrib1 + contrib2 * mm0_7) * mmWeight; 558 lightPassed += contrib.array[0]; 559 lightPassed += contrib.array[1]; 560 lightPassed += contrib.array[2]; 561 lightPassed += contrib.array[3]; 562 } 563 564 for ( ; sample < samples; ++sample) 565 { 566 int x1 = i + sample; 567 if (x1 >= wholeWidth) 568 x1 = wholeWidth - 1; 569 int x2 = i - sample; 570 if (x2 < 0) 571 x2 = 0; 572 int y = j - sample; 573 if (y < 0) 574 y = 0; 575 int z = depthCenter + sample; // ??? WTF 576 L16* scan = depthLevel0.scanlinePtr(y); 577 578 int diff1 = z - scan[x1].l; // FUTURE: use pointer offsets here instead of opIndex 579 int diff2 = z - scan[x2].l; 580 581 float contrib1 = void, 582 contrib2 = void; 583 584 // Map diff 0 to contribution = 1 585 // Map -15360 to contribution = 0 586 // Clamp otherwise. 587 // In otherwords, this is f(x) = clamp(Ax+B, 0, 1) 588 // with A = 1/15360 589 // B = 1 590 static immutable float divider15360 = 1.0f / 15360; // BUG: not consistent with FACTOR_Z, this is steeper... 591 592 if (diff1 >= 0) 593 contrib1 = 1; 594 else if (diff1 < -15360) 595 contrib1 = 0; 596 else 597 contrib1 = (diff1 + 15360) * divider15360; 598 599 if (diff2 >= 0) 600 contrib2 = 1; 601 else if (diff2 < -15360) 602 contrib2 = 0; 603 else 604 contrib2 = (diff2 + 15360) * divider15360; 605 606 lightPassed += (contrib1 + contrib2 * 0.7f) * weights[sample]; 607 } 608 } 609 vec3f finalColor = baseColor * color * (lightPassed * invTotalWeights); 610 __m128 mmColor = _mm_setr_ps(finalColor.r, finalColor.g, finalColor.b, 0.0f); 611 _mm_store_ps(cast(float*)(&accumScan[i - area.min.x]), _mm_load_ps(cast(float*)(&accumScan[i - area.min.x])) + mmColor); 612 } 613 } 614 } 615 } 616 617 class PassDirectionalLight : CompositorPass 618 { 619 nothrow: 620 @nogc: 621 public: 622 623 /// World-space direction. Unsure of the particular space it lives in. 624 vec3f direction = vec3f(0.0f, 1.0f, 0.1f).normalized; 625 626 /// Color of this light pass. 627 vec3f color = vec3f(0.481f, 0.481f, 0.481f); 628 629 this(MultipassCompositor parent) 630 { 631 super(parent); 632 } 633 634 override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers) 635 { 636 PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers; 637 OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0]; 638 OwnedImage!RGBA materialLevel0 = PBRbuf.materialMap.levels[0]; 639 OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex]; 640 OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex]; 641 642 // secundary light 643 for (int j = area.min.y; j < area.max.y; ++j) 644 { 645 RGBA* materialScan = materialLevel0.scanlinePtr(j); 646 RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j); 647 RGBf* normalScan = normalBuffer.scanlinePtr(j - area.min.y); 648 RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y); 649 650 for (int i = area.min.x; i < area.max.x; ++i) 651 { 652 RGBf normalFromBuf = normalScan[i - area.min.x]; 653 RGBA materialHere = materialScan[i]; 654 float roughness = materialHere.r * div255; 655 RGBA ibaseColor = diffuseScan[i]; 656 vec3f baseColor = vec3f(ibaseColor.r, ibaseColor.g, ibaseColor.b) * div255; 657 vec3f normal = vec3f(normalFromBuf.r, normalFromBuf.g, normalFromBuf.b); 658 float diffuseFactor = 0.5f + 0.5f * dot(normal, direction); 659 diffuseFactor = linmap!float(diffuseFactor, 0.24f - roughness * 0.5f, 1, 0, 1.0f); 660 vec3f finalColor = baseColor * color * diffuseFactor; 661 accumScan[i - area.min.x] += RGBAf(finalColor.r, finalColor.g, finalColor.b, 0.0f); 662 } 663 } 664 } 665 } 666 667 class PassSpecularLight : CompositorPass 668 { 669 nothrow: 670 @nogc: 671 public: 672 673 /// World-space direction. Unsure of the particular space it lives in. 674 vec3f direction = vec3f(0.0f, 1.0f, 0.1f).normalized; 675 676 /// Color of this light pass. 677 vec3f color = vec3f(0.26f, 0.26f, 0.26f); 678 679 this(MultipassCompositor parent) 680 { 681 super(parent); 682 _specularFactor.reallocBuffer(numThreads()); 683 _exponentFactor.reallocBuffer(numThreads()); 684 _toksvigScaleFactor.reallocBuffer(numThreads()); 685 686 // initialize new elements in the array, else realloc wouldn't work well next 687 for (int thread = 0; thread < numThreads(); ++thread) 688 { 689 _specularFactor[thread] = null; 690 _exponentFactor[thread] = null; 691 _toksvigScaleFactor[thread] = null; 692 } 693 694 for (int roughByte = 0; roughByte < 256; ++roughByte) 695 { 696 _exponentTable[roughByte] = 0.8f * exp( (1-roughByte / 255.0f) * 5.5f); 697 698 // Convert Phong exponent to Blinn-phong exponent 699 _exponentTable[roughByte] *= 2.8f; // tuned by hand to match the former "legacy" Phong specular highlight. This makes very little difference. 700 } 701 702 } 703 704 override void resizeBuffers(int width, 705 int height, 706 int areaMaxWidth, 707 int areaMaxHeight) 708 { 709 // resize all thread-local buffers 710 for (int thread = 0; thread < numThreads(); ++thread) 711 { 712 _specularFactor[thread].reallocBuffer(width); 713 _exponentFactor[thread].reallocBuffer(width); 714 _toksvigScaleFactor[thread].reallocBuffer(width); 715 } 716 } 717 718 override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers) 719 { 720 PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers; 721 OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0]; 722 OwnedImage!RGBA materialLevel0 = PBRbuf.materialMap.levels[0]; 723 OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex]; 724 OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex]; 725 OwnedImage!L32f varianceBuffer = PBRbuf.varianceBuffers[threadIndex]; 726 727 int w = diffuseLevel0.w; 728 int h = diffuseLevel0.h; 729 immutable float invW = 1.0f / w; 730 immutable float invH = 1.0f / h; 731 732 __m128 mmlight3Dir = _mm_setr_ps(-direction.x, -direction.y, -direction.z, 0.0f); 733 float* pSpecular = _specularFactor[threadIndex].ptr; 734 float* pExponent = _exponentFactor[threadIndex].ptr; 735 float* pToksvigScale = _toksvigScaleFactor[threadIndex].ptr; 736 737 for (int j = area.min.y; j < area.max.y; ++j) 738 { 739 RGBA* materialScan = materialLevel0.scanlinePtr(j); 740 RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j); 741 RGBf* normalScan = normalBuffer.scanlinePtr(j - area.min.y); 742 RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y); 743 L32f* varianceScan = varianceBuffer.scanlinePtr(j - area.min.y); 744 745 for (int i = area.min.x; i < area.max.x; ++i) 746 { 747 RGBA materialHere = materialScan[i]; 748 RGBf normalFromBuf = normalScan[i - area.min.x]; 749 __m128 normal = convertNormalToFloat4(normalFromBuf); 750 751 // TODO: this should be tuned interactively, maybe it's annoying to feel 752 // Need to compute the viewer distance from screen... and DPI. 753 // #RESIZE 754 __m128 toEye = _mm_setr_ps(0.5f - i * invW, j * invH - 0.5f, 1.0f, 0.0f); 755 toEye = _mm_fast_normalize_ps(toEye); 756 757 __m128 halfVector = toEye - mmlight3Dir; 758 halfVector = _mm_fast_normalize_ps(halfVector); 759 float specularFactor = _mm_dot_ps(halfVector, normal); 760 761 if (specularFactor < 1e-3f) 762 specularFactor = 1e-3f; 763 764 float exponent = _exponentTable[materialHere.r]; 765 766 // From NVIDIA Technical Brief: "Mipmapping Normal Maps" 767 // We use normal variance to reduce exponent and scale of the specular 768 // highlight, which should avoid aliasing. 769 float VARIANCE_FACTOR = 4e-5f; // was very hard to tune, probably should not be dx*dx+dy*dy? 770 float variance = varianceScan[i - area.min.x].l; 771 float Ft = 1.0f / (1.0f + exponent * variance * VARIANCE_FACTOR); 772 float scaleFactorToksvig = ( (1.0f + exponent * Ft) / (1.0f + exponent) ); 773 assert(scaleFactorToksvig <= 1); 774 pToksvigScale[i] = scaleFactorToksvig; 775 pSpecular[i] = specularFactor; 776 pExponent[i] = exponent * Ft; 777 } 778 779 // Just the pow operation for this line 780 { 781 int i = area.min.x; 782 for (; i + 3 < area.max.x; i += 4) 783 { 784 _mm_storeu_ps(&pSpecular[i], _mm_pow_ps(_mm_loadu_ps(&pSpecular[i]), _mm_loadu_ps(&pExponent[i]))); 785 } 786 for (; i < area.max.x; ++i) 787 { 788 pSpecular[i] = _mm_pow_ss(pSpecular[i], pExponent[i]); 789 } 790 } 791 792 for (int i = area.min.x; i < area.max.x; ++i) 793 { 794 float specularFactor = pSpecular[i]; 795 796 __m128 material = convertMaterialToFloat4(materialScan[i]); 797 RGBA materialHere = materialScan[i]; 798 float roughness = material.array[0]; 799 float metalness = material.array[1]; 800 float specular = material.array[2]; 801 __m128 baseColor = convertBaseColorToFloat4(diffuseScan[i]); 802 __m128 mmLightColor = _mm_setr_ps(color.x, color.y, color.z, 0.0f); 803 804 float roughFactor = 10 * (1.0f - roughness) * (1 - metalness * 0.5f); 805 specularFactor = specularFactor * roughFactor * pToksvigScale[i]; 806 __m128 finalColor = baseColor * mmLightColor * _mm_set1_ps(specularFactor * specular); 807 808 _mm_store_ps(cast(float*)(&accumScan[i - area.min.x]), _mm_load_ps(cast(float*)(&accumScan[i - area.min.x])) + finalColor); 809 } 810 } 811 } 812 813 ~this() 814 { 815 foreach(thread; 0..numThreads()) 816 { 817 _specularFactor[thread].reallocBuffer(0); 818 _exponentFactor[thread].reallocBuffer(0); 819 _toksvigScaleFactor[thread].reallocBuffer(0); 820 } 821 _specularFactor.reallocBuffer(0); 822 _exponentFactor.reallocBuffer(0); 823 _toksvigScaleFactor.reallocBuffer(0); 824 } 825 826 private: 827 float[256] _exponentTable; 828 829 // Note: those are thread-local buffers 830 float[][] _specularFactor; 831 float[][] _exponentFactor; 832 float[][] _toksvigScaleFactor; 833 } 834 835 class PassSkyboxReflections : CompositorPass 836 { 837 nothrow: 838 @nogc: 839 public: 840 841 float amount = 0.52f; 842 843 this(MultipassCompositor parent) 844 { 845 super(parent); 846 } 847 848 ~this() 849 { 850 if (_skybox !is null) 851 { 852 _skybox.destroyFree(); 853 _skybox = null; 854 } 855 } 856 857 // Note: take ownership of image 858 // That image must have been built with `mallocNew` 859 void setSkybox(OwnedImage!RGBA image) 860 { 861 if (_skybox !is null) 862 { 863 _skybox.destroyFree(); 864 _skybox = null; 865 } 866 _skybox = mallocNew!(Mipmap!RGBA)(12, image); 867 _skybox.generateMipmaps(Mipmap!RGBA.Quality.box); 868 } 869 870 override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers) 871 { 872 PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers; 873 OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0]; 874 OwnedImage!RGBA materialLevel0 = PBRbuf.materialMap.levels[0]; 875 OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex]; 876 OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex]; 877 OwnedImage!L32f varianceBuffer = PBRbuf.varianceBuffers[threadIndex]; 878 879 int w = diffuseLevel0.w; 880 int h = diffuseLevel0.h; 881 immutable float invW = 1.0f / w; 882 immutable float invH = 1.0f / h; 883 884 // skybox reflection (use the same shininess as specular) 885 if (_skybox !is null) 886 { 887 for (int j = area.min.y; j < area.max.y; ++j) 888 { 889 RGBA* materialScan = materialLevel0.scanlinePtr(j); 890 RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j); 891 RGBf* normalScan = normalBuffer.scanlinePtr(j - area.min.y); 892 RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y); 893 L32f* varianceScan = varianceBuffer.scanlinePtr(j - area.min.y); 894 895 immutable float amountOfSkyboxPixels = _skybox.width * _skybox.height; 896 897 for (int i = area.min.x; i < area.max.x; ++i) 898 { 899 // First compute the needed mipmap level for this line 900 float mipmapLevel = varianceScan[i - area.min.x].l * amountOfSkyboxPixels; 901 enum float ROUGH_FACT = 6.0f / 255.0f; 902 float roughness = materialScan[i].r; 903 mipmapLevel = 0.5f * fastlog2(1.0f + mipmapLevel * 0.00001f) + ROUGH_FACT * roughness; 904 905 immutable float fskyX = (_skybox.width - 1.0f); 906 immutable float fSkyY = (_skybox.height - 1.0f); 907 908 immutable float amountFactor = amount * div255; 909 910 // TODO: same remark than above about toEye, something to think about 911 // #RESIZE 912 __m128 toEye = _mm_setr_ps(0.5f - i * invW, j * invH - 0.5f, 1.0f, 0.0f); 913 toEye = _mm_fast_normalize_ps(toEye); 914 915 __m128 normal = convertNormalToFloat4(normalScan[i - area.min.x]); 916 __m128 pureReflection = _mm_reflectnormal_ps(toEye, normal); 917 __m128 material = convertMaterialToFloat4(materialScan[i]); 918 float metalness = material.array[1]; 919 __m128 baseColor = convertBaseColorToFloat4(diffuseScan[i]); 920 float skyx = 0.5f + ((0.5f - pureReflection.array[0] * 0.5f) * fskyX); 921 float skyy = 0.5f + ((0.5f + pureReflection.array[1] * 0.5f) * fSkyY); 922 __m128 skyColorAtThisPoint = convertVec4fToFloat4( _skybox.linearMipmapSample(mipmapLevel, skyx, skyy) ); 923 __m128 color = baseColor * skyColorAtThisPoint * _mm_set1_ps(metalness * amountFactor); 924 _mm_store_ps(cast(float*)(&accumScan[i - area.min.x]), _mm_load_ps(cast(float*)(&accumScan[i - area.min.x])) + color); 925 } 926 } 927 } 928 } 929 930 private: 931 /// Used for faking environment reflections. 932 Mipmap!RGBA _skybox = null; 933 } 934 935 class PassEmissiveContribution : CompositorPass 936 { 937 nothrow: 938 @nogc: 939 public: 940 941 this(MultipassCompositor parent) 942 { 943 super(parent); 944 } 945 946 override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers) 947 { 948 PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers; 949 OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex]; 950 Mipmap!RGBA diffuseMap = PBRbuf.diffuseMap; 951 952 // Add light emitted by neighbours 953 // Bloom-like. 954 for (int j = area.min.y; j < area.max.y; ++j) 955 { 956 RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y); 957 for (int i = area.min.x; i < area.max.x; ++i) 958 { 959 float ic = i + 0.5f; 960 float jc = j + 0.5f; 961 962 // Get alpha-premultiplied, avoids to have to do alpha-aware mipmapping 963 // #RESIZE: more pixels => light travels further 964 vec4f colorLevel1 = diffuseMap.linearSample(1, ic, jc); 965 vec4f colorLevel2 = diffuseMap.linearSample(2, ic, jc); 966 vec4f colorLevel3 = diffuseMap.linearSample(3, ic, jc); 967 968 version(futurePBREmissive) 969 { 970 // See Issue #827; this was a problem for Emissive highlights. 971 vec4f colorLevel4 = diffuseMap.cubicSample(4, ic, jc); 972 vec4f colorLevel5 = diffuseMap.cubicSample(5, ic, jc); 973 } 974 else 975 { 976 vec4f colorLevel4 = diffuseMap.linearSample(4, ic, jc); 977 vec4f colorLevel5 = diffuseMap.linearSample(5, ic, jc); 978 } 979 980 version(futurePBREmissive) 981 { 982 // What is super nice with the linear-space mipmap in Diffuse, is that 983 // taking a blurred samples seemingly take equal weights in several layers. 984 float noise = (BLUE_NOISE_16x16[(i & 15)*16 + (j & 15)] - 127.5f) * 0.003f; 985 enum float AMT = 0.002f * 0.67f; // good values for Couture: 0.67f (and 0.66f in 2nd pos) 986 vec4f emitted = colorLevel1 * AMT; 987 emitted += colorLevel2 * AMT; 988 emitted += colorLevel3 * AMT; 989 emitted += colorLevel4 * AMT; 990 emitted += colorLevel5 * AMT * (1 + noise); 991 } 992 else 993 { 994 vec4f emitted = colorLevel1 * 0.00117647f; 995 emitted += colorLevel2 * 0.00176471f; 996 emitted += colorLevel3 * 0.00147059f; 997 emitted += colorLevel4 * 0.00088235f; 998 emitted += colorLevel5 * 0.00058823f; 999 } 1000 accumScan[i - area.min.x] += RGBAf(emitted.r, emitted.g, emitted.b, emitted.a); 1001 } 1002 } 1003 } 1004 } 1005 1006 1007 // 16x16 Patch of 8-bit blue noise, tileable. 1008 private static immutable ubyte[256] BLUE_NOISE_16x16 = 1009 [ 1010 127, 194, 167, 79, 64, 173, 22, 83, 167, 105, 119, 250, 201, 34, 214, 145, 1011 233, 56, 13, 251, 203, 124, 243, 42, 216, 34, 73, 175, 133, 64, 185, 73, 1012 93, 156, 109, 144, 34, 98, 153, 138, 187, 238, 155, 46, 13, 102, 247, 0, 1013 28, 180, 46, 218, 183, 13, 212, 69, 13, 92, 126, 228, 211, 161, 117, 197, 1014 134, 240, 121, 75, 234, 88, 53, 170, 109, 204, 59, 22, 86, 141, 38, 222, 1015 81, 205, 13, 59, 160, 198, 129, 252, 0, 147, 176, 193, 244, 71, 173, 56, 1016 22, 168, 104, 139, 22, 114, 38, 220, 101, 231, 77, 34, 113, 13, 189, 96, 1017 253, 148, 227, 190, 246, 174, 66, 155, 28, 50, 164, 131, 217, 151, 232, 128, 1018 115, 69, 34, 50, 93, 13, 209, 85, 192, 120, 248, 64, 90, 28, 208, 42, 1019 0, 200, 215, 79, 125, 148, 239, 136, 181, 22, 206, 13, 185, 108, 59, 179, 1020 90, 130, 159, 182, 235, 42, 106, 0, 56, 99, 226, 140, 157, 237, 77, 165, 1021 249, 28, 105, 13, 61, 170, 224, 75, 202, 163, 114, 81, 46, 22, 137, 223, 1022 189, 53, 219, 142, 196, 28, 122, 154, 254, 42, 28, 242, 196, 210, 119, 38, 1023 149, 86, 118, 245, 71, 96, 213, 13, 88, 178, 66, 129, 171, 0, 99, 69, 1024 178, 13, 207, 38, 159, 187, 50, 132, 236, 146, 191, 95, 53, 229, 163, 241, 1025 46, 225, 102, 135, 0, 230, 110, 199, 61, 0, 221, 22, 150, 83, 112, 22 1026 ]; 1027 1028 class PassClampAndConvertTo8bit : CompositorPass 1029 { 1030 nothrow: 1031 @nogc: 1032 public: 1033 1034 version(futurePBREmissive) 1035 { 1036 /// Normally not much reason to change this. This is the threshold above which colors are 1037 /// allowed to "bleed" into others in a gray way. 1038 float tonemapThreshold = 1.0f; 1039 1040 /// Tuned on Auburn plugins. This brings a sense of dynamic range, 1041 /// possibly lower would be a bit better. 0.3f wins over 0.5f and 1.0f. 1042 float tonemapRatio = 0.3f; 1043 } 1044 1045 this(MultipassCompositor parent) 1046 { 1047 super(parent); 1048 } 1049 1050 override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers) 1051 { 1052 PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers; 1053 OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex]; 1054 ImageRef!RGBA* wfb = PBRbuf.outputBuf; 1055 1056 immutable __m128 mm255_99 = _mm_set1_ps(255.99f); 1057 immutable __m128i zero = _mm_setzero_si128(); 1058 1059 version(futurePBREmissive) 1060 { 1061 float toneRatio = tonemapRatio / 3; 1062 } 1063 1064 // Final pass, clamp, convert to ubyte 1065 for (int j = area.min.y; j < area.max.y; ++j) 1066 { 1067 int* wfb_scan = cast(int*)(wfb.scanline(j).ptr); 1068 const(RGBAf)* accumScan = accumBuffer.scanlinePtr(j - area.min.y); 1069 1070 for (int i = area.min.x; i < area.max.x; ++i) 1071 { 1072 RGBAf accum = accumScan[i - area.min.x]; 1073 __m128 color = _mm_setr_ps(accum.r, accum.g, accum.b, 1.0f); 1074 1075 version(futurePBREmissive) 1076 { 1077 // Try to weight green higher. 1078 // This avoids shifting hue when tonemapping. 1079 __m128 exceed = _mm_max_ps(_mm_setzero_ps(), color - _mm_set1_ps(tonemapThreshold)); 1080 1081 // Compute luma of exceed energy. Note that we're operating in gamma-space still. 1082 // Should it be applied equivalently to all components? not sure 1083 float exceedLuma = 0.212655f * exceed.array[0] 1084 + 0.715158f * exceed.array[1] 1085 + 0.072187f * exceed.array[2]; 1086 1087 // should it be applied equivalently to all components? not sure 1088 color += _mm_set1_ps(exceedLuma * toneRatio); 1089 color.ptr[3] = 1.0f; 1090 } 1091 1092 __m128i icolorD = _mm_cvttps_epi32(color * mm255_99); 1093 __m128i icolorW = _mm_packs_epi32(icolorD, zero); 1094 __m128i icolorB = _mm_packus_epi16(icolorW, zero); 1095 wfb_scan[i] = icolorB.array[0]; 1096 } 1097 } 1098 } 1099 } 1100 1101 1102 1103 1104 private: 1105 1106 // log2 approximation by Laurent de Soras 1107 // http://www.flipcode.com/archives/Fast_log_Function.shtml 1108 float fastlog2(float val) pure nothrow @nogc 1109 { 1110 union fi_t 1111 { 1112 int i; 1113 float f; 1114 } 1115 1116 fi_t fi; 1117 fi.f = val; 1118 int x = fi.i; 1119 int log_2 = ((x >> 23) & 255) - 128; 1120 x = x & ~(255 << 23); 1121 x += 127 << 23; 1122 fi.i = x; 1123 return fi.f + log_2; 1124 } 1125 1126 // log2 approximation by Laurent de Soras 1127 // http://www.flipcode.com/archives/Fast_log_Function.shtml 1128 // Same but 4x at once 1129 __m128 _mm_fastlog2_ps(__m128 val) pure nothrow @nogc 1130 { 1131 __m128i x = _mm_castps_si128(val); 1132 __m128i m128 = _mm_set1_epi32(128); 1133 __m128i m255 = _mm_set1_epi32(255); 1134 __m128i log_2 = _mm_and_si128(_mm_srai_epi32(x, 23), m255) - m128; 1135 x = _mm_and_si128(x, _mm_set1_epi32(~(255 << 23))); 1136 x = x + _mm_set1_epi32(127 << 23); 1137 __m128 fif = _mm_castsi128_ps(x); 1138 return fif + _mm_cvtepi32_ps(log_2); 1139 } 1140 1141 1142 1143 alias convertMaterialToFloat4 = convertBaseColorToFloat4; 1144 1145 // Convert a 8-bit color to a normalized 4xfloat color 1146 __m128 convertBaseColorToFloat4(RGBA rgba) nothrow @nogc pure 1147 { 1148 int asInt = *cast(int*)(&rgba); 1149 __m128i packed = _mm_cvtsi32_si128(asInt); 1150 __m128i mmZero = _mm_setzero_si128(); 1151 __m128i shorts = _mm_unpacklo_epi8(packed, mmZero); 1152 __m128i ints = _mm_unpacklo_epi16(shorts, mmZero); 1153 return _mm_cvtepi32_ps(ints) * _mm_set1_ps(div255); 1154 } 1155 1156 __m128 convertNormalToFloat4(RGBf normal) nothrow @nogc pure 1157 { 1158 return _mm_setr_ps(normal.r, normal.g, normal.b, 0.0f); 1159 } 1160 1161 __m128 convertVec4fToFloat4(vec4f vec) nothrow @nogc pure 1162 { 1163 return _mm_setr_ps(vec.x, vec.y, vec.z, vec.w); 1164 } 1165 1166 private enum float div255 = 1 / 255.0f; 1167 1168 1169 // Removed Options: 1170 version(legacyBlinnPhong) 1171 { 1172 static assert("legacyBlinnPhong was removed in Dplug v13"); 1173 } 1174 1175 version(legacyPBRNormals) 1176 { 1177 static assert("legacyPBRNormals was removed in Dplug v12"); 1178 }