1 /**
2 * Original fixed-function PBR rendering in Dplug.
3 * For compatibility purpose.
4 *
5 * Copyright: Copyright Auburn Sounds 2015-2019.
6 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7 */
8 module dplug.gui.legacypbr;
11 import core.stdc.stdio;
12 import std.math;
14 import dplug.math.vector;
15 import dplug.math.box;
16 import dplug.math.matrix;
18 import dplug.core.vec;
19 import dplug.core.nogc;
20 import dplug.core.math;
21 import dplug.core.thread;
23 import dplug.gui.compositor;
25 import dplug.graphics;
26 import dplug.window.window;
28 import dplug.gui.ransac;
30 import inteli.math;
31 import inteli.smmintrin;
32 import dplug.gui.profiler;
// FUTURE: introduce a tonemap operator that doesn't break existing things and only "adds" to the final render.
// TODO: PBR rendering doesn't depend correctly on the size of the plugin.
//       The #RESIZE tag below marks all areas that need updating.
/// When inheriting from `MultipassCompositor`, you can define what the passes exchange
/// between each other. The only constraint is that the first field is a `CompositorPassBuffers`.
struct PBRCompositorPassBuffers
{
    // Has to stay the first field, for ABI compatibility of `MultipassCompositor`.
    CompositorPassBuffers parent;
    alias parent this;

    // One buffer per thread: computed normals.
    OwnedImage!RGBf[] normalBuffers;

    // One buffer per thread: light accumulated by each deferred pass.
    OwnedImage!RGBAf[] accumBuffers;

    // One buffer per thread: approximation of the normal variance.
    OwnedImage!L32f[] varianceBuffers;
}
/// Conversion factor relating Z (depth) samples to pixels.
/// It was hand-tuned once so that results match the other normal computation algorithm.
/// Since it shapes the virtual geometry, it affects both normals and raymarching into depth.
/// Future: make it modifiable, so that plugins can have more Z range (more 3D).
/// Bug: resizing should affect this factor.
enum float FACTOR_Z = 4655.0f; // #RESIZE: this factor depends on DPI
/// The legacy compositor.
/// The Dplug compositor was originally fixed-function; this class keeps that
/// rendering available as a fixed list of passes.
class PBRCompositor : MultipassCompositor
{
nothrow @nogc:

    // <LEGACY> parameters, reproduced here as setters for compatibility.
    // The intended way is to tweak the settings of each pass when creating it.

    /// Sets the color of the oblique shadow light pass.
    void light1Color(vec3f color)
    {
        auto shadowLight = cast(PassObliqueShadowLight) getPass(PASS_OBLIQUE_SHADOW);
        shadowLight.color = color;
    }

    /// Sets the direction of the directional light pass.
    void light2Dir(vec3f dir)
    {
        auto directional = cast(PassDirectionalLight) getPass(PASS_DIRECTIONAL);
        directional.direction = dir;
    }

    /// Sets the color of the directional light pass.
    void light2Color(vec3f color)
    {
        auto directional = cast(PassDirectionalLight) getPass(PASS_DIRECTIONAL);
        directional.color = color;
    }

    /// Sets the direction of the specular light pass.
    void light3Dir(vec3f dir)
    {
        auto specular = cast(PassSpecularLight) getPass(PASS_SPECULAR);
        specular.direction = dir;
    }

    /// Sets the color of the specular light pass.
    void light3Color(vec3f color)
    {
        auto specular = cast(PassSpecularLight) getPass(PASS_SPECULAR);
        specular.color = color;
    }

    /// Sets the `amount` of the skybox reflections pass.
    void skyboxAmount(float amount)
    {
        auto skybox = cast(PassSkyboxReflections) getPass(PASS_SKYBOX);
        skybox.amount = amount;
    }

    /// Sets the `amount` of the ambient occlusion pass.
    void ambientLight(float amount)
    {
        auto ambient = cast(PassAmbientOcclusion) getPass(PASS_AO);
        ambient.amount = amount;
    }

    version(futurePBREmissive)
    {
        /// Sets `tonemapThreshold` on the final clamp/convert pass.
        void tonemapThreshold(float value)
        {
            auto clampPass = cast(PassClampAndConvertTo8bit) getPass(PASS_CLAMP);
            clampPass.tonemapThreshold = value;
        }

        /// Sets `tonemapRatio` on the final clamp/convert pass.
        void tonemapRatio(float value)
        {
            auto clampPass = cast(PassClampAndConvertTo8bit) getPass(PASS_CLAMP);
            clampPass.tonemapRatio = value;
        }
    }

    // </LEGACY>

    // Indices of the passes. They MUST match the order of the `addPass` calls
    // in the constructor; they only exist for the legacy setters above.
    private enum
    {
        PASS_NORMAL         = 0,
        PASS_AO             = 1,
        PASS_OBLIQUE_SHADOW = 2,
        PASS_DIRECTIONAL    = 3,
        PASS_SPECULAR       = 4,
        PASS_SKYBOX         = 5,
        PASS_EMISSIVE       = 6,
        PASS_CLAMP          = 7
    }

    /// Allocates the per-thread images and registers the eight legacy passes.
    this(CompositorCreationContext* context)
    {
        super(context);

        // One image of each kind per thread; they are sized later, in `resizeBuffers`.
        _normalBuffers   = mallocSlice!(OwnedImage!RGBf)(numThreads());
        _accumBuffers    = mallocSlice!(OwnedImage!RGBAf)(numThreads());
        _varianceBuffers = mallocSlice!(OwnedImage!L32f)(numThreads());

        foreach (thread; 0 .. numThreads())
        {
            _normalBuffers[thread]   = mallocNew!(OwnedImage!RGBf)();
            _accumBuffers[thread]    = mallocNew!(OwnedImage!RGBAf)();
            _varianceBuffers[thread] = mallocNew!(OwnedImage!L32f)();
        }

        // Register the passes, in the order given by the PASS_xxx indices.
        addPass( mallocNew!PassComputeNormal(this) );         // PASS_NORMAL
        addPass( mallocNew!PassAmbientOcclusion(this) );      // PASS_AO
        addPass( mallocNew!PassObliqueShadowLight(this) );    // PASS_OBLIQUE_SHADOW
        addPass( mallocNew!PassDirectionalLight(this) );      // PASS_DIRECTIONAL
        addPass( mallocNew!PassSpecularLight(this) );         // PASS_SPECULAR
        addPass( mallocNew!PassSkyboxReflections(this) );     // PASS_SKYBOX
        addPass( mallocNew!PassEmissiveContribution(this) );  // PASS_EMISSIVE
        addPass( mallocNew!PassClampAndConvertTo8bit(this) ); // PASS_CLAMP
    }

    /// Releases the per-thread images, then the slices holding them.
    ~this()
    {
        foreach (size_t thread; 0 .. _normalBuffers.length)
        {
            _normalBuffers[thread].destroyFree();
            _accumBuffers[thread].destroyFree();
            _varianceBuffers[thread].destroyFree();
        }
        freeSlice(_normalBuffers);
        freeSlice(_accumBuffers);
        freeSlice(_varianceBuffers);
    }

    /// Forwards to the parent, then resizes every thread-local buffer
    /// to `areaMaxWidth` x `areaMaxHeight`.
    override void resizeBuffers(int width, 
                                int height,
                                int areaMaxWidth,
                                int areaMaxHeight)
    {
        super.resizeBuffers(width, height, areaMaxWidth, areaMaxHeight);

        int noBorder = 0;
        int rowAlignment1 = 1;
        int rowAlignment16 = 16; // only the accumulation buffer asks for 16 here

        foreach (thread; 0 .. numThreads())
        {
            _normalBuffers[thread].size(areaMaxWidth, areaMaxHeight, noBorder, rowAlignment1);
            _accumBuffers[thread].size(areaMaxWidth, areaMaxHeight, noBorder, rowAlignment16);
            _varianceBuffers[thread].size(areaMaxWidth, areaMaxHeight, noBorder, rowAlignment1);
        }
    }

    /// Composites all `areas`: each area goes through every pass in sequence,
    /// and the areas are distributed with the thread pool's `parallelFor`.
    override void compositeTile(ImageRef!RGBA wfb, 
                                const(box2i)[] areas,
                                Mipmap!RGBA diffuseMap,
                                Mipmap!RGBA materialMap,
                                Mipmap!L16 depthMap,
                                IProfiler profiler)
    {
        // Gather everything handed to the passes in a single struct.
        PBRCompositorPassBuffers passBuffers;
        passBuffers.outputBuf       = &wfb;
        passBuffers.diffuseMap      = diffuseMap;
        passBuffers.materialMap     = materialMap;
        passBuffers.depthMap        = depthMap;
        passBuffers.accumBuffers    = _accumBuffers;
        passBuffers.normalBuffers   = _normalBuffers;
        passBuffers.varianceBuffers = _varianceBuffers;

        // Runs all passes, one after the other, on the area of index `areaIndex`.
        void renderOneArea(int areaIndex, int threadIndex) nothrow @nogc
        {
            OwnedImage!RGBAf accum = _accumBuffers[threadIndex];

            version(Dplug_ProfileUI) 
            {
                profiler.category("PBR");
            }

            box2i area = areas[areaIndex];

            // Every pass adds to the accumulation buffer, hence it must start from zero.
            foreach (int row; 0 .. area.height)
            {
                RGBAf* accumScan = accum.scanline(row).ptr;
                accumScan[0 .. area.width] = RGBAf(0.0f, 0.0f, 0.0f, 0.0f);
            }

            foreach (pass; passes())
            {
                version(Dplug_ProfileUI) 
                {
                    char[96] label;
                    snprintf(label.ptr, 96, "Pass %s".ptr, pass.name.ptr);
                    profiler.begin(label);
                }

                pass.renderIfActive(threadIndex, area, cast(CompositorPassBuffers*) &passBuffers);

                version(Dplug_ProfileUI) 
                {
                    profiler.end();
                }
            }
        }

        int areaCount = cast(int) areas.length;
        threadPool().parallelFor(areaCount, &renderOneArea);
    }

private:
    OwnedImage!RGBf[] _normalBuffers;   // computed normals, one image per thread
    OwnedImage!RGBAf[] _accumBuffers;   // accumulated color, one image per thread
    OwnedImage!L32f[] _varianceBuffers; // computed normal variance (useful for anti-aliasing), one image per thread
}
// Pass that derives, from the depth map, the normals and an approximation of normal variance.
class PassComputeNormal : CompositorPass
{
nothrow:
@nogc:

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// Fills the normal and variance buffers of thread `threadIndex` for each pixel of `area`.
    /// Both buffers are addressed relative to the top-left corner of `area`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        auto pbr = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!L16 depth0 = pbr.depthMap.levels[0];
        OwnedImage!RGBf normals = pbr.normalBuffers[threadIndex];
        OwnedImage!L32f variances = pbr.varianceBuffers[threadIndex];

        // Byte distance between two consecutive depth scanlines.
        const int pitch = depth0.pitchInBytes();

        // Scale applied to raw depth samples before computing the normal.
        enum float multUshort = 1.0 / FACTOR_Z;

        for (int row = area.min.y; row < area.max.y; ++row)
        {
            const int localRow = row - area.min.y;
            RGBf* normalScan = normals.scanline(localRow).ptr;
            L32f* varianceScan = variances.scanline(localRow).ptr;

            // Note: level 0 of the depth map has a border of 1 and a trailingSamples of 2,
            //       which is why reading 4 depth samples at once is allowed.
            const(L16)* depthScan = depth0.scanlinePtr(row);

            for (int col = area.min.x; col < area.max.x; ++col)
            {
                const int localCol = col - area.min.x;

                // Normal, computed from the 3x3 depth neighbourhood
                {
                    const(L16)* center = depthScan + col;
                    const(L16)* above = cast(const(L16)*) ( cast(const(ubyte)*) center - pitch );
                    const(L16)* below = cast(const(L16)*) ( cast(const(ubyte)*) center + pitch );

                    float[9] neighbourhood = void;
                    neighbourhood[0] = above[-1].l  * multUshort;
                    neighbourhood[1] = above[ 0].l  * multUshort;
                    neighbourhood[2] = above[+1].l  * multUshort;
                    neighbourhood[3] = center[-1].l * multUshort;
                    neighbourhood[4] = center[ 0].l * multUshort;
                    neighbourhood[5] = center[+1].l * multUshort;
                    neighbourhood[6] = below[-1].l  * multUshort;
                    neighbourhood[7] = below[ 0].l  * multUshort;
                    neighbourhood[8] = below[+1].l  * multUshort;

                    vec3f fitted = computePlaneFittingNormal(neighbourhood.ptr);
                    normalScan[localCol] = RGBf(fitted.x, fitted.y, fitted.z);
                }

                // Normal variance (old method)
                {
                    // Byte pointer to the sample left of the current one, on the current row.
                    const(ubyte)* leftOfCenter = cast(const(ubyte)*)(depthScan + col) - 2;

                    // 3 rows x 4 samples = 12 depth samples read; the rightmost of each row is unused.
                    __m128i rawAbove  = _mm_loadl_epi64( cast(const(__m128i)*)(leftOfCenter - pitch) );
                    __m128i rawCenter = _mm_loadl_epi64( cast(const(__m128i)*)(leftOfCenter) );
                    __m128i rawBelow  = _mm_loadl_epi64( cast(const(__m128i)*)(leftOfCenter + pitch) );

                    // Widen the 16-bit samples to 32-bit, then convert to float.
                    __m128i zeroes = _mm_setzero_si128();
                    __m128 depthM1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rawAbove, zeroes));
                    __m128 depthP0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rawCenter, zeroes));
                    __m128 depthP1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rawBelow, zeroes));

                    enum useLaplacian = false;
                    static if (useLaplacian)
                    {
                        // Possibly a bit better, but not tried further since
                        // it is a pain to make it match for the passes that use it.
                        align(16) static immutable float[12] LAPLACIAN =
                        [
                            0.25,  0.5, 0.25, 0,
                            0.5, -3.0,  0.5, 0,
                            0.25,  0.5, 0.25, 0,
                        ];

                        __m128 mul = depthM1 * _mm_load_ps(&LAPLACIAN[0]) 
                                   + depthP0 * _mm_load_ps(&LAPLACIAN[4])
                                   + depthP1 * _mm_load_ps(&LAPLACIAN[8]);
                        float laplace = mul.array[0] + mul.array[1] + mul.array[2] + mul.array[3];
                        laplace /= 256.0f;
                        float variance = laplace*laplace;
                    }
                    else
                    {
                        // Second-order derivative of depth along X, using the kernel:
                        //  1 -2  1
                        //  1 -2  1
                        //  1 -2  1
                        const(__m128) kernelDDX = _mm_setr_ps( 1.0f, -2.0f,  1.0f, 0.0f);
                        __m128 weightedX = kernelDDX * (depthM1 + depthP0 + depthP1);
                        float depthDX = weightedX.array[0] + weightedX.array[1] + weightedX.array[2];

                        // Second-order derivative of depth along Y, using the kernel:
                        //  1  1  1
                        // -2 -2 -2
                        //  1  1  1
                        const(__m128) kernelDDY_outer  = _mm_setr_ps( 1.0f,  1.0f,  1.0f, 0.0f);
                        const(__m128) kernelDDY_center = _mm_setr_ps(-2.0f, -2.0f, -2.0f, 0.0f);
                        __m128 weightedY = kernelDDY_outer * (depthM1 + depthP1) + depthP0 * kernelDDY_center;
                        float depthDY = weightedY.array[0] + weightedY.array[1] + weightedY.array[2];

                        depthDX *= (1 / 256.0f); // #RESIZE: sounds strange
                        depthDY *= (1 / 256.0f);
                        float variance = (depthDX * depthDX + depthDY * depthDY);
                    }
                    varianceScan[localCol] = L32f(variance);
                }
            }
        }
    }
}
/// Adds light to pixels depending on whether they are statistically above their neighbours.
class PassAmbientOcclusion : CompositorPass
{
nothrow:
@nogc:

    /// Multiplier applied to the contribution of this pass.
    float amount = 0.08125f;

    // TODO: add ambient light color

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// For each pixel of `area`, compares its depth with an average of depth mipmap
    /// levels 1 to 4, and adds the base color scaled accordingly to the accumulation buffer.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        auto pbr = cast(PBRCompositorPassBuffers*) buffers;
        Mipmap!L16 depthMap = pbr.depthMap;
        OwnedImage!L16 depth0 = pbr.depthMap.levels[0];
        OwnedImage!RGBA diffuse0 = pbr.diffuseMap.levels[0];
        OwnedImage!RGBAf accum = pbr.accumBuffers[threadIndex];

        enum float divider23040 = 1.0f / 23040;

        for (int row = area.min.y; row < area.max.y; ++row)
        {
            RGBA* diffuseScan = diffuse0.scanlinePtr(row);
            const(L16*) depthScan = depth0.scanlinePtr(row);
            RGBAf* accumScan = accum.scanlinePtr(row - area.min.y);

            for (int col = area.min.x; col < area.max.x; ++col)
            {
                __m128 baseColor = convertBaseColorToFloat4(diffuseScan[col]);

                // Sample at the center of the pixel.
                float px = col + 0.5f;
                float py = row + 0.5f;

                // #RESIZE: if the plugin is large, should sample higher in mipmap levels
                float avgDepthHere =
                    ( depthMap.linearSample(1, px, py)
                        + depthMap.linearSample(2, px, py)
                        + depthMap.linearSample(3, px, py)
                        + depthMap.linearSample(4, px, py) ) * 0.25f;

                float diff = depthScan[col].l - avgDepthHere;

                // Maps diff = -23040 to 0 and diff = 0 to 1, clamped to [0, 1].
                float cavity = (diff + 23040.0f) * divider23040;
                if (cavity >= 1)
                    cavity = 1;
                else if (cavity < 0)
                    cavity = 0;

                __m128 contribution = baseColor * _mm_set1_ps(cavity * amount);

                // Add the contribution to what the accumulation buffer already holds.
                float* accumHere = cast(float*)(&accumScan[col - area.min.x]);
                _mm_store_ps(accumHere, _mm_load_ps(accumHere) + contribution);
            }
        }
    }
}
/// Pass adding a primary light that casts shadows.
class PassObliqueShadowLight : CompositorPass
{
nothrow:
@nogc:

    /// Color of this light pass.
    vec3f color = vec3f(0.25f, 0.25f, 0.25f) * 1.3f;

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// For each pixel of `area`, samples the depth map diagonally (up-right and up-left)
    /// to estimate how much light passes, and adds the lit base color to the accumulation buffer.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        // Light contribution for a given depth difference.
        // This is f(x) = clamp(Ax+B, 0, 1) with A = 1/15360 and B = 1:
        //   - a difference of 0 (or more) maps to 1
        //   - a difference of -15360 (or less) maps to 0
        static float contributionOf(int diff) nothrow @nogc
        {
            static immutable float divider15360 = 1.0f / 15360; // BUG: not consistent with FACTOR_Z, this is steeper...

            if (diff >= 0)
                return 1;
            if (diff < -15360)
                return 0;
            return (diff + 15360) * divider15360;
        }

        auto pbr = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!L16 depthLevel0 = pbr.depthMap.levels[0];
        OwnedImage!RGBA diffuseLevel0 = pbr.diffuseMap.levels[0];
        OwnedImage!RGBAf accumBuffer = pbr.accumBuffers[threadIndex];

        enum float fallOff = 0.78f; // #RESIZE, recompute that table as needed

        int samples = 11; // #RESIZE ditto

        // Weight of each sample, decreasing with the distance to the pixel.
        // PERF: align(16) on weight[1]
        static immutable float[11] weights =
        [
            1.0f,
            fallOff,
            fallOff ^^ 2,
            fallOff ^^ 3,
            fallOff ^^ 4,
            fallOff ^^ 5,
            fallOff ^^ 6,
            fallOff ^^ 7,
            fallOff ^^ 8,
            fallOff ^^ 9,
            fallOff ^^ 10
        ];

        // Sum of weights[1..11] (geometric series minus its first term).
        enum float totalWeights = (1.0f - (fallOff ^^ 11)) / (1.0f - fallOff) - 1;
        enum float invTotalWeights = 1 / (1.7f * totalWeights);

        int wholeWidth = depthLevel0.w;
        int wholeHeight = depthLevel0.h;

        // Vector constants, identical for every pixel.
        __m128 mmZeroesf = _mm_setzero_ps();
        __m128i mmZero = _mm_setzero_si128();
        __m128 mmOnes = _mm_set1_ps(1.0f);
        __m128 mm0_7 = _mm_set1_ps(0.7f);
        __m128i maxX = _mm_set1_epi32(wholeWidth - 1);
        __m128i mm0123 = _mm_setr_epi32(0, 1, 2, 3);
        __m128 mmA = _mm_set1_ps(0.00006510416f); // 1 / 15360

        for (int row = area.min.y; row < area.max.y; ++row)
        {
            RGBA* diffuseScan = diffuseLevel0.scanlinePtr(row);
            const(L16*) depthScan = depthLevel0.scanlinePtr(row);
            RGBAf* accumScan = accumBuffer.scanlinePtr(row - area.min.y);

            __m128i mmRow = _mm_set1_epi32(row); // Y coord

            for (int col = area.min.x; col < area.max.x; ++col)
            {
                RGBA ibaseColor = diffuseScan[col];
                vec3f baseColor = vec3f(ibaseColor.r, ibaseColor.g, ibaseColor.b) * div255;

                int depthCenter = depthScan[col].l;
                float lightPassed = 0.0f;

                int sample = 1;

                // Vectorized part: 4 samples at a time.
                __m128i mmCol = _mm_set1_epi32(col); // X coord
                for (; sample + 3 < samples; sample += 4)
                {
                    __m128i mmSample = _mm_set1_epi32(sample) + mm0123;
                    __m128i xRight = mmCol + mmSample;
                    __m128i xLeft  = mmCol - mmSample;
                    __m128i yUp    = mmRow - mmSample;

                    // Clamp source indices
                    // PERF: _mm_min_epi32 and _mm_max_epi32 not available in SSE3, use _mm_min_epi16 instead
                    xRight = _mm_min_epi32(xRight, maxX);
                    xLeft  = _mm_max_epi32(xLeft, mmZero);
                    yUp    = _mm_max_epi32(yUp, mmZero);

                    __m128i z = _mm_set1_epi32(depthCenter) + mmSample; /// ??? same WTF this makes no sense whatsoever

                    L16* scan0 = depthLevel0.scanlinePtr(yUp.array[0]);
                    L16* scan1 = depthLevel0.scanlinePtr(yUp.array[1]);
                    L16* scan2 = depthLevel0.scanlinePtr(yUp.array[2]);
                    L16* scan3 = depthLevel0.scanlinePtr(yUp.array[3]);

                    __m128i depthRight = _mm_setr_epi32( scan0[xRight.array[0]].l,
                                                         scan1[xRight.array[1]].l,
                                                         scan2[xRight.array[2]].l,
                                                         scan3[xRight.array[3]].l );
                    __m128 diffRight = _mm_cvtepi32_ps(z - depthRight);

                    __m128i depthLeft = _mm_setr_epi32( scan0[xLeft.array[0]].l,
                                                        scan1[xLeft.array[1]].l,
                                                        scan2[xLeft.array[2]].l,
                                                        scan3[xLeft.array[3]].l );
                    __m128 diffLeft = _mm_cvtepi32_ps(z - depthLeft);

                    __m128 contribRight = _mm_max_ps(mmZeroesf, _mm_min_ps(mmOnes, mmOnes + diffRight * mmA));
                    __m128 contribLeft  = _mm_max_ps(mmZeroesf, _mm_min_ps(mmOnes, mmOnes + diffLeft * mmA));
                    __m128 mmWeight = _mm_loadu_ps(&weights[sample]);
                    __m128 contrib = (contribRight + contribLeft * mm0_7) * mmWeight;

                    // Summed lane by lane, in order.
                    lightPassed += contrib.array[0];
                    lightPassed += contrib.array[1];
                    lightPassed += contrib.array[2];
                    lightPassed += contrib.array[3];
                }

                // Scalar part: remaining samples.
                for ( ; sample < samples; ++sample)
                {
                    int xRight = col + sample;
                    if (xRight >= wholeWidth)
                        xRight = wholeWidth - 1;

                    int xLeft = col - sample;
                    if (xLeft < 0)
                        xLeft = 0;

                    int yUp = row - sample;
                    if (yUp < 0)
                        yUp = 0;

                    int z = depthCenter + sample; // ??? WTF
                    L16* scan = depthLevel0.scanlinePtr(yUp);

                    // FUTURE: use pointer offsets here instead of opIndex
                    float contribRight = contributionOf(z - scan[xRight].l);
                    float contribLeft  = contributionOf(z - scan[xLeft].l);

                    lightPassed += (contribRight + contribLeft * 0.7f) * weights[sample];
                }

                vec3f finalColor = baseColor * color * (lightPassed * invTotalWeights);
                __m128 mmColor = _mm_setr_ps(finalColor.r, finalColor.g, finalColor.b, 0.0f);

                // Add the light to what the accumulation buffer already holds.
                float* accumHere = cast(float*)(&accumScan[col - area.min.x]);
                _mm_store_ps(accumHere, _mm_load_ps(accumHere) + mmColor);
            }
        }
    }
}
/// Pass adding a secondary, directional light. Its strength depends on the
/// computed normal and on the red channel of the material map, used as roughness.
class PassDirectionalLight : CompositorPass
{
nothrow:
@nogc:
public:

    /// World-space direction. Unsure of the particular space it lives in.
    vec3f direction = vec3f(0.0f, 1.0f, 0.1f).normalized;

    /// Color of this light pass.
    vec3f color = vec3f(0.481f, 0.481f, 0.481f);

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// For each pixel of `area`, adds `baseColor * color * diffuseFactor`
    /// to the accumulation buffer of thread `threadIndex`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        auto pbr = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBA diffuse0 = pbr.diffuseMap.levels[0];
        OwnedImage!RGBA material0 = pbr.materialMap.levels[0];
        OwnedImage!RGBf normals = pbr.normalBuffers[threadIndex];
        OwnedImage!RGBAf accum = pbr.accumBuffers[threadIndex];

        for (int row = area.min.y; row < area.max.y; ++row)
        {
            // Maps are addressed in whole-image coordinates,
            // thread-local buffers relative to the area.
            const int localRow = row - area.min.y;
            RGBA* materialScan = material0.scanlinePtr(row);
            RGBA* diffuseScan = diffuse0.scanlinePtr(row);
            RGBf* normalScan = normals.scanlinePtr(localRow);
            RGBAf* accumScan = accum.scanlinePtr(localRow);

            for (int col = area.min.x; col < area.max.x; ++col)
            {
                const int localCol = col - area.min.x;

                RGBf storedNormal = normalScan[localCol];
                vec3f normal = vec3f(storedNormal.r, storedNormal.g, storedNormal.b);

                float roughness = materialScan[col].r * div255;

                RGBA ibaseColor = diffuseScan[col];
                vec3f baseColor = vec3f(ibaseColor.r, ibaseColor.g, ibaseColor.b) * div255;

                // Remap dot(normal, direction) to 0.5 +/- 0.5, then stretch it
                // with a lower bound that depends on roughness.
                float facing = 0.5f + 0.5f * dot(normal, direction);
                float diffuseFactor = linmap!float(facing, 0.24f - roughness * 0.5f, 1, 0, 1.0f);

                vec3f finalColor = baseColor * color * diffuseFactor;
                accumScan[localCol] += RGBAf(finalColor.r, finalColor.g, finalColor.b, 0.0f);
            }
        }
    }
}
667 class PassSpecularLight : CompositorPass
668 {
669 nothrow:
670 @nogc:
671 public:
673     /// World-space direction. Unsure of the particular space it lives in.
674     vec3f direction = vec3f(0.0f, 1.0f, 0.1f).normalized;
676     /// Color of this light pass.
677     vec3f color = vec3f(0.26f, 0.26f, 0.26f);
679     this(MultipassCompositor parent)
680     {
681         super(parent);
682         _specularFactor.reallocBuffer(numThreads());
683         _exponentFactor.reallocBuffer(numThreads());
684         _toksvigScaleFactor.reallocBuffer(numThreads());
686         // initialize new elements in the array, else realloc wouldn't work well next
687         for (int thread = 0; thread < numThreads(); ++thread)
688         {
689             _specularFactor[thread] = null;
690             _exponentFactor[thread] = null;
691             _toksvigScaleFactor[thread] = null;
692         }
694         for (int roughByte = 0; roughByte < 256; ++roughByte)
695         {
696             _exponentTable[roughByte] = 0.8f * exp( (1-roughByte / 255.0f) * 5.5f);
698             // Convert Phong exponent to Blinn-phong exponent
699             _exponentTable[roughByte] *= 2.8f; // tuned by hand to match the former "legacy" Phong specular highlight. This makes very little difference.
700         }
702     }
704     override void resizeBuffers(int width, 
705                                 int height,
706                                 int areaMaxWidth,
707                                 int areaMaxHeight)
708     {
709         // resize all thread-local buffers
710         for (int thread = 0; thread < numThreads(); ++thread)
711         {
712             _specularFactor[thread].reallocBuffer(width);
713             _exponentFactor[thread].reallocBuffer(width);
714             _toksvigScaleFactor[thread].reallocBuffer(width);
715         }
716     }
718     override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
719     {
720         PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
721         OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0];
722         OwnedImage!RGBA materialLevel0 = PBRbuf.materialMap.levels[0];
723         OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex];
724         OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex];
725         OwnedImage!L32f varianceBuffer = PBRbuf.varianceBuffers[threadIndex];
727         int w = diffuseLevel0.w;
728         int h = diffuseLevel0.h;
729         immutable float invW = 1.0f / w;
730         immutable float invH = 1.0f / h;
732         __m128 mmlight3Dir = _mm_setr_ps(-direction.x, -direction.y, -direction.z, 0.0f);
733         float* pSpecular = _specularFactor[threadIndex].ptr;
734         float* pExponent = _exponentFactor[threadIndex].ptr;
735         float* pToksvigScale = _toksvigScaleFactor[threadIndex].ptr;
737         for (int j = area.min.y; j < area.max.y; ++j)
738         {
739             RGBA* materialScan = materialLevel0.scanlinePtr(j);
740             RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j);
741             RGBf* normalScan = normalBuffer.scanlinePtr(j - area.min.y);
742             RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y);
743             L32f* varianceScan = varianceBuffer.scanlinePtr(j - area.min.y);
745             for (int i = area.min.x; i < area.max.x; ++i)
746             {
747                 RGBA materialHere = materialScan[i];
748                 RGBf normalFromBuf = normalScan[i - area.min.x];
749                 __m128 normal = convertNormalToFloat4(normalFromBuf);
751                 // TODO: this should be tuned interactively, maybe it's annoying to feel
752                 //       Need to compute the viewer distance from screen... and DPI.
753                 // #RESIZE
754                 __m128 toEye = _mm_setr_ps(0.5f - i * invW, j * invH - 0.5f, 1.0f, 0.0f);
755                 toEye = _mm_fast_normalize_ps(toEye);
757                 __m128 halfVector = toEye - mmlight3Dir;
758                 halfVector = _mm_fast_normalize_ps(halfVector);
759                 float specularFactor = _mm_dot_ps(halfVector, normal);
761                 if (specularFactor < 1e-3f) 
762                     specularFactor = 1e-3f;
764                 float exponent = _exponentTable[materialHere.r];
766                 // From NVIDIA Technical Brief: "Mipmapping Normal Maps"
767                 // We use normal variance to reduce exponent and scale of the specular
768                 // highlight, which should avoid aliasing.
769                 float VARIANCE_FACTOR = 4e-5f; // was very hard to tune, probably should not be dx*dx+dy*dy?
770                 float variance = varianceScan[i - area.min.x].l;
771                 float Ft = 1.0f / (1.0f + exponent * variance * VARIANCE_FACTOR);
772                 float scaleFactorToksvig = ( (1.0f + exponent * Ft) / (1.0f + exponent) );
773                 assert(scaleFactorToksvig <= 1);
774                 pToksvigScale[i] = scaleFactorToksvig;
775                 pSpecular[i] = specularFactor;
776                 pExponent[i] = exponent * Ft;
777             }
779             // Just the pow operation for this line
780             {
781                 int i = area.min.x;
782                 for (; i + 3 < area.max.x; i += 4)
783                 {
784                     _mm_storeu_ps(&pSpecular[i], _mm_pow_ps(_mm_loadu_ps(&pSpecular[i]), _mm_loadu_ps(&pExponent[i])));
785                 }
786                 for (; i < area.max.x; ++i)
787                 {
788                     pSpecular[i] = _mm_pow_ss(pSpecular[i], pExponent[i]);
789                 }
790             }
792             for (int i = area.min.x; i < area.max.x; ++i)
793             {
794                 float specularFactor = pSpecular[i];
796                 __m128 material = convertMaterialToFloat4(materialScan[i]);
797                 RGBA materialHere = materialScan[i];
798                 float roughness = material.array[0];
799                 float metalness = material.array[1];
800                 float specular  = material.array[2];
801                 __m128 baseColor = convertBaseColorToFloat4(diffuseScan[i]);
802                 __m128 mmLightColor = _mm_setr_ps(color.x, color.y, color.z, 0.0f);
804                 float roughFactor = 10 * (1.0f - roughness) * (1 - metalness * 0.5f);
805                 specularFactor = specularFactor * roughFactor * pToksvigScale[i];
806                 __m128 finalColor = baseColor * mmLightColor * _mm_set1_ps(specularFactor * specular);
808                 _mm_store_ps(cast(float*)(&accumScan[i - area.min.x]), _mm_load_ps(cast(float*)(&accumScan[i - area.min.x])) + finalColor);
809             }
810         }
811     }
813     ~this()
814     {
815         foreach(thread; 0..numThreads())
816         {
817             _specularFactor[thread].reallocBuffer(0);
818             _exponentFactor[thread].reallocBuffer(0);
819             _toksvigScaleFactor[thread].reallocBuffer(0);
820         }
821         _specularFactor.reallocBuffer(0);
822         _exponentFactor.reallocBuffer(0);
823         _toksvigScaleFactor.reallocBuffer(0);
824     }
private:
    // Specular exponent lookup, indexed in `render` by the red byte of the material map
    // (the same byte `render` reads as roughness).
    float[256] _exponentTable;

    // Note: those are thread-local buffers
    // The outer index is the thread index. Each inner buffer is a scratch row that
    // `render` indexes by absolute x position within the scanline being processed.
    float[][] _specularFactor;     // specular base, overwritten in place by base ^ exponent
    float[][] _exponentFactor;     // exponent after the Toksvig reduction
    float[][] _toksvigScaleFactor; // Toksvig scale applied to the final highlight
833 }
/// Compositor pass that fakes environment reflections by sampling a mipmapped skybox image.
/// Adds nothing to the accumulation buffer until a skybox was given with `setSkybox`.
class PassSkyboxReflections : CompositorPass
{
nothrow:
@nogc:
public:

    /// Global strength of the reflections. The contribution of each pixel is further
    /// multiplied by its metalness and its base color.
    float amount = 0.52f;

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    ~this()
    {
        releaseSkybox();
    }

    /// Replaces the current skybox, and builds its mipmaps.
    /// Note: take ownership of image
    /// That image must have been built with `mallocNew`
    void setSkybox(OwnedImage!RGBA image)
    {
        releaseSkybox();
        _skybox = mallocNew!(Mipmap!RGBA)(12, image);
        _skybox.generateMipmaps(Mipmap!RGBA.Quality.box);
    }

    /// Adds the skybox reflection of each pixel of `area` to the per-thread accumulation buffer.
    /// Diffuse and material maps are read with absolute coordinates, per-thread images
    /// (normal, variance, accumulation) relative to `area.min`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* passBuffers = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBA diffuse = passBuffers.diffuseMap.levels[0];
        OwnedImage!RGBA material = passBuffers.materialMap.levels[0];
        OwnedImage!RGBf normals = passBuffers.normalBuffers[threadIndex];
        OwnedImage!RGBAf accum = passBuffers.accumBuffers[threadIndex];
        OwnedImage!L32f variance = passBuffers.varianceBuffers[threadIndex];

        // Without a skybox there is nothing to reflect.
        if (_skybox is null)
            return;

        int w = diffuse.w;
        int h = diffuse.h;
        immutable float invW = 1.0f / w;
        immutable float invH = 1.0f / h;

        // Quantities that are the same for every pixel.
        enum float ROUGH_FACT = 6.0f / 255.0f;
        immutable float skyboxPixelCount = _skybox.width * _skybox.height;
        immutable float skyMaxX = (_skybox.width - 1.0f);
        immutable float skyMaxY = (_skybox.height - 1.0f);
        immutable float reflectionGain = amount * div255;

        // skybox reflection (use the same shininess as specular)
        for (int y = area.min.y; y < area.max.y; ++y)
        {
            immutable int row = y - area.min.y;
            RGBA* materialRow = material.scanlinePtr(y);
            RGBA* diffuseRow = diffuse.scanlinePtr(y);
            RGBf* normalRow = normals.scanlinePtr(row);
            RGBAf* accumRow = accum.scanlinePtr(row);
            L32f* varianceRow = variance.scanlinePtr(row);

            for (int x = area.min.x; x < area.max.x; ++x)
            {
                immutable int col = x - area.min.x;

                // Mipmap level for this pixel: grows with normal variance and with roughness.
                float level = varianceRow[col].l * skyboxPixelCount;
                float roughness = materialRow[x].r;
                level = 0.5f * fastlog2(1.0f + level * 0.00001f) + ROUGH_FACT * roughness;

                // TODO: same remark than above about toEye, something to think about
                // #RESIZE
                __m128 toEye = _mm_setr_ps(0.5f - x * invW, y * invH - 0.5f, 1.0f, 0.0f);
                toEye = _mm_fast_normalize_ps(toEye);

                __m128 normal = convertNormalToFloat4(normalRow[col]);
                __m128 reflected = _mm_reflectnormal_ps(toEye, normal);

                __m128 materialHere = convertMaterialToFloat4(materialRow[x]);
                float metalness = materialHere.array[1];
                __m128 baseColor = convertBaseColorToFloat4(diffuseRow[x]);

                // Map the x and y of the reflected direction to skybox coordinates.
                float skyx = 0.5f + ((0.5f - reflected.array[0] * 0.5f) * skyMaxX);
                float skyy = 0.5f + ((0.5f + reflected.array[1] * 0.5f) * skyMaxY);
                __m128 skyColor = convertVec4fToFloat4( _skybox.linearMipmapSample(level, skyx, skyy) );

                __m128 contribution = baseColor * skyColor * _mm_set1_ps(metalness * reflectionGain);
                float* accumHere = cast(float*)(&accumRow[col]);
                _mm_store_ps(accumHere, _mm_load_ps(accumHere) + contribution);
            }
        }
    }

private:
    /// Used for faking environment reflections.
    Mipmap!RGBA _skybox = null;

    /// Destroys the current skybox mipmap, if there is one.
    void releaseSkybox()
    {
        if (_skybox is null)
            return;
        _skybox.destroyFree();
        _skybox = null;
    }
}
/// Compositor pass adding a bloom-like glow: each pixel receives a weighted sum of
/// mipmap levels 1 to 5 of the diffuse map, sampled at the center of the pixel.
class PassEmissiveContribution : CompositorPass
{
nothrow:
@nogc:
public:

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// Adds the emitted light to the per-thread accumulation buffer, for each pixel of `area`.
    /// The diffuse map is sampled with absolute coordinates, the accumulation buffer is
    /// addressed relative to `area.min`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* passBuffers = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBAf accum = passBuffers.accumBuffers[threadIndex];
        Mipmap!RGBA diffuse = passBuffers.diffuseMap;

        // Add light emitted by neighbours
        // Bloom-like.
        for (int y = area.min.y; y < area.max.y; ++y)
        {
            RGBAf* accumRow = accum.scanlinePtr(y - area.min.y);
            immutable float sampleY = y + 0.5f;

            for (int x = area.min.x; x < area.max.x; ++x)
            {
                immutable float sampleX = x + 0.5f;

                // Get alpha-premultiplied, avoids to have to do alpha-aware mipmapping
                // #RESIZE: more pixels => light travels further
                vec4f blur1 = diffuse.linearSample(1, sampleX, sampleY);
                vec4f blur2 = diffuse.linearSample(2, sampleX, sampleY);
                vec4f blur3 = diffuse.linearSample(3, sampleX, sampleY);

                version(futurePBREmissive)
                {
                    // See Issue #827; this was a problem for Emissive highlights.
                    vec4f blur4 = diffuse.cubicSample(4, sampleX, sampleY);
                    vec4f blur5 = diffuse.cubicSample(5, sampleX, sampleY);
                }
                else
                {
                    vec4f blur4 = diffuse.linearSample(4, sampleX, sampleY);
                    vec4f blur5 = diffuse.linearSample(5, sampleX, sampleY);
                }

                version(futurePBREmissive)
                {
                    // What is super nice with the linear-space mipmap in Diffuse, is that
                    // taking a blurred samples seemingly take equal weights in several layers.
                    float noise = (BLUE_NOISE_16x16[(x & 15)*16 + (y & 15)] - 127.5f) * 0.003f;
                    enum float AMT = 0.002f * 0.67f; // good values for Couture: 0.67f (and 0.66f in 2nd pos)
                    vec4f glow = blur1 * AMT;
                    glow += blur2      * AMT;
                    glow += blur3      * AMT;
                    glow += blur4      * AMT;
                    glow += blur5      * AMT * (1 + noise);
                }
                else
                {
                    vec4f glow = blur1 * 0.00117647f;
                    glow += blur2      * 0.00176471f;
                    glow += blur3      * 0.00147059f;
                    glow += blur4      * 0.00088235f;
                    glow += blur5      * 0.00058823f;
                }
                accumRow[x - area.min.x] += RGBAf(glow.r, glow.g, glow.b, glow.a);
            }
        }
    }
}
// 16x16 Patch of 8-bit blue noise, tileable.
// Read by `PassEmissiveContribution` (under `version(futurePBREmissive)`) with the index
// `(i & 15)*16 + (j & 15)`, so the patch repeats every 16 pixels in both directions.
private static immutable ubyte[256] BLUE_NOISE_16x16 =
[
    127, 194, 167,  79,  64, 173,  22,  83, 167, 105, 119, 250, 201,  34, 214, 145, 
    233,  56,  13, 251, 203, 124, 243,  42, 216,  34,  73, 175, 133,  64, 185,  73, 
     93, 156, 109, 144,  34,  98, 153, 138, 187, 238, 155,  46,  13, 102, 247,   0,
     28, 180,  46, 218, 183,  13, 212,  69,  13,  92, 126, 228, 211, 161, 117, 197, 
    134, 240, 121,  75, 234,  88,  53, 170, 109, 204,  59,  22,  86, 141,  38, 222,
     81, 205,  13,  59, 160, 198, 129, 252,   0, 147, 176, 193, 244,  71, 173,  56,
     22, 168, 104, 139,  22, 114,  38, 220, 101, 231,  77,  34, 113,  13, 189,  96, 
    253, 148, 227, 190, 246, 174,  66, 155,  28,  50, 164, 131, 217, 151, 232, 128, 
    115,  69,  34,  50,  93,  13, 209,  85, 192, 120, 248,  64,  90,  28, 208,  42,
      0, 200, 215,  79, 125, 148, 239, 136, 181,  22, 206,  13, 185, 108,  59, 179,
     90, 130, 159, 182, 235,  42, 106,   0,  56,  99, 226, 140, 157, 237,  77, 165, 
    249,  28, 105,  13,  61, 170, 224,  75, 202, 163, 114,  81,  46,  22, 137, 223, 
    189,  53, 219, 142, 196,  28, 122, 154, 254,  42,  28, 242, 196, 210, 119,  38, 
    149,  86, 118, 245,  71,  96, 213,  13,  88, 178,  66, 129, 171,   0,  99,  69, 
    178,  13, 207,  38, 159, 187,  50, 132, 236, 146, 191,  95,  53, 229, 163, 241,
     46, 225, 102, 135,   0, 230, 110, 199,  61,   0, 221,  22, 150,  83, 112, 22
];
/// Final compositor pass: converts the floating-point accumulation buffer to 8-bit
/// pixels in the output buffer, with alpha forced to opaque.
class PassClampAndConvertTo8bit : CompositorPass
{
nothrow:
@nogc:
public:

    version(futurePBREmissive)
    {
        /// Normally not much reason to change this. This is the threshold above which colors are 
        /// allowed to "bleed" into others in a gray way.
        float tonemapThreshold = 1.0f;

        /// Tuned on Auburn plugins. This brings a sense of dynamic range, 
        /// possibly lower would be a bit better. 0.3f wins over 0.5f and 1.0f.
        float tonemapRatio     = 0.3f; 
    }

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// Writes each pixel of `area` to the output buffer (absolute coordinates), reading the
    /// per-thread accumulation buffer relative to `area.min`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* passBuffers = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBAf accum = passBuffers.accumBuffers[threadIndex];
        ImageRef!RGBA* output = passBuffers.outputBuf;

        immutable __m128 scaleTo8bit = _mm_set1_ps(255.99f);
        immutable __m128i allZeroes = _mm_setzero_si128();

        version(futurePBREmissive)
        {
            float toneRatio = tonemapRatio / 3;
        }

        // Final pass, clamp, convert to ubyte
        for (int y = area.min.y; y < area.max.y; ++y)
        {
            int* outRow = cast(int*)(output.scanline(y).ptr);
            const(RGBAf)* accumRow = accum.scanlinePtr(y - area.min.y);

            for (int x = area.min.x; x < area.max.x; ++x)
            {
                RGBAf lit = accumRow[x - area.min.x];

                // Accumulated alpha is ignored, the output is opaque.
                __m128 color = _mm_setr_ps(lit.r, lit.g, lit.b, 1.0f);

                version(futurePBREmissive)
                {
                    // Try to weight green higher.
                    // This avoids shifting hue when tonemapping.
                    __m128 exceed = _mm_max_ps(_mm_setzero_ps(), color - _mm_set1_ps(tonemapThreshold));

                    // Compute luma of exceed energy. Note that we're operating in gamma-space still.
                    // Should it be applied equivalently to all components? not sure
                    float exceedLuma = 0.212655f * exceed.array[0] 
                                     + 0.715158f * exceed.array[1] 
                                     + 0.072187f * exceed.array[2];

                    // should it be applied equivalently to all components? not sure
                    color = color + _mm_set1_ps(exceedLuma * toneRatio);
                    color.ptr[3] = 1.0f;
                }

                // float -> int32 (truncation) -> int16 -> uint8; both packs saturate,
                // which performs the clamp.
                __m128i asInts = _mm_cvttps_epi32(color * scaleTo8bit);
                __m128i asShorts = _mm_packs_epi32(asInts, allZeroes);
                __m128i asBytes = _mm_packus_epi16(asShorts, allZeroes);
                outRow[x] = asBytes.array[0];
            }
        }
    }
}
1104 private:
// log2 approximation by Laurent de Soras
// http://www.flipcode.com/archives/Fast_log_Function.shtml
//
// Splits the bits of `val` into exponent and mantissa: the exponent field gives the integer
// part, and the mantissa, rebased to an exponent of zero (a float in [1, 2) for positive
// normal inputs), is added as is. Exact for powers of two, e.g. fastlog2(4.0f) == 2.0f.
float fastlog2(float val) pure nothrow @nogc
{
    enum int EXPONENT_FIELD = 255 << 23;
    enum int EXPONENT_OF_ONE = 127 << 23;

    int bits = *cast(int*)(&val);

    // Biased exponent minus 128: one less than the true exponent, which compensates
    // for the rebased mantissa being at least 1.
    int exponent = ((bits >> 23) & 255) - 128;

    // Same float with its exponent field replaced by the one of 1.0f.
    int rebasedBits = (bits & ~EXPONENT_FIELD) + EXPONENT_OF_ONE;
    float rebased = *cast(float*)(&rebasedBits);

    return rebased + exponent;
}
// log2 approximation by Laurent de Soras
// http://www.flipcode.com/archives/Fast_log_Function.shtml
// Same but 4x at once
//
// Applies the same bit manipulation as `fastlog2` to each of the 4 lanes of `val`.
__m128 _mm_fastlog2_ps(__m128 val) pure nothrow @nogc
{
    __m128i bits = _mm_castps_si128(val);

    // Biased exponent of each lane, minus 128.
    __m128i biasedExponent = _mm_and_si128(_mm_srai_epi32(bits, 23), _mm_set1_epi32(255));
    __m128i exponent = _mm_sub_epi32(biasedExponent, _mm_set1_epi32(128));

    // Each lane with its exponent field replaced by the one of 1.0f.
    __m128i withoutExponent = _mm_and_si128(bits, _mm_set1_epi32(~(255 << 23)));
    __m128i rebasedBits = _mm_add_epi32(withoutExponent, _mm_set1_epi32(127 << 23));

    return _mm_add_ps(_mm_castsi128_ps(rebasedBits), _mm_cvtepi32_ps(exponent));
}
// Material pixels are converted exactly like base color pixels.
alias convertMaterialToFloat4 = convertBaseColorToFloat4;

// Convert a 8-bit color to a normalized 4xfloat color
// Lane N of the result holds byte N of `rgba`, multiplied by 1/255.
__m128 convertBaseColorToFloat4(RGBA rgba) nothrow @nogc pure
{
    immutable __m128i noBits = _mm_setzero_si128();

    // The 4 bytes of the pixel, in the low 32 bits of the vector.
    __m128i bytes = _mm_cvtsi32_si128(*cast(int*)(&rgba));

    // Zero-extend: 8-bit -> 16-bit -> 32-bit.
    __m128i words = _mm_unpacklo_epi8(bytes, noBits);
    __m128i dwords = _mm_unpacklo_epi16(words, noBits);

    __m128 asFloats = _mm_cvtepi32_ps(dwords);
    return asFloats * _mm_set1_ps(div255);
}
// Loads a 3-component normal in a 4xfloat vector, the 4th lane being zero.
__m128 convertNormalToFloat4(RGBf normal) nothrow @nogc pure
{
    __m128 widened = _mm_setr_ps(normal.r, normal.g, normal.b, 0.0f);
    return widened;
}
// Copies the x, y, z, w components of a `vec4f` to lanes 0 to 3 of a 4xfloat vector.
__m128 convertVec4fToFloat4(vec4f vec) nothrow @nogc pure
{
    __m128 lanes = _mm_setr_ps(vec.x, vec.y, vec.z, vec.w);
    return lanes;
}
// Factor that brings an 8-bit value (0 to 255) to the 0 to 1 range.
private enum float div255 = 1.0f / 255.0f;
// Removed Options:
// Building with one of these version identifiers must fail with an explicit message.
// Note: the condition has to be an explicit `false`. In `static assert("message")` the
// string literal itself is the condition and is true, so the assert would never fire.
version(legacyBlinnPhong)
{
    static assert(false, "legacyBlinnPhong was removed in Dplug v13");
}

version(legacyPBRNormals)
{
    static assert(false, "legacyPBRNormals was removed in Dplug v12");
}