1 /**
2 * Original fixed-function PBR rendering in Dplug.
3 * For compatibility purpose.
4 *
5 * Copyright: Copyright Auburn Sounds 2015-2019.
6 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7 */
8 module dplug.gui.legacypbr;
9 
10 
11 import core.stdc.stdio;
12 import std.math;
13 
14 import dplug.math.vector;
15 import dplug.math.box;
16 import dplug.math.matrix;
17 
18 import dplug.core.vec;
19 import dplug.core.nogc;
20 import dplug.core.math;
21 import dplug.core.thread;
22 
23 import dplug.gui.compositor;
24 
25 import dplug.graphics;
26 import dplug.window.window;
27 
28 import dplug.gui.ransac;
29 
30 import inteli.math;
31 import inteli.emmintrin;
32 import dplug.gui.profiler;
33 
// TODO: PBR rendering doesn't correctly depend on the size of the plugin.
//       The #RESIZE tag below marks all areas that need updating.
36 
37 
/// Data exchanged between the passes of `PBRCompositor`.
/// When inheriting from `MultipassCompositor`, you can define what the passes exchange 
/// between each other. However, the first field has to be a `CompositorPassBuffers`.
struct PBRCompositorPassBuffers
{
    // First field must be `CompositorPassBuffers` for ABI compatibility of `MultipassCompositor`.
    // Passes receive a `CompositorPassBuffers*` and cast it back to a `PBRCompositorPassBuffers*`.
    CompositorPassBuffers parent;
    alias parent this;

    // Computed normal, one buffer per thread (indexed by thread index)
    OwnedImage!RGBf[] normalBuffers;

    // Accumulates light for each deferred pass, one buffer per thread (indexed by thread index)
    OwnedImage!RGBAf[] accumBuffers;

    // Approximate of normal variance, one buffer per thread (indexed by thread index)
    OwnedImage!L32f[] varianceBuffers;
}
55 
56 
/// Equivalence factor between Z samples and pixels.
/// Tuned once by hand to match the other normal computation algorithm.
/// This affects virtual geometry, and as such: normals and raymarching into depth.
/// `PassComputeNormal` multiplies raw depth samples by `1 / FACTOR_Z` before plane fitting.
/// Future: this should be modifiable in order to have more Z range in plugins (more 3D).
/// Bug: resizing should affect this factor.
enum float FACTOR_Z = 4655.0f; // #RESIZE: this factor depends on DPI
63 
/// Originally, Dplug compositor was fixed function.
/// This is the legacy compositor.
///
/// It owns one normal, one accumulation and one variance image per thread, and runs the
/// passes registered in its constructor, in order, on every area it is asked to composite.
class PBRCompositor : MultipassCompositor
{
nothrow @nogc:


    // <LEGACY> parameters, reproduced here as properties for compatibility.
    // Instead you are supposed to tweak settings when creating the passes.
    // Each setter forwards to the pass stored at a fixed index (see the `PASS_*` enum below).

    /// Sets the color of the oblique shadow-casting light pass.
    void light1Color(vec3f color)
    {
        (cast(PassObliqueShadowLight)getPass(PASS_OBLIQUE_SHADOW)).color = color;
    }

    /// Sets the direction of the directional light pass.
    void light2Dir(vec3f dir)
    {
        (cast(PassDirectionalLight)getPass(PASS_DIRECTIONAL)).direction = dir;
    }

    /// Sets the color of the directional light pass.
    void light2Color(vec3f color)
    {
        (cast(PassDirectionalLight)getPass(PASS_DIRECTIONAL)).color = color;
    }

    /// Sets the direction of the specular light pass.
    void light3Dir(vec3f dir)
    {
        (cast(PassSpecularLight)getPass(PASS_SPECULAR)).direction = dir;
    }

    /// Sets the color of the specular light pass.
    void light3Color(vec3f color)
    {
        (cast(PassSpecularLight)getPass(PASS_SPECULAR)).color = color;
    }

    /// Sets the amount of the skybox reflections pass.
    void skyboxAmount(float amount)
    {
        (cast(PassSkyboxReflections)getPass(PASS_SKYBOX)).amount = amount;
    }

    /// Sets the amount of the ambient occlusion pass.
    void ambientLight(float amount)
    {
        (cast(PassAmbientOcclusion)getPass(PASS_AO)).amount = amount;
    }

    // </LEGACY>



    private enum // MUST be kept in sync with below passes, it's for legacy purpose
    {
        PASS_NORMAL      = 0,
        PASS_AO          = 1,
        PASS_OBLIQUE_SHADOW = 2,
        PASS_DIRECTIONAL = 3,
        PASS_SPECULAR    = 4,
        PASS_SKYBOX      = 5,
        PASS_EMISSIVE    = 6,
        PASS_CLAMP       = 7
    }

    /// Allocates the per-thread work images (left unsized until `resizeBuffers`),
    /// then registers the passes in the order given by the `PASS_*` enum.
    this(CompositorCreationContext* context)
    {
        super(context);

        const int threadCount = numThreads();

        _normalBuffers = mallocSlice!(OwnedImage!RGBf)(threadCount);
        _accumBuffers = mallocSlice!(OwnedImage!RGBAf)(threadCount);
        _varianceBuffers = mallocSlice!(OwnedImage!L32f)(threadCount);

        for (int t = 0; t < threadCount; ++t)
        {
            _normalBuffers[t] = mallocNew!(OwnedImage!RGBf)();
            _accumBuffers[t] = mallocNew!(OwnedImage!RGBAf)();
            _varianceBuffers[t] = mallocNew!(OwnedImage!L32f)();
        }

        // Create the passes
        addPass( mallocNew!PassComputeNormal(this) );         // PASS_NORMAL
        addPass( mallocNew!PassAmbientOcclusion(this) );      // PASS_AO
        addPass( mallocNew!PassObliqueShadowLight(this) );    // PASS_OBLIQUE_SHADOW
        addPass( mallocNew!PassDirectionalLight(this) );      // PASS_DIRECTIONAL
        addPass( mallocNew!PassSpecularLight(this) );         // PASS_SPECULAR
        addPass( mallocNew!PassSkyboxReflections(this) );     // PASS_SKYBOX
        addPass( mallocNew!PassEmissiveContribution(this) );  // PASS_EMISSIVE
        addPass( mallocNew!PassClampAndConvertTo8bit(this) ); // PASS_CLAMP
    }

    /// Releases every per-thread image, then the slices that held them.
    ~this()
    {
        for (size_t t = 0; t < _normalBuffers.length; ++t)
        {
            _normalBuffers[t].destroyFree();
            _accumBuffers[t].destroyFree();
            _varianceBuffers[t].destroyFree();
        }
        freeSlice(_normalBuffers);
        freeSlice(_accumBuffers);
        freeSlice(_varianceBuffers);
    }

    /// Forwards to the parent, then resizes each per-thread image to
    /// `areaMaxWidth` x `areaMaxHeight`.
    override void resizeBuffers(int width, 
                                int height,
                                int areaMaxWidth,
                                int areaMaxHeight)
    {
        super.resizeBuffers(width, height, areaMaxWidth, areaMaxHeight);

        enum int border_0 = 0;
        enum int rowAlign_1 = 1;
        enum int rowAlign_16 = 16; // passes use aligned SSE loads/stores on the accumulation rows

        // Create numThreads thread-local buffers of areaMaxWidth x areaMaxHeight size.
        const int threadCount = numThreads();
        for (int t = 0; t < threadCount; ++t)
        {
            _normalBuffers[t].size(areaMaxWidth, areaMaxHeight, border_0, rowAlign_1);
            _accumBuffers[t].size(areaMaxWidth, areaMaxHeight, border_0, rowAlign_16);
            _varianceBuffers[t].size(areaMaxWidth, areaMaxHeight, border_0, rowAlign_1);
        }
    }


    /// Composites every box of `areas` into `wfb`.
    /// Areas are distributed over the thread pool; for each area, the accumulation buffer of
    /// the executing thread is cleared, then all passes are rendered one after the other.
    override void compositeTile(ImageRef!RGBA wfb, 
                                const(box2i)[] areas,
                                Mipmap!RGBA diffuseMap,
                                Mipmap!RGBA materialMap,
                                Mipmap!L16 depthMap,
                                IProfiler profiler)
    {
        // Shared description of inputs/outputs handed to each pass.
        PBRCompositorPassBuffers buffers;
        buffers.outputBuf = &wfb;
        buffers.diffuseMap = diffuseMap;
        buffers.materialMap = materialMap;
        buffers.depthMap = depthMap;
        buffers.accumBuffers = _accumBuffers;
        buffers.normalBuffers = _normalBuffers;
        buffers.varianceBuffers = _varianceBuffers;

        // For each tile, do all pass one by one.
        void compositeOneTile(int i, int threadIndex) nothrow @nogc
        {
            OwnedImage!RGBAf accumBuffer = _accumBuffers[threadIndex];

            version(Dplug_ProfileUI) 
            {
                profiler.category("PBR");
            }

            box2i area = areas[i];

            // Clear the accumulation buffer, since all passes add to it.
            // Only the top-left `area.width` x `area.height` part is used for this area.
            {
                RGBAf zero = RGBAf(0.0f, 0.0f, 0.0f, 0.0f);
                for (int j = 0; j < area.height; ++j)
                {
                    RGBAf* accumScan = accumBuffer.scanline(j).ptr;
                    accumScan[0..area.width] = zero;
                }
            }

            foreach(pass; passes())
            {
                version(Dplug_ProfileUI) 
                {
                    char[96] buf;
                    // Pass an explicit length with `%.*s`: a D slice is not guaranteed to be
                    // zero-terminated, so a bare `%s` could read past the end of the name.
                    snprintf(buf.ptr, buf.length, "Pass %.*s".ptr, cast(int)pass.name.length, pass.name.ptr);
                    profiler.begin(buf);
                }

                pass.renderIfActive(threadIndex, area, cast(CompositorPassBuffers*)&buffers);

                version(Dplug_ProfileUI) 
                {
                    profiler.end;
                }
            }
        }
        int numAreas = cast(int)areas.length;
        threadPool().parallelFor(numAreas, &compositeOneTile);
    }

private:
    OwnedImage!RGBf[] _normalBuffers; // store computed normals
    OwnedImage!RGBAf[] _accumBuffers; // store accumulated color
    OwnedImage!L32f[] _varianceBuffers; // store computed normal variance, useful for anti-aliasing
}
251 
// Derives per-pixel normals from the depth map, along with an estimate of normal variance.
class PassComputeNormal : CompositorPass
{
nothrow:
@nogc:

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// Fills the normal and variance buffers of thread `threadIndex` for each pixel of `area`.
    /// Outputs are stored relative to the top-left corner of `area`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* pbr = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!L16 depth = pbr.depthMap.levels[0];
        OwnedImage!RGBf normalOut = pbr.normalBuffers[threadIndex];
        OwnedImage!L32f varianceOut = pbr.varianceBuffers[threadIndex];

        const int pitch = depth.pitchInBytes();

        // Scale applied to raw depth samples before fitting a plane.
        enum float depthScale = 1.0 / FACTOR_Z;

        for (int y = area.min.y; y < area.max.y; ++y)
        {
            const int localY = y - area.min.y;
            RGBf* normalRow = normalOut.scanline(localY).ptr;
            L32f* varianceRow = varianceOut.scanline(localY).ptr;

            // Note: because the level 0 of depth map has a border of 1 and a trailingSamples of 2,
            //       then we are allowed to read 4 depth samples at once.
            const(L16)* depthRow = depth.scanlinePtr(y);

            for (int x = area.min.x; x < area.max.x; ++x)
            {
                const int localX = x - area.min.x;
                const(L16)* center = depthRow + x;
                const(ubyte)* centerBytes = cast(const(ubyte)*) center;

                // Normal: fit a plane through the scaled 3x3 depth neighbourhood.
                {
                    const(L16)* above = cast(const(L16)*) (centerBytes - pitch);
                    const(L16)* below = cast(const(L16)*) (centerBytes + pitch);

                    float[9] patch = void;
                    for (int k = 0; k < 3; ++k)
                    {
                        patch[k]     = above[k - 1].l  * depthScale;
                        patch[3 + k] = center[k - 1].l * depthScale;
                        patch[6 + k] = below[k - 1].l  * depthScale;
                    }

                    vec3f n = computePlaneFittingNormal(patch.ptr);
                    normalRow[localX] = RGBf(n.x, n.y, n.z);
                }

                // Normal variance (old method): squared second-order depth derivatives.
                {
                    // Each load reads 4 samples starting one pixel to the left;
                    // the rightmost sample of every row is unused.
                    __m128i rowAboveRaw = _mm_loadl_epi64( cast(const(__m128i)*)(centerBytes - pitch - 2) );
                    __m128i rowHereRaw  = _mm_loadl_epi64( cast(const(__m128i)*)(centerBytes - 2) );
                    __m128i rowBelowRaw = _mm_loadl_epi64( cast(const(__m128i)*)(centerBytes + pitch - 2) );

                    // Widen the 16-bit samples to 32-bit integers, then convert to float
                    __m128i allZero = _mm_setzero_si128();
                    __m128 rowAbove = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rowAboveRaw, allZero));
                    __m128 rowHere  = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rowHereRaw, allZero));
                    __m128 rowBelow = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rowBelowRaw, allZero));

                    float variance;

                    enum useLaplacian = false;
                    static if (useLaplacian)
                    {
                        // Possible a bit better, not tried further since
                        // it is a pain to make it match for the passes that uses it.
                        align(16) static immutable float[12] LAPLACIAN =
                        [
                            0.25,  0.5, 0.25, 0,
                            0.5, -3.0,  0.5, 0,
                            0.25,  0.5, 0.25, 0,
                        ];

                        __m128 mul = rowAbove * _mm_load_ps(&LAPLACIAN[0]) 
                                   + rowHere  * _mm_load_ps(&LAPLACIAN[4])
                                   + rowBelow * _mm_load_ps(&LAPLACIAN[8]);
                        float laplace = mul.array[0] + mul.array[1] + mul.array[2] + mul.array[3];
                        laplace /= 256.0f;
                        variance = laplace*laplace;
                    }
                    else
                    {
                        // Second derivative along X, kernel:
                        //  1 -2  1
                        //  1 -2  1
                        //  1 -2  1
                        const(__m128) kernelX = _mm_setr_ps( 1.0f, -2.0f,  1.0f, 0.0f);
                        __m128 weightedX = kernelX * (rowAbove + rowHere + rowBelow);
                        float ddx = weightedX.array[0] + weightedX.array[1] + weightedX.array[2];

                        // Second derivative along Y, kernel:
                        //  1  1  1
                        // -2 -2 -2
                        //  1  1  1
                        const(__m128) kernelYOuter = _mm_setr_ps( 1.0f,  1.0f,  1.0f, 0.0f);
                        const(__m128) kernelYInner = _mm_setr_ps(-2.0f, -2.0f, -2.0f, 0.0f);
                        __m128 weightedY = kernelYOuter * (rowAbove + rowBelow) + rowHere * kernelYInner;
                        float ddy = weightedY.array[0] + weightedY.array[1] + weightedY.array[2];

                        ddx *= (1 / 256.0f); // #RESIZE: sounds strange
                        ddy *= (1 / 256.0f);
                        variance = (ddx * ddx + ddy * ddy);
                    }
                    varianceRow[localX] = L32f(variance);
                }
            }
        }
    }
}
367 
368 
/// Give light depending on whether the pixels are statistically above their neighbours.
/// The depth of a pixel is compared with the average of depth mipmap levels 1 to 4.
class PassAmbientOcclusion : CompositorPass
{
nothrow:
@nogc:

    /// Scale of the light added by this pass.
    float amount = 0.08125f;

    // TODO: add ambient light color

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// Adds `baseColor * cavity * amount` to this thread's accumulation buffer,
    /// for each pixel of `area`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* pbr = cast(PBRCompositorPassBuffers*) buffers;
        Mipmap!L16 depthMips = pbr.depthMap;
        OwnedImage!L16 depth0 = pbr.depthMap.levels[0];
        OwnedImage!RGBA diffuse0 = pbr.diffuseMap.levels[0];
        OwnedImage!RGBAf accum = pbr.accumBuffers[threadIndex];

        enum float divider23040 = 1.0f / 23040;

        for (int y = area.min.y; y < area.max.y; ++y)
        {
            RGBA* diffuseRow = diffuse0.scanlinePtr(y);
            const(L16*) depthRow = depth0.scanlinePtr(y);
            RGBAf* accumRow = accum.scanlinePtr(y - area.min.y);

            // Sampling position at pixel centers
            const float centerY = y + 0.5f;

            for (int x = area.min.x; x < area.max.x; ++x)
            {
                __m128 albedo = convertBaseColorToFloat4(diffuseRow[x]);

                const float centerX = x + 0.5f;

                // #RESIZE: if the plugin is large, should sample higher in mipmap levels

                float level1 = depthMips.linearSample(1, centerX, centerY);
                float level2 = depthMips.linearSample(2, centerX, centerY);
                float level3 = depthMips.linearSample(3, centerX, centerY);
                float level4 = depthMips.linearSample(4, centerX, centerY);
                float neighbourhoodDepth = ( level1 + level2 + level3 + level4 ) * 0.25f;

                // Positive when the pixel lies above its blurred surroundings
                float above = depthRow[x].l - neighbourhoodDepth;

                // Map to [0, 1]
                float cavity = (above + 23040.0f) * divider23040;
                cavity = (cavity >= 1) ? 1 : ( (cavity < 0) ? 0 : cavity );

                __m128 contribution = albedo * _mm_set1_ps(cavity * amount);

                float* dest = cast(float*)(&accumRow[x - area.min.x]);
                _mm_store_ps(dest, _mm_load_ps(dest) + contribution);
            }
        }
    }
}
430 
/// Primary light that casts shadows: each pixel looks at depth samples located up-right and
/// up-left of itself, and receives less light when those samples are higher than itself.
class PassObliqueShadowLight : CompositorPass
{
nothrow:
@nogc:

    /// Color of this light pass.
    vec3f color = vec3f(0.25f, 0.25f, 0.25f) * 1.3f;

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// Adds the shadowed light contribution to this thread's accumulation buffer,
    /// for each pixel of `area`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* pbr = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!L16 depth0 = pbr.depthMap.levels[0];
        OwnedImage!RGBA diffuse0 = pbr.diffuseMap.levels[0];
        OwnedImage!RGBAf accum = pbr.accumBuffers[threadIndex];

        enum float fallOff = 0.78f; // #RESIZE, recompute that table as needed

        int samples = 11; // #RESIZE ditto

        // Geometric fall-off: the further the sample, the less it matters.
        static immutable float[11] weights =
        [
            1.0f,
            fallOff,
            fallOff ^^ 2,
            fallOff ^^ 3,
            fallOff ^^ 4,
            fallOff ^^ 5,
            fallOff ^^ 6,
            fallOff ^^ 7,
            fallOff ^^ 8,
            fallOff ^^ 9,
            fallOff ^^ 10
        ];

        enum float totalWeights = (1.0f - (fallOff ^^ 11)) / (1.0f - fallOff) - 1;
        enum float invTotalWeights = 1 / (1.7f * totalWeights);

        // How much light a single sample lets through, given the signed depth difference:
        // 1 at or above zero, 0 below -15360, linear in between.
        static float lightThrough(int depthDiff) pure nothrow @nogc
        {
            enum float divider15360 = 1.0f / 15360; // BUG: not consistent with FACTOR_Z, this is steeper...
            if (depthDiff >= 0)
                return 1;
            if (depthDiff < -15360)
                return 0;
            return (depthDiff + 15360) * divider15360;
        }

        const int lastColumn = depth0.w - 1;

        for (int row = area.min.y; row < area.max.y; ++row)
        {
            RGBA* diffuseRow = diffuse0.scanlinePtr(row);
            const(L16*) depthRow = depth0.scanlinePtr(row);
            RGBAf* accumRow = accum.scanlinePtr(row - area.min.y);

            for (int col = area.min.x; col < area.max.x; ++col)
            {
                RGBA albedo8 = diffuseRow[col];
                vec3f albedo = vec3f(albedo8.r, albedo8.g, albedo8.b) * div255;

                int centerDepth = depthRow[col].l;

                float lightPassed = 0.0f;
                for (int step = 1; step < samples; ++step)
                {
                    // Sample positions, clamped to the image
                    int right = col + step;
                    if (right > lastColumn)
                        right = lastColumn;

                    int left = col - step;
                    if (left < 0)
                        left = 0;

                    int upper = row - step;
                    if (upper < 0)
                        upper = 0;

                    int reference = centerDepth + step; // ???

                    L16* upperRow = depth0.scanlinePtr(upper); // FUTURE: use pointer offsets here instead of opIndex
                    float fromRight = lightThrough(reference - upperRow[right].l);
                    float fromLeft  = lightThrough(reference - upperRow[left].l);

                    lightPassed += (fromRight + fromLeft * 0.7f) * weights[step];
                }

                vec3f lit = albedo * color * (lightPassed * invTotalWeights);
                __m128 mmLit = _mm_setr_ps(lit.r, lit.g, lit.b, 0.0f);

                float* dest = cast(float*)(&accumRow[col - area.min.x]);
                _mm_store_ps(dest, _mm_load_ps(dest) + mmLit);
            }
        }
    }
}
538 
/// Secondary light: a diffuse term based on the dot product of the computed normal and a
/// fixed direction, remapped according to material roughness.
class PassDirectionalLight : CompositorPass
{
nothrow:
@nogc:
public:

    /// World-space direction. Unsure of the particular space it lives in.
    vec3f direction = vec3f(0.0f, 1.0f, 0.1f).normalized;

    /// Color of this light pass.
    vec3f color = vec3f(0.481f, 0.481f, 0.481f);

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// Adds the diffuse contribution to this thread's accumulation buffer,
    /// for each pixel of `area`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* pbr = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBA diffuse0 = pbr.diffuseMap.levels[0];
        OwnedImage!RGBA material0 = pbr.materialMap.levels[0];
        OwnedImage!RGBf normals = pbr.normalBuffers[threadIndex];
        OwnedImage!RGBAf accum = pbr.accumBuffers[threadIndex];

        for (int y = area.min.y; y < area.max.y; ++y)
        {
            const int localY = y - area.min.y;
            RGBA* materialRow = material0.scanlinePtr(y);
            RGBA* diffuseRow = diffuse0.scanlinePtr(y);
            RGBf* normalRow = normals.scanlinePtr(localY);
            RGBAf* accumRow = accum.scanlinePtr(localY);

            for (int x = area.min.x; x < area.max.x; ++x)
            {
                const int localX = x - area.min.x;

                RGBf storedNormal = normalRow[localX];
                vec3f n = vec3f(storedNormal.r, storedNormal.g, storedNormal.b);

                // Red channel of the material map is the roughness
                float roughness = materialRow[x].r * div255;

                RGBA albedo8 = diffuseRow[x];
                vec3f albedo = vec3f(albedo8.r, albedo8.g, albedo8.b) * div255;

                // Half-Lambert style term, then remapped so that rough materials
                // start receiving light at lower angles.
                float wrapped = 0.5f + 0.5f * dot(n, direction);
                float diffuseFactor = linmap!float(wrapped, 0.24f - roughness * 0.5f, 1, 0, 1.0f);

                vec3f lit = albedo * color * diffuseFactor;
                accumRow[localX] += RGBAf(lit.r, lit.g, lit.b, 0.0f);
            }
        }
    }
}
588 
/// Specular highlight for one directional light.
/// For each pixel, the dot product of the half vector and the computed normal is raised to
/// an exponent looked up from the material roughness; both the exponent and the scale of the
/// highlight are reduced where the normal variance is high, to limit aliasing.
class PassSpecularLight : CompositorPass
{
nothrow:
@nogc:
public:

    /// World-space direction. Unsure of the particular space it lives in.
    vec3f direction = vec3f(0.0f, 1.0f, 0.1f).normalized;

    /// Color of this light pass.
    vec3f color = vec3f(0.26f, 0.26f, 0.26f);

    /// Allocates the per-thread arrays of scanline buffers (each left empty until
    /// `resizeBuffers`) and precomputes the roughness-to-exponent table.
    this(MultipassCompositor parent)
    {
        super(parent);
        _specularFactor.reallocBuffer(numThreads());
        _exponentFactor.reallocBuffer(numThreads());
        _toksvigScaleFactor.reallocBuffer(numThreads());

        // initialize new elements in the array, else realloc wouldn't work well next
        for (int thread = 0; thread < numThreads(); ++thread)
        {
            _specularFactor[thread] = null;
            _exponentFactor[thread] = null;
            _toksvigScaleFactor[thread] = null;
        }

        for (int roughByte = 0; roughByte < 256; ++roughByte)
        {
            _exponentTable[roughByte] = 0.8f * exp( (1-roughByte / 255.0f) * 5.5f);

            // Convert Phong exponent to Blinn-phong exponent
            _exponentTable[roughByte] *= 2.8f; // tuned by hand to match the former "legacy" Phong specular highlight. This makes very little difference.
        }

    }

    /// Resizes the per-thread scanline buffers to `width` floats.
    /// They are indexed with absolute X coordinates in `render`, hence the whole width.
    override void resizeBuffers(int width, 
                                int height,
                                int areaMaxWidth,
                                int areaMaxHeight)
    {
        // resize all thread-local buffers
        for (int thread = 0; thread < numThreads(); ++thread)
        {
            _specularFactor[thread].reallocBuffer(width);
            _exponentFactor[thread].reallocBuffer(width);
            _toksvigScaleFactor[thread].reallocBuffer(width);
        }
    }

    /// Adds the specular contribution to this thread's accumulation buffer,
    /// for each pixel of `area`. Each scanline is processed in three steps:
    /// gather base and exponent, perform the `pow` in bulk, then shade.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0];
        OwnedImage!RGBA materialLevel0 = PBRbuf.materialMap.levels[0];
        OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex];
        OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex];
        OwnedImage!L32f varianceBuffer = PBRbuf.varianceBuffers[threadIndex];

        int w = diffuseLevel0.w;
        int h = diffuseLevel0.h;
        immutable float invW = 1.0f / w;
        immutable float invH = 1.0f / h;

        // Both vectors are constant for the whole pass, build them only once.
        __m128 mmlight3Dir = _mm_setr_ps(-direction.x, -direction.y, -direction.z, 0.0f);
        __m128 mmLightColor = _mm_setr_ps(color.x, color.y, color.z, 0.0f);

        float* pSpecular = _specularFactor[threadIndex].ptr;
        float* pExponent = _exponentFactor[threadIndex].ptr;
        float* pToksvigScale = _toksvigScaleFactor[threadIndex].ptr;

        // Weight of the normal variance in the exponent reduction.
        enum float VARIANCE_FACTOR = 4e-5f; // was very hard to tune, probably should not be dx*dx+dy*dy?

        for (int j = area.min.y; j < area.max.y; ++j)
        {
            RGBA* materialScan = materialLevel0.scanlinePtr(j);
            RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j);
            RGBf* normalScan = normalBuffer.scanlinePtr(j - area.min.y);
            RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y);
            L32f* varianceScan = varianceBuffer.scanlinePtr(j - area.min.y);

            // Step 1: compute the base and exponent of the highlight for each pixel of the line
            for (int i = area.min.x; i < area.max.x; ++i)
            {
                RGBA materialHere = materialScan[i];
                RGBf normalFromBuf = normalScan[i - area.min.x];
                __m128 normal = convertNormalToFloat4(normalFromBuf);

                // TODO: this should be tuned interactively, maybe it's annoying to feel
                //       Need to compute the viewer distance from screen... and DPI.
                // #RESIZE
                __m128 toEye = _mm_setr_ps(0.5f - i * invW, j * invH - 0.5f, 1.0f, 0.0f);
                toEye = _mm_fast_normalize_ps(toEye);

                __m128 halfVector = toEye - mmlight3Dir;
                halfVector = _mm_fast_normalize_ps(halfVector);
                float specularFactor = _mm_dot_ps(halfVector, normal);

                // Keep the base of the power strictly positive
                if (specularFactor < 1e-3f) 
                    specularFactor = 1e-3f;

                // Red channel of the material map is the roughness
                float exponent = _exponentTable[materialHere.r];

                // From NVIDIA Technical Brief: "Mipmapping Normal Maps"
                // We use normal variance to reduce exponent and scale of the specular
                // highlight, which should avoid aliasing.
                float variance = varianceScan[i - area.min.x].l;
                float Ft = 1.0f / (1.0f + exponent * variance * VARIANCE_FACTOR);
                float scaleFactorToksvig = ( (1.0f + exponent * Ft) / (1.0f + exponent) );
                assert(scaleFactorToksvig <= 1);
                pToksvigScale[i] = scaleFactorToksvig;
                pSpecular[i] = specularFactor;
                pExponent[i] = exponent * Ft;
            }

            // Step 2: just the pow operation for this line, 4 pixels at a time then the remainder
            {
                int i = area.min.x;
                for (; i + 3 < area.max.x; i += 4)
                {
                    _mm_storeu_ps(&pSpecular[i], _mm_pow_ps(_mm_loadu_ps(&pSpecular[i]), _mm_loadu_ps(&pExponent[i])));
                }
                for (; i < area.max.x; ++i)
                {
                    pSpecular[i] = _mm_pow_ss(pSpecular[i], pExponent[i]);
                }
            }

            // Step 3: modulate by material and base color, and accumulate
            for (int i = area.min.x; i < area.max.x; ++i)
            {
                float specularFactor = pSpecular[i];

                __m128 material = convertMaterialToFloat4(materialScan[i]);
                float roughness = material.array[0];
                float metalness = material.array[1];
                float specular  = material.array[2];
                __m128 baseColor = convertBaseColorToFloat4(diffuseScan[i]);

                float roughFactor = 10 * (1.0f - roughness) * (1 - metalness * 0.5f);
                specularFactor = specularFactor * roughFactor * pToksvigScale[i];
                __m128 finalColor = baseColor * mmLightColor * _mm_set1_ps(specularFactor * specular);

                _mm_store_ps(cast(float*)(&accumScan[i - area.min.x]), _mm_load_ps(cast(float*)(&accumScan[i - area.min.x])) + finalColor);
            }
        }
    }

    /// Frees every per-thread scanline buffer, then the arrays holding them.
    ~this()
    {
        // Iterate over what was actually allocated, rather than asking for the thread count again.
        foreach(ref perThread; _specularFactor)
            perThread.reallocBuffer(0);
        foreach(ref perThread; _exponentFactor)
            perThread.reallocBuffer(0);
        foreach(ref perThread; _toksvigScaleFactor)
            perThread.reallocBuffer(0);

        _specularFactor.reallocBuffer(0);
        _exponentFactor.reallocBuffer(0);
        _toksvigScaleFactor.reallocBuffer(0);
    }

private:
    // Specular exponent for each possible roughness byte
    float[256] _exponentTable;

    // Note: those are thread-local buffers, one scanline of floats per thread
    float[][] _specularFactor;
    float[][] _exponentFactor; 
    float[][] _toksvigScaleFactor;
}
756 
/// Compositor pass that adds a fake environment reflection to the per-thread
/// accumulation buffer, by sampling a skybox mipmap along the reflected eye vector.
/// The pass does nothing as long as no skybox was given through `setSkybox`.
class PassSkyboxReflections : CompositorPass
{
nothrow:
@nogc:
public:

    /// Multiplier applied to the sampled sky color before accumulation.
    float amount = 0.52f;

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    ~this()
    {
        releaseSkybox();
    }

    // Note: take ownership of image
    // That image must have been built with `mallocNew`
    /// Replaces the current skybox (if any) with a 12-level mipmap built from
    /// `image`, using box-filtered mipmap generation.
    void setSkybox(OwnedImage!RGBA image)
    {
        releaseSkybox();
        _skybox = mallocNew!(Mipmap!RGBA)(12, image);
        _skybox.generateMipmaps(Mipmap!RGBA.Quality.box);
    }

    /// Adds the skybox reflection for every pixel of `area` to
    /// `accumBuffers[threadIndex]`.
    /// Params:
    ///     threadIndex = selects which per-thread normal/accum/variance buffers are used.
    ///     area = rectangle to process; per-thread buffers are addressed relative to `area.min`.
    ///     buffers = must point to a `PBRCompositorPassBuffers`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        // Nothing to reflect without a skybox.
        if (_skybox is null)
            return;

        PBRCompositorPassBuffers* PBRbuf = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBA diffuseLevel0 = PBRbuf.diffuseMap.levels[0];
        OwnedImage!RGBA materialLevel0 = PBRbuf.materialMap.levels[0];
        OwnedImage!RGBf normalBuffer = PBRbuf.normalBuffers[threadIndex];
        OwnedImage!RGBAf accumBuffer = PBRbuf.accumBuffers[threadIndex];
        OwnedImage!L32f varianceBuffer = PBRbuf.varianceBuffers[threadIndex];

        int w = diffuseLevel0.w;
        int h = diffuseLevel0.h;
        immutable float invW = 1.0f / w;
        immutable float invH = 1.0f / h;

        // Values below do not change across the area, compute them only once.
        enum float ROUGH_FACT = 6.0f / 255.0f;
        immutable float amountOfSkyboxPixels = _skybox.width * _skybox.height;
        immutable float fskyX = (_skybox.width - 1.0f);
        immutable float fSkyY = (_skybox.height - 1.0f);
        immutable float amountFactor = amount * div255;

        // skybox reflection (use the same shininess as specular)
        for (int j = area.min.y; j < area.max.y; ++j)
        {
            // Material and diffuse maps are addressed in absolute coordinates,
            // per-thread buffers relatively to the top-left corner of `area`.
            RGBA* materialScan = materialLevel0.scanlinePtr(j);
            RGBA* diffuseScan = diffuseLevel0.scanlinePtr(j);
            RGBf* normalScan = normalBuffer.scanlinePtr(j - area.min.y);
            RGBAf* accumScan = accumBuffer.scanlinePtr(j - area.min.y);
            L32f* varianceScan = varianceBuffer.scanlinePtr(j - area.min.y);

            for (int i = area.min.x; i < area.max.x; ++i)
            {
                immutable int localX = i - area.min.x;

                // First compute the needed mipmap level for this pixel:
                // more normal variance and more roughness => blurrier sky sample.
                float mipmapLevel = varianceScan[localX].l * amountOfSkyboxPixels;
                float roughness = materialScan[i].r;
                mipmapLevel = 0.5f * fastlog2(1.0f + mipmapLevel * 0.00001f) + ROUGH_FACT * roughness;

                // TODO: same remark than above about toEye, something to think about
                // #RESIZE
                __m128 toEye = _mm_setr_ps(0.5f - i * invW, j * invH - 0.5f, 1.0f, 0.0f);
                toEye = _mm_fast_normalize_ps(toEye);

                __m128 normal = convertNormalToFloat4(normalScan[localX]);
                __m128 pureReflection = _mm_reflectnormal_ps(toEye, normal);
                __m128 material = convertMaterialToFloat4(materialScan[i]);
                float metalness = material.array[1];
                __m128 baseColor = convertBaseColorToFloat4(diffuseScan[i]);

                // Map the x/y of the reflection vector from [-1, 1] to skybox texel coordinates.
                float skyx = 0.5f + ((0.5f - pureReflection.array[0] * 0.5f) * fskyX);
                float skyy = 0.5f + ((0.5f + pureReflection.array[1] * 0.5f) * fSkyY);
                __m128 skyColorAtThisPoint = convertVec4fToFloat4( _skybox.linearMipmapSample(mipmapLevel, skyx, skyy) );

                // The reflection is tinted by base color and weighted by metalness.
                __m128 color = baseColor * skyColorAtThisPoint * _mm_set1_ps(metalness * amountFactor);
                float* pAccum = cast(float*)(&accumScan[localX]);
                _mm_store_ps(pAccum, _mm_load_ps(pAccum) + color);
            }
        }
    }

private:
    /// Used for faking environment reflections.
    Mipmap!RGBA _skybox = null;

    /// Destroys the owned skybox mipmap, if there is one, and clears the reference.
    void releaseSkybox()
    {
        if (_skybox !is null)
        {
            _skybox.destroyFree();
            _skybox = null;
        }
    }
}
856 
/// Compositor pass that adds, for each pixel, a weighted sum of the diffuse map
/// sampled at mipmap levels 1 to 5 to the per-thread accumulation buffer.
class PassEmissiveContribution : CompositorPass
{
nothrow:
@nogc:
public:

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// Accumulates the bloom-like contribution over `area` into
    /// `accumBuffers[threadIndex]`, which is addressed relative to `area.min`.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* pbr = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBAf accum = pbr.accumBuffers[threadIndex];
        Mipmap!RGBA diffuse = pbr.diffuseMap;

        // Add light emitted by neighbours
        // Bloom-like.
        for (int y = area.min.y; y < area.max.y; ++y)
        {
            RGBAf* outRow = accum.scanlinePtr(y - area.min.y);

            for (int x = area.min.x; x < area.max.x; ++x)
            {
                // Sample at the pixel center.
                float cx = x + 0.5f;
                float cy = y + 0.5f;

                // Get alpha-premultiplied, avoids to have to do alpha-aware mipmapping
                // #RESIZE: more pixels => light travels further
                vec4f level1 = diffuse.linearSample(1, cx, cy);
                vec4f level2 = diffuse.linearSample(2, cx, cy);
                vec4f level3 = diffuse.linearSample(3, cx, cy);
                vec4f level4 = diffuse.linearSample(4, cx, cy);
                vec4f level5 = diffuse.linearSample(5, cx, cy);

                // Fixed per-level weights, summed from the finest to the coarsest level.
                vec4f sum = level1 * 0.00117647f;
                sum += level2 * 0.00176471f;
                sum += level3 * 0.00147059f;
                sum += level4 * 0.00088235f;
                sum += level5 * 0.00058823f;

                outRow[x - area.min.x] += RGBAf(sum.r, sum.g, sum.b, sum.a);
            }
        }
    }
}
902 
/// Final compositor pass: converts the floating-point accumulation buffer of the
/// current thread to 8-bit pixels and writes them to the output buffer.
class PassClampAndConvertTo8bit : CompositorPass
{
nothrow:
@nogc:
public:

    this(MultipassCompositor parent)
    {
        super(parent);
    }

    /// Writes `area` of the output buffer from `accumBuffers[threadIndex]`.
    /// The accumulated alpha is ignored, 1.0 is used in its place.
    override void render(int threadIndex, const(box2i) area, CompositorPassBuffers* buffers)
    {
        PBRCompositorPassBuffers* pbr = cast(PBRCompositorPassBuffers*) buffers;
        OwnedImage!RGBAf accum = pbr.accumBuffers[threadIndex];
        ImageRef!RGBA* output = pbr.outputBuf;

        immutable __m128 scale = _mm_set1_ps(255.99f);
        immutable __m128i zeroes = _mm_setzero_si128();

        // Final pass, clamp, convert to ubyte
        for (int y = area.min.y; y < area.max.y; ++y)
        {
            // Output is addressed in absolute coordinates, one 32-bit write per pixel.
            int* dest = cast(int*)(output.scanline(y).ptr);
            // The accumulation buffer is addressed relative to the area origin.
            const(RGBAf)* src = accum.scanlinePtr(y - area.min.y);

            for (int x = area.min.x; x < area.max.x; ++x)
            {
                RGBAf pixel = src[x - area.min.x];
                __m128 asFloat = _mm_setr_ps(pixel.r, pixel.g, pixel.b, 1.0f);

                // float -> int32 (truncating), then two saturating packs: int32 -> int16 -> uint8.
                __m128i as32 = _mm_cvttps_epi32(asFloat * scale);
                __m128i as16 = _mm_packs_epi32(as32, zeroes);
                __m128i as8 = _mm_packus_epi16(as16, zeroes);

                // The lowest 32-bit lane holds the four packed channel bytes.
                dest[x] = as8.array[0];
            }
        }
    }
}
941 
942 
943 
944 
945 private:
946 
// log2 approximation by Laurent de Soras
// http://www.flipcode.com/archives/Fast_log_Function.shtml
//
// Splits the IEEE-754 single-precision bit pattern of `val`: the biased
// exponent (minus 128) gives the integer part, and the mantissa reinterpreted
// as a float in [1, 2) is added as the fractional approximation.
// Exact for powers of two, e.g. 1 -> 0, 2 -> 1, 0.5 -> -1.
float fastlog2(float val) pure nothrow @nogc
{
    union FloatBits
    {
        int bits;
        float value;
    }

    enum int EXPONENT_FIELD = 255 << 23; // the 8 exponent bits
    enum int EXPONENT_OF_ONE = 127 << 23; // exponent field of values in [1, 2)

    FloatBits pun;
    pun.value = val;
    immutable int raw = pun.bits;

    // Integer part, taken from the biased exponent.
    immutable int integerPart = ((raw >> 23) & 255) - 128;

    // Keep sign and mantissa, force the exponent so that the float lies in [1, 2).
    int rebased = raw & ~EXPONENT_FIELD;
    rebased += EXPONENT_OF_ONE;
    pun.bits = rebased;

    return pun.value + integerPart;
}
966 
// log2 approximation by Laurent de Soras
// http://www.flipcode.com/archives/Fast_log_Function.shtml
// Same but 4x at once
//
// Applies to each of the 4 lanes the same bit manipulation as the scalar `fastlog2`.
__m128 _mm_fastlog2_ps(__m128 val) pure nothrow @nogc
{
    __m128i bits = _mm_castps_si128(val);

    // Integer part: biased exponent field minus 128.
    __m128i exponent = _mm_and_si128(_mm_srai_epi32(bits, 23), _mm_set1_epi32(255));
    exponent = exponent - _mm_set1_epi32(128);

    // Fractional part: clear the exponent field, then set it to 127.
    __m128i rebased = _mm_and_si128(bits, _mm_set1_epi32(~(255 << 23)));
    rebased = rebased + _mm_set1_epi32(127 << 23);

    __m128 fraction = _mm_castsi128_ps(rebased);
    return fraction + _mm_cvtepi32_ps(exponent);
}
981 
982 
983 
// Material texels go through the exact same conversion as base color texels:
// each 8-bit channel is scaled to a float by `div255`.
alias convertMaterialToFloat4 = convertBaseColorToFloat4;
985 
// Convert a 8-bit color to a normalized 4xfloat color
// Each of the four bytes of `rgba` is zero-extended to 32-bit, converted to
// float and multiplied by `div255`; lanes keep the in-memory byte order.
__m128 convertBaseColorToFloat4(RGBA rgba) nothrow @nogc pure
{
    __m128i zeroes = _mm_setzero_si128();

    // Load the 4 channel bytes as one 32-bit integer in the lowest lane.
    __m128i bytes = _mm_cvtsi32_si128(*cast(int*)(&rgba));

    // Widen by interleaving with zeroes: 8-bit -> 16-bit -> 32-bit.
    __m128i words = _mm_unpacklo_epi8(bytes, zeroes);
    __m128i dwords = _mm_unpacklo_epi16(words, zeroes);

    __m128 asFloat = _mm_cvtepi32_ps(dwords);
    return asFloat * _mm_set1_ps(div255);
}
996 
// Put a 3-component float normal in the first three lanes of a vector, fourth lane is zero.
__m128 convertNormalToFloat4(RGBf normal) nothrow @nogc pure
{
    __m128 packed = _mm_setr_ps(normal.r, normal.g, normal.b, 0.0f);
    return packed;
}
1001 
// Copy the x, y, z, w components of a `vec4f` into the four lanes of a vector, in that order.
__m128 convertVec4fToFloat4(vec4f vec) nothrow @nogc pure
{
    __m128 packed = _mm_setr_ps(vec.x, vec.y, vec.z, vec.w);
    return packed;
}
1006 
// Scale factor that maps an 8-bit channel value (0 to 255) to the 0 to 1 range.
private enum float div255 = 1 / 255.0f;
1008 
1009 
1010 // Removed Options:
version(legacyBlinnPhong)
{
    // The condition must be an explicit `false`: a lone string literal would be
    // taken as the (truthy) condition and the build would silently succeed.
    static assert(false, "legacyBlinnPhong was removed in Dplug v13");
}
1015 
version(legacyPBRNormals)
{
    // The condition must be an explicit `false`: a lone string literal would be
    // taken as the (truthy) condition and the build would silently succeed.
    static assert(false, "legacyPBRNormals was removed in Dplug v12");
}